Esempio n. 1
0
def handleLine(line):
    """Register the source described by one input line and crawl it.

    The line is matched against ``lineParser``: group 1 is the source URL
    and the optional group 2 is a source group name.  A URL that does not
    exist yet is added and crawled, and its ps4m.com link is printed.  When
    a group name is present the source is assigned to that group whether or
    not it was newly added.

    Args:
        line: Raw text line; trailing whitespace is ignored.

    Raises:
        ValueError: If the line does not match ``lineParser`` or the match
            does not end on group 1 or group 2.
    """
    # Parse line
    m = lineParser.match(line.rstrip())
    # Validate explicitly: an assert disappears under ``python -O`` and a
    # failed match would otherwise surface as AttributeError on None.
    if m is None or m.lastindex not in (1, 2):
        raise ValueError("Unparseable source line: %r" % (line,))
    url = Url(m.group(1))
    sourceGroupName = m.group(2) if m.lastindex == 2 else None

    # Add source
    if not sourceExists(url):
        print("Adding " + url.value)
        webFeed = itemFactory(url)
        # NOTE: a hasSimilarSource(webFeed) check used to guard the steps
        # below; it is currently disabled, so every new URL is added.
        addSource(url, webFeed.name)
        sourceId = urlToLookupId(url.value)

        crawl(webFeed, sourceId)

        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("https://ps4m.com/s/%d" % sourceId)
    else:
        print(url.value + " already exists")

    # If necessary, assign source to group
    if sourceGroupName is not None:
        print("\tAdding to %s" % sourceGroupName)
        sourceId = urlToLookupId(url.value)
        addSourceGroupAssignment(sourceId, sourceGroupName)
Esempio n. 2
0
def backEnd_run(dep):
    """Run the crawler to the given depth and collect its result tables.

    Args:
        dep: Crawl depth; any value accepted by ``int()``.

    Returns:
        A 9-tuple, in this order: document cache, title cache, lexicon,
        anchor db, page-rank result, inverted index, description cache,
        image cache, resolved inverted index.
    """
    crawler.crawl(depth=int(dep))

    # Read every table from the crawler (same call order as before); the
    # page rank is computed from the crawler's link queue.
    documents = crawler.get_docs_cache()
    postings = crawler.get_inverted_index()
    anchors = crawler.get_anchor_db()
    words = crawler.get_lexicon()
    ranks = page_rank(crawler.get_links_queue())
    titles = crawler.get_title_cache()
    resolved_postings = crawler.get_resovled_inverted_index()
    descriptions = crawler.get_desc_cache()
    pictures = crawler.get_image_cache()

    return (
        documents,
        titles,
        words,
        anchors,
        ranks,
        postings,
        descriptions,
        pictures,
        resolved_postings,
    )
Esempio n. 3
0
def main():
    """Parse the command line and crawl the requested web page.

    ``--url`` is asked for interactively when omitted.  ``--depth`` is
    parsed as an integer by argparse and defaults to 3, so a non-numeric
    value is rejected with a usage error before any prompt is shown.
    A keyboard interrupt during the prompt or the crawl prints a short
    notice instead of a traceback.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--url", help="Web page url to crawl")
    parser.add_argument(
        "--depth",
        type=int,
        default=3,
        help="Crawl depth, defaults to 3",
    )
    args = parser.parse_args()

    try:
        url = args.url or input("Enter web page url to crawl: ")

        crawler.set_depth(args.depth)
        crawler.crawl(Job(0, url))
    except KeyboardInterrupt:
        print("\nOperation aborted by user")
Esempio n. 4
0
from crawler import crawler

# Bind the instance to its own name: assigning it to `crawler` would
# overwrite the imported class and make it impossible to build another one.
bot = crawler(None, 'urls.txt')
bot.crawl(depth=1)

# Write each crawl result to the database through the crawler's own
# *_to_DB methods, in the original order.
bot.lexicon_to_DB()
bot.invertedIndex_to_DB()
bot.page_rank_to_DB()
bot.docIndex_to_DB()
Esempio n. 5
0
from crawler import crawler
from pagerank import page_rank

# Get crawler object and crawl on urls found in urls.txt.
# The instance gets its own name so the imported `crawler` class is not
# overwritten.
bot = crawler(None, 'urls.txt')
bot.crawl()

# NOTE(review): fetched but not used below; the loop reads
# bot._document_index directly. Confirm whether both are the same mapping.
document_index = bot.get_document_index()

# Run pagerank on the links generated by the crawler
pagerank = page_rank(bot._links)

# Highest rank first, ties broken by doc id (descending).  items() and an
# indexing key replace iteritems() and the tuple-parameter lambda, which
# only exist in Python 2; this form runs on both 2 and 3.
ranked = sorted(
    pagerank.items(),
    key=lambda item: (item[1], item[0]),
    reverse=True,
)
for doc_id, rank in ranked:
    document = bot._document_index[doc_id]
    print(str(rank) + " : " + str(document[0]) + "\n")
Esempio n. 6
0
    def runProgram(self, client, program, function, type, parameter):
        """Execute the command selected by program / function / type.

        Handled combinations (anything else returns ``None``):

        * program 1 - alarms.  function 1 adds an alarm (type 1 without
          content, type 2 with ``parameter['AlarmContent']``) and is refused
          for users whose ``'type'`` is 10; function 2 / type 1 removes the
          alarm at ``parameter['date-time']``; function 3 / type 1 moves it
          to ``parameter['date-time1']``.  After each change the alarm list
          is sent to ``client`` as a ``getalarm`` message.
        * program 2 - window named ``parameter['WindowName']``: function 1
          opens, function 2 closes (type 1).
        * program 3 - curtain, also looked up through
          ``parameter['WindowName']``: function 1 opens, function 2 closes.
        * program 4 - door lock: function 1 sends ``'open'``, function 2
          sends ``'enroll'`` to the door lock's client (type 1).
        * program 6 / function 1 - weather for ``parameter['Location']``:
          type 1 for the day in ``parameter['date-time']`` (today, tomorrow
          or the day after), type 2 for today.
        * program 7 / function 1 / type 1 - wiki lookup of
          ``parameter['WikiName']``.
        * program 8 / function 1 / type 1 - relay ``parameter['Tell']`` to
          every user of type 0 whose dong / ho equal ``parameter['number']``
          / ``parameter['number1']``.

        Args:
            client: Connection the command came from; resolved to a key of
                ``self.users`` through ``self.getUsersKey``.
            program: Numeric program selector.
            function: Numeric function selector within the program.
            type: Numeric variant selector (name kept for callers although
                it shadows the builtin).
            parameter: Dict of command parameters.  For program 8 its
                ``'number'`` and ``'number1'`` entries are replaced in place
                by their integer values.

        Returns:
            The reply string for the client, or ``None`` when the command
            produces no reply or is not recognised.
        """
        key = self.getUsersKey(client)

        def startOf(value):
            # A date-time parameter is either the value itself or a dict
            # holding it under 'startDateTime'.
            if isinstance(value, dict):
                return value['startDateTime']
            return value

        def pushAlarmList():
            # Send the caller its current alarm list.
            self.sendMessage(client, 'getalarm\t' + self.alarm.loadAlarm(key))

        def searchWeather(day):
            # Crawl the "<day> <location> 날씨" query (%20-separated).
            return crawl(day + '%20' + parameter['Location'] + '%20날씨')

        def todayText(weather):
            # Spoken summary of today's weather.
            info = weather['날씨']
            return ('오늘 ' + info['지역'] + ' 날씨는 ' + info['날씨']
                    + '. 현재온도는 ' + info['온도'].split('씨℃')[0]
                    + ', 최저기온은 ' + info['최저기온']
                    + ', 최고기온은 ' + info['최고기온']
                    + ' 입니다. 체감온도는 ' + info['체감온도'] + ' 입니다.')

        def halfDayText(weather, dayLabel):
            # Spoken morning / afternoon summary for a future day.
            info = weather['날씨']
            return (info['지역'] + '의 ' + dayLabel + ' 오전 날씨는 '
                    + info['오전날씨'] + '이고 '
                    + info['오전온도'].split('씨℃')[0] + '입니다. '
                    + '오후 날씨는 ' + info['오후날씨'] + '이고 '
                    + info['오후온도'].split('씨℃')[0] + '입니다. ')

        def weatherReply(tag, m, weather):
            # Users of type 10 get text only; others also get the raw data.
            if self.users[key]['type'] == 10:
                return 'a\t' + m
            return tag + '\t' + m + '\t' + str(weather)

        if program == 1:
            if function == 1:
                if self.users[key]['type'] == 10:
                    return 'a\t핸드폰에서 알람 기능을 사용할 수 없습니다.'
                if type == 1:
                    when = startOf(parameter['date-time'])
                    self.alarm.addAlarm2(self.alarm.getDatetime(when), key)
                    label = self.alarm.dateToString(when)
                    print(label)
                    pushAlarmList()
                    return 'a\t' + label + '에 알람을 맞췄습니다.'
                if type == 2:
                    when = startOf(parameter['date-time'])
                    moment = self.alarm.getDatetime(when)
                    # Analysed once and reused for both the stored alarm
                    # and the reply text.
                    content = self.alarm.contentAnal(parameter['AlarmContent'])
                    self.alarm.addAlarm(moment, content, key)
                    strs = ('a\t' + self.alarm.dateToString(when) + '에 '
                            + content + ' 알람을 맞췄습니다.')
                    pushAlarmList()
                    return strs
            elif function == 2 and type == 1:
                when = startOf(parameter['date-time'])
                self.alarm.removeAlarm(self.alarm.getDatetime(when), key)
                strs = 'a\t' + self.alarm.dateToString(when) + ' 알람을 삭제하였습니다.'
                pushAlarmList()
                return strs
            elif function == 3 and type == 1:
                oldWhen = startOf(parameter['date-time'])
                time1 = self.alarm.getDatetime(oldWhen)
                strs = 'a\t' + self.alarm.dateToString(oldWhen) + ' 알람을 '
                newWhen = startOf(parameter['date-time1'])
                time2 = self.alarm.getDatetime(newWhen)
                strs += self.alarm.dateToString(newWhen) + ' 로 수정하였습니다.'
                self.alarm.updateAlarm(time1, time2, key)
                pushAlarmList()
                return strs

        elif program == 2:
            if type == 1 and (function == 1 or function == 2):
                # [2-1-A-1] / [2-2-A-1]
                window = self.getInClient(parameter['WindowName'], 2, self.users[key]['ho'], self.users[key]['dong'])
                if window is None:
                    return "a\t존재하지 않는 창문입니다."
                if function == 1:
                    self.window.openWindow(window)
                else:
                    self.window.closeWindow(window)

        elif program == 3:
            if type == 1 and (function == 1 or function == 2):
                # [3-1-A-1] / [3-2-A-1]
                curtain = self.getInClient(parameter['WindowName'], 3, self.users[key]['ho'], self.users[key]['dong'])
                if curtain is None:
                    return "a\t존재하지 않는 커튼입니다."
                if function == 1:
                    print('커텐열기')
                    self.curtain.openCurtain(curtain)
                else:
                    print('커텐닫기')
                    self.curtain.closeCurtain(curtain)

        elif program == 4:
            if type == 1 and (function == 1 or function == 2):
                doorlock = self.getDoorlock(4, self.users[key]['ho'], self.users[key]['dong'])
                if doorlock is None:
                    return "a\t도어락이 존재하지 않습니다."
                command = 'open' if function == 1 else 'enroll'
                self.sendMessage(self.users[doorlock]['client'], command)

        elif program == 6:
            unknownRegion = 'a\t알 수 없는 지역입니다.'
            if function == 1:
                if type == 1:
                    dtnow = datetime.now()
                    dt = datetime.strptime(parameter['date-time'], '%Y-%m-%dT%H:%M:%S+09:00')
                    # Whole days between today and the requested date.
                    dw = (dt.date() - dtnow.date()).days
                    if dw == 0:
                        weather = weatherToday(searchWeather('오늘'))
                        if weather is None:
                            return unknownRegion
                        return weatherReply('weathertoday', todayText(weather), weather)
                    if dw == 1:
                        weather = weatherTomorrow(searchWeather('내일'))
                        if weather is None:
                            return unknownRegion
                        return weatherReply('weathertommorow', halfDayText(weather, '내일'), weather)
                    if dw == 2:
                        weather = weatherAfterTommorow(searchWeather('모레'))
                        if weather is None:
                            return unknownRegion
                        return weatherReply('weatheraftertommorow', halfDayText(weather, '내일 모레의'), weather)
                    return 'a\t' + '이 날의 날씨는 모르겠어요~'
                elif type == 2:
                    weather = weatherToday(searchWeather('오늘'))
                    if weather is None:
                        return unknownRegion
                    return weatherReply('weathertoday', todayText(weather), weather)

        elif program == 7:
            if function == 1 and type == 1:
                wiki = wikiCrawler.WikiCrawler()
                data = wiki.get(parameter['WikiName'])
                if data is None:
                    return 'a\t제가 알고 있는 단어가 아니에요.'
                # Reuse the result that was just checked instead of
                # fetching the article a second time.
                return 'a\t' + data

        elif program == 8:
            if function == 1 and type == 1:
                tell = self.analMsg(parameter['Tell'])
                # Normalised in place, e.g. 103.0 -> 103.
                parameter['number'] = int(float(parameter['number']))
                parameter['number1'] = int(float(parameter['number1']))
                dong = parameter['number']
                ho = parameter['number1']
                delivered = False
                for k in self.users:
                    target = self.users[k]
                    print(str(target['dong']) + ' ' + str(dong) + ' ' + str(target['ho']) + ' ' + str(ho))
                    if target['type'] == 0 and target['dong'] == dong and target['ho'] == ho:
                        self.sendMessage(target['client'], 'msg\ta\t{dong}동 {ho}호로 부터 메시지가 도착했습니다. {tell}'.format(dong=self.users[key]['dong'], ho=self.users[key]['ho'], tell=tell))
                        delivered = True
                if not delivered:
                    return 'a\t해당 집 스피커가 접속 중이 아닙니다.'
                return 'a\t{dong}동 {ho}호에게 {tell} 메시지를 보냈습니다.'.format(dong=dong, ho=ho, tell=tell)
        return None