def handleLine(line):
    """Register the source URL found on one input line and optionally group it.

    The line is matched against the module-level ``lineParser``. Group 1 is
    the source URL; group 2, when present, is the name of a source group.

    A URL that is not yet known is added, crawled once, and its
    ``https://ps4m.com/s/<id>`` address is printed. A known URL is only
    reported. When a group name was given, the source is assigned to it.

    Args:
        line: One raw input line; trailing whitespace is stripped before
            matching.

    Raises:
        AssertionError: If the match captured neither one nor two groups.
    """
    # Parse line
    # NOTE(review): assuming lineParser is a compiled regex, a line that does
    # not match leaves ``m`` as None and the assert below fails with
    # AttributeError rather than AssertionError -- confirm callers only pass
    # well-formed lines.
    m = lineParser.match(line.rstrip())
    assert m.lastindex == 1 or m.lastindex == 2
    url = Url(m.group(1))
    sourceGroupName = m.group(2) if m.lastindex == 2 else None

    # Add source
    # Every print below takes exactly one argument so the output is the same
    # whether ``print`` is the Python 2 statement or the Python 3 function.
    if not sourceExists(url):
        print("Adding " + url.value)
        webFeed = itemFactory(url)
        # NOTE: a ``hasSimilarSource(webFeed)`` guard (printing "NOT ADDING!"
        # and returning early) used to wrap this step and is disabled.
        addSource(url, webFeed.name)
        sourceId = urlToLookupId(url.value)
        crawl(webFeed, sourceId)
        print("https://ps4m.com/s/%d" % (sourceId))
    else:
        print(url.value + " already exists")

    # If necessary, assign source to group
    if sourceGroupName is not None:
        print("\tAdding to %s" % (sourceGroupName))
        sourceId = urlToLookupId(url.value)
        addSourceGroupAssignment(sourceId, sourceGroupName)
def backEnd_run(dep):
    """Crawl to the requested depth and collect everything the crawler produced.

    Args:
        dep: Crawl depth; anything ``int()`` accepts.

    Returns:
        A 9-tuple, in this order: document cache, title cache, lexicon,
        anchor db, page-rank result, inverted index, description cache,
        image cache, resolved inverted index.
    """
    # Crawl through the URLs provided in urls.txt
    crawl_depth = int(dep)
    crawler.crawl(depth=crawl_depth)

    # Pull each product off the module-level crawler, in the same order the
    # getters have always been called.
    documents = crawler.get_docs_cache()
    inverted = crawler.get_inverted_index()
    anchors = crawler.get_anchor_db()
    words = crawler.get_lexicon()
    ranks = page_rank(crawler.get_links_queue())
    titles = crawler.get_title_cache()
    resolved = crawler.get_resovled_inverted_index()
    descriptions = crawler.get_desc_cache()
    pictures = crawler.get_image_cache()

    return (
        documents,
        titles,
        words,
        anchors,
        ranks,
        inverted,
        descriptions,
        pictures,
        resolved,
    )
def main():
    """Command-line entry point: crawl one web page to a given depth.

    ``--url`` may be omitted, in which case the URL is read interactively.
    ``--depth`` falls back to 3 when it is not supplied. Ctrl-C at any point
    after argument parsing prints a short notice instead of a traceback.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--url", help="Web page url to crawl")
    cli.add_argument("--depth", help="Crawl depth, defaults to 3")
    options = cli.parse_args()

    try:
        target = options.url or input("Enter web page url to crawl: ")
        levels = int(options.depth) if options.depth else 3
        crawler.set_depth(levels)
        crawler.crawl(Job(0, target))
    except KeyboardInterrupt:
        print("\nOperation aborted by user")
# Script: run one crawl and then call each of the crawler's *_to_DB methods.
from crawler import crawler

# NOTE: this rebinds the module-level name `crawler` from the imported
# callable (presumably a class) to the object it returns.
# presumably the arguments are (database connection, seed-URL file) -- TODO
# confirm against the crawler module.
crawler = crawler(None, 'urls.txt')
crawler.crawl(depth=1)

# presumably each call below persists one product of the crawl (lexicon,
# inverted index, page rank, document index) -- confirm against the crawler
# module; only the method names are visible here.
crawler.lexicon_to_DB()
crawler.invertedIndex_to_DB()
crawler.page_rank_to_DB()
crawler.docIndex_to_DB()
# Script: crawl, rank the crawled documents, and print them best-first.
from crawler import crawler
from pagerank import page_rank

# Get crawler object and crawl on urls found in urls.txt
# NOTE: this rebinds the module-level name `crawler` to the created object.
crawler = crawler(None, 'urls.txt')
crawler.crawl()
# NOTE(review): fetched but not used below; the loop reads
# crawler._document_index directly. Kept because it is a module-level name.
document_index = crawler.get_document_index()

# Run pagerank on the links generated by the crawler
pagerank = page_rank(crawler._links)

# Highest rank first, ties broken by doc id (also descending).
# items() with an indexing key replaces iteritems() and the tuple-parameter
# lambda, both of which exist only on Python 2; the ordering is unchanged.
for doc_id, rank in sorted(pagerank.items(),
                           key=lambda item: (item[1], item[0]),
                           reverse=True):
    document = crawler._document_index[doc_id]
    # Single-argument print: same output under Python 2 and 3. The explicit
    # "\n" keeps the blank line between entries.
    print(str(rank) + " : " + str(document[0]) + "\n")
def runProgram(self, client, program, function, type, parameter):
    """Run the command identified by (program, function, type) for a client.

    Dispatch table, as implemented below:

    * program 1 -- alarms: function 1 adds (type 1 plain, type 2 with
      content), function 2 removes, function 3 moves an alarm. Each success
      also pushes a fresh ``getalarm`` list to ``client``.
    * program 2 / 3 -- window / curtain: function 1 opens, function 2 closes.
    * program 4 -- door lock: function 1 sends ``open``, function 2 sends
      ``enroll`` to the door-lock client.
    * program 6 -- weather: type 1 for a dated request (today, tomorrow, the
      day after), type 2 for today.
    * program 7 -- wiki lookup.
    * program 8 -- relay a message to the clients of another dong/ho.

    Args:
        client: Connection of the requesting user; used to resolve the user
            key and as the target for pushed messages.
        program: Integer selecting the feature group.
        function: Integer selecting the action inside the group.
        type: Integer selecting the parameter variant of the action.
        parameter: Dict of slot values for the command. Program 8 overwrites
            its ``'number'`` and ``'number1'`` entries with ints.

    Returns:
        A tab-separated reply string (``'a\\t...'`` for a spoken answer, or a
        ``weather...`` payload), or ``None`` when the selected branch produces
        no reply or no branch matches.
    """
    # Resolved once and reused; previously this lookup was repeated inside
    # every alarm call.
    key = self.getUsersKey(client)

    # The helpers are nested (not module-level) so that the block stays a
    # single drop-in method whatever class it lives in.

    def start_of(value):
        # A date-time slot is either the value itself or a dict that carries
        # it under 'startDateTime'.
        return value['startDateTime'] if isinstance(value, dict) else value

    def push_alarm_list():
        # Send the requester its current alarm list after any change.
        self.sendMessage(client, 'getalarm\t' + self.alarm.loadAlarm(key))

    def is_phone():
        # User type 10 is the client that gets plain 'a\t' answers only
        # (the alarm refusal text below calls it a phone).
        return self.users[key]['type'] == 10

    def home_device(kind):
        # Look up a device of `kind` by name in the requester's ho/dong.
        # NOTE(review): curtains are also looked up via 'WindowName' --
        # confirm that is the intended slot.
        return self.getInClient(parameter['WindowName'], kind,
                                self.users[key]['ho'], self.users[key]['dong'])

    def forecast_page(day_word):
        # Fetch the search result for "<day> <location> weather".
        return crawl(day_word + '%20' + parameter['Location'] + '%20날씨')

    def weather_reply(tag, message, weather):
        # Type-10 users get the sentence only; others also get the raw data.
        if is_phone():
            return 'a\t' + message
        return tag + '\t' + message + '\t' + str(weather)

    def today_forecast():
        # Build the reply for today's weather.
        weather = weatherToday(forecast_page('오늘'))
        if weather is None:
            return 'a\t알 수 없는 지역입니다.'
        w = weather['날씨']
        message = ('오늘 ' + w['지역'] + ' 날씨는 ' + w['날씨']
                   + '. 현재온도는 ' + w['온도'].split('씨℃')[0]
                   + ', 최저기온은 ' + w['최저기온']
                   + ', 최고기온은 ' + w['최고기온']
                   + ' 입니다. 체감온도는 ' + w['체감온도'] + ' 입니다.')
        return weather_reply('weathertoday', message, weather)

    def split_day_forecast(parse, day_word, lead_in, tag):
        # Build the morning/afternoon reply used for the next two days.
        weather = parse(forecast_page(day_word))
        if weather is None:
            return 'a\t알 수 없는 지역입니다.'
        w = weather['날씨']
        message = (w['지역'] + lead_in + w['오전날씨'] + '이고 '
                   + w['오전온도'].split('씨℃')[0] + '입니다. '
                   + '오후 날씨는 ' + w['오후날씨'] + '이고 '
                   + w['오후온도'].split('씨℃')[0] + '입니다. ')
        return weather_reply(tag, message, weather)

    if program == 1:
        if function == 1:
            if is_phone():
                return 'a\t핸드폰에서 알람 기능을 사용할 수 없습니다.'
            if type == 1:
                when = start_of(parameter['date-time'])
                self.alarm.addAlarm2(self.alarm.getDatetime(when), key)
                label = self.alarm.dateToString(when)
                print(label)
                push_alarm_list()
                return 'a\t' + label + '에 알람을 맞췄습니다.'
            if type == 2:
                when = start_of(parameter['date-time'])
                moment = self.alarm.getDatetime(when)
                content = self.alarm.contentAnal(parameter['AlarmContent'])
                self.alarm.addAlarm(moment, content, key)
                label = self.alarm.dateToString(when)
                push_alarm_list()
                return 'a\t' + label + '에 ' + content + ' 알람을 맞췄습니다.'
        elif function == 2:
            if type == 1:
                when = start_of(parameter['date-time'])
                self.alarm.removeAlarm(self.alarm.getDatetime(when), key)
                label = self.alarm.dateToString(when)
                push_alarm_list()
                return 'a\t' + label + ' 알람을 삭제하였습니다.'
        elif function == 3:
            if type == 1:
                old = start_of(parameter['date-time'])
                old_moment = self.alarm.getDatetime(old)
                reply = 'a\t' + self.alarm.dateToString(old) + ' 알람을 '
                new = start_of(parameter['date-time1'])
                new_moment = self.alarm.getDatetime(new)
                reply += self.alarm.dateToString(new) + ' 로 수정하였습니다.'
                self.alarm.updateAlarm(old_moment, new_moment, key)
                push_alarm_list()
                return reply

    elif program == 2:
        if function == 1:
            if type == 1:  # [2-1-A-1]
                window = home_device(2)
                if window is None:
                    return "a\t존재하지 않는 창문입니다."
                self.window.openWindow(window)
        elif function == 2:
            if type == 1:  # [2-2-A-1]
                window = home_device(2)
                if window is None:
                    return "a\t존재하지 않는 창문입니다."
                self.window.closeWindow(window)

    elif program == 3:
        if function == 1:
            if type == 1:  # [3-1-A-1]
                curtain = home_device(3)
                if curtain is None:
                    return "a\t존재하지 않는 커튼입니다."
                print('커텐열기')
                self.curtain.openCurtain(curtain)
        elif function == 2:
            if type == 1:  # [3-2-A-1]
                curtain = home_device(3)
                if curtain is None:
                    return "a\t존재하지 않는 커튼입니다."
                print('커텐닫기')
                self.curtain.closeCurtain(curtain)

    elif program == 4:
        if function == 1:
            if type == 1:
                doorlock = self.getDoorlock(4, self.users[key]['ho'], self.users[key]['dong'])
                if doorlock is None:
                    return "a\t도어락이 존재하지 않습니다."
                self.sendMessage(self.users[doorlock]['client'], 'open')
        elif function == 2:
            if type == 1:
                doorlock = self.getDoorlock(4, self.users[key]['ho'], self.users[key]['dong'])
                if doorlock is None:
                    return "a\t도어락이 존재하지 않습니다."
                self.sendMessage(self.users[doorlock]['client'], 'enroll')

    elif program == 6:
        if function == 1:
            if type == 1:
                # NOTE(review): the '+09:00' offset is matched literally and
                # the result is compared with naive datetime.now(), so this
                # presumably assumes the server clock runs on KST -- confirm.
                today = datetime.now().date()
                asked = datetime.strptime(parameter['date-time'],
                                          '%Y-%m-%dT%H:%M:%S+09:00').date()
                days_ahead = (asked - today).days
                if days_ahead == 0:
                    return today_forecast()
                if days_ahead == 1:
                    return split_day_forecast(weatherTomorrow, '내일',
                                              '의 내일 오전 날씨는 ',
                                              'weathertommorow')
                if days_ahead == 2:
                    return split_day_forecast(weatherAfterTommorow, '모레',
                                              '의 내일 모레의 오전 날씨는 ',
                                              'weatheraftertommorow')
                # Past dates and anything beyond two days ahead.
                return 'a\t' + '이 날의 날씨는 모르겠어요~'
            elif type == 2:
                return today_forecast()

    elif program == 7:
        if function == 1:
            if type == 1:
                wiki = wikiCrawler.WikiCrawler()
                data = wiki.get(parameter['WikiName'])
                if data is None:
                    return 'a\t제가 알고 있는 단어가 아니에요.'
                # Reuse the result; the article used to be fetched twice.
                return 'a\t' + data

    elif program == 8:
        if function == 1:
            if type == 1:
                delivered = False
                tell = self.analMsg(parameter['Tell'])
                # Slot values may arrive as strings like '103.0'; they are
                # normalised in place, so the caller's dict changes too.
                parameter['number'] = int(float(parameter['number']))
                parameter['number1'] = int(float(parameter['number1']))
                for other in self.users.values():
                    print(str(other['dong']) + ' ' + str(parameter['number']) + ' '
                          + str(other['ho']) + ' ' + str(parameter['number1']))
                    # Only type-0 users of the addressed dong/ho receive it
                    # (the failure text below calls them the home's speaker).
                    if (other['type'] == 0
                            and other['dong'] == parameter['number']
                            and other['ho'] == parameter['number1']):
                        self.sendMessage(
                            other['client'],
                            'msg\ta\t{dong}동 {ho}호로 부터 메시지가 도착했습니다. {tell}'.format(
                                dong=self.users[key]['dong'],
                                ho=self.users[key]['ho'],
                                tell=tell))
                        delivered = True
                if not delivered:
                    return 'a\t해당 집 스피커가 접속 중이 아닙니다.'
                return 'a\t{dong}동 {ho}호에게 {tell} 메시지를 보냈습니다.'.format(
                    dong=parameter['number'], ho=parameter['number1'], tell=tell)

    return None