def get(self, keyword):
    """Render the search results page for ``keyword``.

    When ``keyword`` is truthy it is segmented with jieba, tokens of one
    character or less are discarded, and the remaining tokens are handed to
    ``globalutil.search`` together with every page from
    ``snapi.getAllPages()``. The first surviving token is additionally sent
    to ``bs.search`` along with the configured Twitter account.

    When ``keyword`` is falsy no lookup is performed and the template is
    rendered with empty result lists.

    Args:
        keyword: the raw search string taken from the request; may be empty
            or None.

    NOTE(review): the statement nesting was reconstructed from a
    whitespace-collapsed source; confirm that every lookup belongs inside
    the ``if keyword`` branch.
    """
    pages = []
    spages = []
    words = []
    if keyword:
        # Imported lazily: jieba may fail to load, and it is only needed
        # when there is actually something to segment.
        import jieba
        jieba.initialize(usingSmall=True)
        # Keep only tokens longer than one character.
        words = [
            word
            for word in jieba.cut(keyword, cut_all=False)
            if len(word) > 1
        ]
        keyword = stringutil.parseUnicode(keyword)
        pages = globalutil.search(snapi.getAllPages(), words)
        globalutil.populateSourceUrl(pages)
        # The length filter above can leave no tokens at all (for example a
        # single-character keyword); indexing words[0] unconditionally
        # would raise IndexError, so the secondary search is skipped then.
        if words:
            twitterAccount = globalconfig.getTwitterAccount()
            spages = bs.search(words[0], twitterAccount)
    templateValues = {
        'keyword': keyword,
        'pages': pages,
        'spages': spages,
        'words': words,
    }
    self.render(templateValues, 'search.html')
def _saveWords(keyname, words, pages):
    """Build the matched-word records for ``words`` and persist them.

    Each entry of ``words`` is passed to ``globalutil.search`` together with
    ``pages``. Entries with a truthy search result produce a record holding
    the entry itself, the first result, the number of results and the value
    returned by ``_getNaturalKeywords``. The records are stored through
    ``models.saveWords`` under ``keyname`` along with a timestamp derived
    from the current UTC time.

    Args:
        keyname: key forwarded to ``models.saveWords``.
        words: iterable of keyword entries, each handed to
            ``globalutil.search`` as-is.
        pages: page collection to search.

    Returns:
        The list of records that was saved.

    NOTE(review): the nesting was reconstructed from a whitespace-collapsed
    source; this assumes entries without any match are skipped entirely.
    Confirm against the original layout.
    """
    records = []
    for keywords in words:
        hits = globalutil.search(pages, keywords)
        if not hits:
            continue
        records.append({
            'keywords': keywords,
            'page': hits[0],
            'size': len(hits),
            'readablekeywords': _getNaturalKeywords(keywords, hits),
        })

    timestamp = dateutil.getDateAs14(datetime.datetime.utcnow())
    models.saveWords(keyname, {
        'updated': timestamp,
        'words': records,
    })
    return records
def summarizeEvents(eventCriterion, scope, words, pages, twitterAccount):
    """Fold ``words`` into the stored events of ``scope`` and save them.

    The events are loaded with ``models.getEvents`` (a fresh structure with
    a zero counter and no items is used when nothing truthy comes back) and
    passed to ``_archiveEvents``. The words are then walked in reverse
    order; whenever ``_summarizeEvent`` returns a truthy event, the pages
    matching that word's keywords are looked up and handed to
    ``_saveEventItem``. Finally the items are ordered by word size, largest
    first, with ties ordered by their ``updated`` value, newest first, and
    the whole structure is written back with ``models.saveEvents``.

    Args:
        eventCriterion: mapping; its ``'expose.pages'`` entry is forwarded
            to ``_summarizeEvent``.
        scope: identifier used for loading and saving the events.
        words: sequence of word records; each must provide ``'keywords'``.
        pages: page collection passed to ``globalutil.search``.
        twitterAccount: forwarded unchanged to ``_saveEventItem``.

    NOTE(review): the nesting was reconstructed from a whitespace-collapsed
    source; this assumes ``_archiveEvents`` runs unconditionally rather
    than only for freshly created events. Confirm.
    """
    def _byUpdated(item):
        return item['updated']

    def _byWordSize(item):
        return item['word']['size']

    pagesToExpose = eventCriterion['expose.pages']
    events = models.getEvents(scope)
    if not events:
        events = {'counter': 0, 'items': []}
    _archiveEvents(scope, events)

    timestamp = dateutil.getDateAs14(datetime.datetime.utcnow())
    for word in reversed(words):
        event = _summarizeEvent(pagesToExpose, scope, events, word, timestamp)
        if not event:
            continue
        matchedPages = globalutil.search(pages, word['keywords'])
        _saveEventItem(scope, event['id'], word, timestamp, matchedPages,
                       twitterAccount)

    # Two stable passes: the second (primary) ordering keeps the order
    # produced by the first one among items with equal word size.
    items = events['items']
    items.sort(key=_byUpdated, reverse=True)
    items.sort(key=_byWordSize, reverse=True)

    events['updated'] = timestamp
    models.saveEvents(scope, events)