def search(pages, keywords):
    """Return the pages matching at least one keyword, best match first.

    A keyword counts as matched for a page when ``stringutil.contains``
    reports it in the page's ``'keyword'`` field or, failing that, in its
    ``'title'`` field (missing fields default to ``''``).  Every matched
    keyword adds ``len(keyword)`` plus a small positional bonus to the
    page's score.

    Only pages with a positive score are returned, and only the first page
    seen for any given ``'url'`` value is kept.  The result is ordered by
    score, highest first; pages with equal scores are ordered by their
    ``'added'`` value, highest first.

    Args:
        pages: iterable of dict-like pages.
        keywords: sequence of keywords, most important first.

    Returns:
        A new list holding the matching page objects themselves (not
        copies).  A temporary ``'grade'`` key is written to and then removed
        from each returned page while ranking.
    """
    keyword_count = len(keywords)
    seen_urls = set()
    matches = []

    for page in pages:
        score = 0
        for position, keyword in enumerate(keywords):
            # Keywords nearer the front of the list weigh slightly more
            # than those nearer the end.
            bonus = (keyword_count - position) * 0.1
            hit = (
                stringutil.contains(page.get('keyword', ''), keyword)
                or stringutil.contains(page.get('title', ''), keyword)
            )
            if hit:
                score += len(keyword) + bonus

        if score <= 0:
            continue

        url = page.get('url')
        if url in seen_urls:
            # An earlier page with this url has already been accepted.
            continue
        seen_urls.add(url)

        page['grade'] = score
        matches.append(page)

    # Two stable passes: the secondary order ('added') is applied first so
    # that the primary order ('grade') keeps it among equal scores.
    matches.sort(key=lambda entry: entry.get('added'), reverse=True)
    matches.sort(key=lambda entry: entry['grade'], reverse=True)

    # The score was only needed for ranking; do not leak it to callers.
    for entry in matches:
        del entry['grade']
    return matches
def get(self, eventScope, eventId):
    """Render ``event.html`` for the event identified by scope and id.

    Responds with a 404 (via ``self.error``) and renders nothing when
    ``models.getEvent`` returns a falsy value.

    The event's pages are sorted in place by ``'published'`` (falling back
    to ``'added'`` when ``'published'`` is missing or falsy), highest first.
    When ``self.extraValues`` contains a ``"keyword"`` entry, that text is
    segmented with jieba, each page receives a ``'grade'`` equal to the
    total length of the multi-character segments that
    ``stringutil.contains`` finds in its title, and the pages are re-sorted
    by grade, highest first.  That sort is stable, so the date order
    survives among pages with equal grades.  The ``'grade'`` key is left on
    the pages.
    """
    event = models.getEvent(eventScope, eventId)
    if not event:
        self.error(404)
        return

    pages = event["pages"]

    def date_key(page):
        return page.get("published") or page["added"]

    pages.sort(key=date_key, reverse=True)

    if "keyword" in self.extraValues:
        # Imported at call time rather than module level; loading jieba
        # may fail.
        import jieba
        jieba.initialize(usingSmall=True)

        segments = jieba.cut(self.extraValues["keyword"], cut_all=False)
        # Segments of one character or less are ignored when grading.
        terms = [term for term in segments if len(term) > 1]

        for page in pages:
            title = page.get("title", "")
            page["grade"] = sum(
                len(term) for term in terms
                if stringutil.contains(title, term)
            )
        pages.sort(key=lambda page: page["grade"], reverse=True)

    self.render({"event": event}, "event.html")