def __init__(self, query):
    """Run each search backend for ``query`` and prepare ranking state.

    The backends are queried in this order: gist, stack, hacker news.
    Their results are kept individually and also concatenated into
    ``self.urls`` (stack first, then gist, then hacker news).

    :param query: the search query; it is passed unchanged to every
        backend and to the snippet generator.
    """
    self.query = query

    # Each backend's results are stored on the instance as soon as they
    # arrive; the local aliases are only used to build the combined list.
    self.gist_results = gist_hits = GistSearch(query).get_results()
    self.stack_results = stack_hits = StackSearch(query).get_results()
    self.hn_results = hn_hits = HackerNewsSearch(query).get_results()

    # Results are combined with ``+`` -- presumably lists of URL strings;
    # confirm against the backends' get_results().
    self.urls = stack_hits + gist_hits + hn_hits

    self.ranked_urls = Queue.PriorityQueue()
    self.snippet_generator = SnippetGenarator(query)
class SearchControl():
    """Gather URLs from several search backends and rank them by BM25 score.

    On construction the query is sent to the gist, stack and hacker-news
    backends.  ``get_top_results`` then downloads every collected URL,
    scores its content against the query and returns the best-scoring
    entries together with a snippet.
    """

    def __init__(self, query):
        """Query every backend and set up the ranking state.

        :param query: the search query; passed unchanged to each backend
            and to the snippet generator.
        """
        self.query = query
        self.gist_results = GistSearch(query).get_results()
        self.stack_results = StackSearch(query).get_results()
        self.hn_results = HackerNewsSearch(query).get_results()
        # Combined with ``+`` -- presumably lists of URL strings; confirm
        # against the backends' get_results().
        self.urls = self.stack_results + self.gist_results + self.hn_results
        self.ranked_urls = Queue.PriorityQueue()
        self.snippet_generator = SnippetGenarator(query)

    def calcualte_rank(self):
        """Score every URL in ``self.urls`` and queue the ones that scored.

        URLs whose score is ``None`` (the fetch failed) are skipped.
        """
        for url in self.urls:
            score, _code = self.calcualte_BM25_score(url)
            if score is None:
                continue
            # The priority queue hands back its smallest entry first, so the
            # score is negated to make the highest score come out first.
            self.ranked_urls.put((-1 * score, url))

    def get_top_results(self):
        """Rank all URLs and return up to ten of the best ones.

        :returns: a list of ``(url, score, snippet)`` tuples, best first.
            ``score`` is the value stored in the queue, i.e. the negated
            BM25 score.  The list holds fewer than ten entries when fewer
            URLs could be scored.
        """
        self.calcualte_rank()
        top_results = []
        # ``get()`` blocks on an empty queue, so stop as soon as the queue
        # runs dry instead of always asking for ten entries.
        while len(top_results) < 10 and not self.ranked_urls.empty():
            score, url = self.ranked_urls.get()
            snippet = self.snippet_generator.get_snippet(url)
            top_results.append((url, score, snippet))
        return top_results

    def calcualte_BM25_score(self, url):
        """Download ``url`` and score its lines against the query.

        :param url: the address to fetch.
        :returns: ``(score, status_code)`` on success,
            ``(None, status_code)`` when the server answers with an HTTP
            error, and ``(None, None)`` on any other ``URLError``.
        """
        req = urllib2.Request(url)
        # Presumably set because some sites reject the default urllib
        # user agent -- TODO confirm.
        req.add_header('User-Agent', 'Mozilla/5.0')
        try:
            page = urllib2.urlopen(req)
            try:
                code = page.getcode()
                data = page.readlines()
            finally:
                # Always release the underlying connection.
                page.close()
        except urllib2.HTTPError as err:
            # HTTPError subclasses URLError, so it must be handled first.
            return (None, err.code)
        except urllib2.URLError:
            return (None, None)

        bm25 = BM25(data, delimiter=' ')
        query = self.query.split()
        score = bm25.BM25Score(query)
        return (score, code)