class Search_System:
    """Boolean search over an inverted index, with score-based ranking.

    Queries are infix boolean expressions whose tokens are separated by
    whitespace; "&&" means AND and "||" means OR.  The expression is converted
    to reverse Polish notation by ``shunting_yard.infixToRPN`` and evaluated
    with a stack.  Every postings list is accompanied by a parallel score list
    (obtained from the term node's ``get_tf_idf_list``), and the final result
    is ordered by descending score.
    """

    # Class-level default; each instance replaces it in __init__.
    inverted_index = None

    def __init__(self):
        """Create a search system backed by a new ``Inverted_Index()``."""
        self.inverted_index = Inverted_Index()

    @staticmethod
    def rank_answer(list, score_list):
        """Return the items of ``list`` ordered by descending score.

        ``score_list`` is parallel to ``list`` (``score_list[i]`` belongs to
        ``list[i]``).  Items with equal scores keep their original relative
        order, and if the two sequences differ in length the extra elements of
        the longer one are ignored.

        Declared as a static method so that it can be called both as
        ``Search_System.rank_answer(...)`` and on an instance.
        """
        # NOTE: the first parameter shadows the builtin ``list``; the name is
        # kept so that existing keyword callers continue to work.
        ranked = sorted(
            zip(list, score_list),
            key=lambda pair: pair[1],
            reverse=True,
        )
        return [item for item, _score in ranked]

    def _lookup_term(self, word):
        """Return ``(postings, scores)`` for a single query term.

        ``postings`` is whatever ``inverted_index.lookup`` returns for the
        word.  ``scores`` is the node's ``get_tf_idf_list`` result, or an empty
        list when the index has no node for the word.
        """
        index = self.inverted_index
        postings = index.lookup(index.root, word)
        node = index.lookupNode(index.root, word)
        if node is None:
            return postings, []
        scores = node.get_tf_idf_list(index.docCount, index.highest_tc)
        return postings, scores

    def _operand(self, operand):
        """Resolve a stack entry to ``(postings, scores)``.

        Intermediate results are stored on the stack as tuples, while query
        terms are plain tokens; keeping the two apart by type means a query
        term can never be mistaken for an intermediate result.
        """
        if isinstance(operand, tuple):
            return operand
        return self._lookup_term(operand)

    def search3(self, query):
        """Evaluate a boolean query and return the ranked answer list.

        Args:
            query: infix expression with whitespace-separated tokens, using
                "&&" for AND and "||" for OR.

        Returns:
            The matching postings ordered by descending combined score, or an
            empty list when the query is empty or its single term is not in
            the index.

        Raises:
            IndexError: an operator does not have two operands available.
            KeyError: the expression has more than one token but does not end
                with an operator result (for example two words with no
                operator between them).
            SystemExit: ``shunting_yard`` reports an operator other than "&&"
                or "||" (a message is printed first).
        """
        tokens = [t for t in shunting_yard.infixToRPN(query.split())]

        if not tokens:
            return []

        # A single term needs no boolean evaluation: just rank its postings.
        if len(tokens) == 1:
            postings, scores = self._lookup_term(tokens[0])
            if len(scores) == 0:
                return []
            return Search_System.rank_answer(postings, scores)

        # Standard RPN evaluation: operands are pushed, and every operator
        # consumes the two topmost entries and pushes its result.
        stack = []
        for token in tokens:
            if not shunting_yard.isOperator(token):
                stack.append(token)
                continue

            # The first pop is the right-hand operand, the second the left.
            postings1, scores1 = self._operand(stack.pop())
            postings2, scores2 = self._operand(stack.pop())

            if token == "&&":
                merged = self.intersect(postings1, postings2)
                merged_scores = self.intersect_score(
                    postings1, postings2, scores1, scores2)
            elif token == "||":
                merged = self.union(postings1, postings2)
                merged_scores = self.union_score(
                    postings1, postings2, scores1, scores2)
            else:
                print("Wrong Operator: program will exit")
                raise SystemExit

            stack.append((merged, merged_scores))

        last = stack.pop()
        if not isinstance(last, tuple):
            # The expression never produced an operator result for this
            # entry, so there is nothing to rank.
            raise KeyError(last)
        answer, s_answer = last

        # NOTE(review): these look like leftover debug output; they are kept
        # because callers may rely on what is written to stdout.
        print(answer)
        print(s_answer)
        return Search_System.rank_answer(answer, s_answer)

    @staticmethod
    def _union_steps(sorted_plist1, sorted_plist2):
        """Yield one ``(i, j)`` pair per element of the OR-merge.

        ``i`` indexes ``sorted_plist1`` and ``j`` indexes ``sorted_plist2``;
        an index is ``None`` when that list contributes nothing to the output
        element.  Both indexes are set when the two elements are equal.
        """
        # NOTE(review): elements are tested for equality as a whole but
        # ordered by their first item only; this presumably expects postings
        # shaped like (doc_id, ...) and sorted by doc_id -- confirm.
        p1, p2 = 0, 0
        n1, n2 = len(sorted_plist1), len(sorted_plist2)
        while p1 < n1 and p2 < n2:
            first, second = sorted_plist1[p1], sorted_plist2[p2]
            if first == second:
                yield p1, p2
                p1 += 1
                p2 += 1
            elif first[0] < second[0]:
                yield p1, None
                p1 += 1
            else:
                yield None, p2
                p2 += 1
        # At most one of the two lists still has elements left.
        for i in range(p1, n1):
            yield i, None
        for j in range(p2, n2):
            yield None, j

    @staticmethod
    def _intersect_steps(sorted_plist1, sorted_plist2):
        """Yield ``(i, j)`` for every pair of equal elements in the AND-merge."""
        # NOTE(review): same equality/ordering convention as _union_steps.
        p1, p2 = 0, 0
        n1, n2 = len(sorted_plist1), len(sorted_plist2)
        while p1 < n1 and p2 < n2:
            first, second = sorted_plist1[p1], sorted_plist2[p2]
            if first == second:
                yield p1, p2
                p1 += 1
                p2 += 1
            elif first[0] < second[0]:
                p1 += 1
            else:
                p2 += 1

    def union(self, sorted_plist1, sorted_plist2):
        """Return the union (boolean OR) of two sorted postings lists.

        Elements present in both lists appear once, taken from
        ``sorted_plist1``.
        """
        steps = Search_System._union_steps(sorted_plist1, sorted_plist2)
        return [
            sorted_plist1[i] if i is not None else sorted_plist2[j]
            for i, j in steps
        ]

    def union_score(self, sorted_plist1, sorted_plist2, slist1, slist2):
        """Return the scores that go with ``union(sorted_plist1, sorted_plist2)``.

        ``slist1`` and ``slist2`` are parallel to the two postings lists.  The
        result is parallel to the union: an element found in both lists gets
        the sum of its two scores, any other element keeps its own score.
        """
        scores = []
        for i, j in Search_System._union_steps(sorted_plist1, sorted_plist2):
            if i is not None and j is not None:
                scores.append(slist1[i] + slist2[j])
            elif i is not None:
                scores.append(slist1[i])
            else:
                scores.append(slist2[j])
        return scores

    def intersect(self, sorted_plist1, sorted_plist2):
        """Return the intersection (boolean AND) of two sorted postings lists."""
        steps = Search_System._intersect_steps(sorted_plist1, sorted_plist2)
        return [sorted_plist1[i] for i, _j in steps]

    def intersect_score(self, sorted_plist1, sorted_plist2, slist1, slist2):
        """Return the scores that go with ``intersect(sorted_plist1, sorted_plist2)``.

        ``slist1`` and ``slist2`` are parallel to the two postings lists; each
        common element gets the sum of its two scores.
        """
        steps = Search_System._intersect_steps(sorted_plist1, sorted_plist2)
        return [slist1[i] + slist2[j] for i, j in steps]

    def load_index(self, path):
        """Replace the current inverted index with one unpickled from ``path``.

        Args:
            path: location of the pickled index file.

        Returns:
            True when the index was loaded; False (after printing a message)
            when ``path`` is not an existing file.  In the latter case the
            index currently held in memory is left untouched.
        """
        # Validate the path before touching the current index, so that a
        # mistyped file name does not wipe the index already in memory.
        if not os.path.isfile(path):
            print("no such file - " + path)
            return False

        self.inverted_index.clean(self.inverted_index.root)

        # SECURITY: pickle can execute arbitrary code while loading; only
        # open index files that come from a trusted source.
        with open(path, 'rb') as index_file:
            self.inverted_index = pickle.load(index_file)
        return True