def similarity(self, term1, term2):
    """Return the cosine similarity of two terms in the LSA space.

    Each term is passed through ``Parser.tokenise`` twice (the raw input
    first, then its first resulting token) so that it is normalised the
    same way as the stored keywords; per the original author's note the
    tokeniser stems and removes stop words. The first surviving token of
    each term is then looked up in ``self.keyword_index_mapping`` to find
    its row in ``self.lsa_matrix``.

    Args:
        term1: First term to compare (raw text accepted by the tokeniser).
        term2: Second term to compare.

    Returns:
        The cosine of the angle between the two rows as a ``float``.
        ``0`` is returned when either term tokenises to nothing, when a
        term is not present in the keyword index (a diagnostic line is
        printed in that case), or when either row has zero norm.
    """
    parser = Parser()

    # Tokenisation yields a list; an empty list means nothing survived
    # stemming / stop-word removal, so there is nothing to compare.
    term1 = parser.tokenise(term1)
    if not term1:
        return 0
    term1 = parser.tokenise(term1[0])

    term2 = parser.tokenise(term2)
    if not term2:
        return 0
    term2 = parser.tokenise(term2[0])

    # KeyError: keyword unknown to the index. IndexError: the second
    # tokenisation pass produced an empty list. Anything else is a real
    # bug and is allowed to propagate instead of being swallowed.
    try:
        index1 = self.keyword_index_mapping[term1[0]]
    except (KeyError, IndexError):
        print("%s : indexing error 1" % (term1,))
        return 0

    try:
        index2 = self.keyword_index_mapping[term2[0]]
    except (KeyError, IndexError):
        print("%s indexing error 2" % (term2,))
        return 0

    vector1 = self.lsa_matrix[index1]
    vector2 = self.lsa_matrix[index2]

    denominator = np.linalg.norm(vector1) * np.linalg.norm(vector2)
    if denominator == 0:
        # Cosine similarity is undefined for a zero vector; report "no
        # similarity" rather than dividing by zero and returning nan.
        return 0.0

    return float(np.dot(vector1, vector2) / denominator)