Esempio n. 1
0
 def similarity(self, term1, term2):
     """Return the cosine similarity of two terms in the LSA space.

     Each term is passed through ``Parser.tokenise`` (which, per the
     original author's note, stems and strips stop words) so that it matches
     the form of the keys stored in ``self.keyword_index_mapping``.  Only the
     first resulting token of each term is used.

     Args:
         term1: First term, as raw text accepted by ``Parser.tokenise``.
         term2: Second term, as raw text accepted by ``Parser.tokenise``.

     Returns:
         The cosine of the angle between the two terms' rows of
         ``self.lsa_matrix`` as a ``float``.  Returns ``0`` when either term
         tokenises to nothing or is not present in
         ``self.keyword_index_mapping`` (a diagnostic line is printed in the
         latter case), and ``0.0`` when either row has zero norm.
     """
     parser = Parser()

     # Tokenise once to get a token list, then re-tokenise the first token
     # only; the original code does this two-pass normalisation, so it is
     # kept as-is.
     term1 = parser.tokenise(term1)
     if not term1:
         return 0
     term1 = parser.tokenise(term1[0])

     term2 = parser.tokenise(term2)
     if not term2:
         return 0
     term2 = parser.tokenise(term2[0])

     # KeyError: the keyword is unknown to the index.
     # IndexError: the second tokenisation pass produced an empty list.
     try:
         index1 = self.keyword_index_mapping[term1[0]]
     except (KeyError, IndexError):
         print("%s : indexing error 1" % (term1,))
         return 0
     try:
         index2 = self.keyword_index_mapping[term2[0]]
     except (KeyError, IndexError):
         print("%s indexing error 2" % (term2,))
         return 0

     vector1 = self.lsa_matrix[index1]
     vector2 = self.lsa_matrix[index2]
     denominator = np.linalg.norm(vector1) * np.linalg.norm(vector2)
     if denominator == 0:
         # A zero vector has no direction; avoid a 0/0 division yielding NaN.
         return 0.0
     return float(np.dot(vector1, vector2) / denominator)