Beispiel #1
0
 def get_description(self, query):
     """Collect words from the leading sentences of the gloss found for ``query``.

     The lookup is delegated to ``self._query_babelnet``. The text of the
     first gloss is split on ``'.'`` and at most
     ``self._NUMBER_OF_SENTENCES`` of the resulting pieces are run through
     ``StringTransformer``; their word lists are concatenated.

     Args:
         query: Search string. It is concatenated into the log messages,
             so it must be a ``str``.

     Returns:
         list: The collected words, or an empty list when the lookup gives
         no usable gloss (no result, empty result, no ``'glosses'`` entry,
         empty gloss list, or a first gloss without text).
     """
     synset = self._query_babelnet(query)

     # Every "nothing usable" shape of the response is funnelled into one
     # check so a missing key or a null gloss cannot raise.
     gloss = None
     if synset and synset.get('glosses'):
         gloss = synset['glosses'][0].get('gloss')
     if gloss is None:
         print('information not found for query: ' + query)
         return []

     # str.split always returns a list, so no further None check is needed.
     sentences = gloss.split('.')
     print('Description Result:')
     print(sentences)
     print('found information for query: ' + query)

     additional_words = []
     for sentence in sentences[:self._NUMBER_OF_SENTENCES]:
         # A fresh transformer per sentence, exactly as before, in case
         # the transformer keeps state between calls.
         transformer = StringTransformer()
         additional_words.extend(
             transformer.transform(sentence).get_words_list())
     return additional_words
Beispiel #2
0
 def find(self, query):
     """Search the on-disk Lucene index in ``index/`` for ``query``.

     The query is passed through ``StringTransformer`` and
     ``self._preprocessor``, joined with spaces, parsed against the
     ``"content"`` field with an ``EnglishAnalyzer`` and scored with
     ``BM25Similarity``.

     Args:
         query: Raw query text.

     Returns:
         list: UTF-8 encoded ``"path"`` field of up to 10 matching
         documents, in the order Lucene returns them.
     """
     transformer = StringTransformer()
     # NOTE(review): the transformer result is handed to the preprocessor
     # as-is; confirm the preprocessor accepts it rather than a word list.
     processed_query = ' '.join(
         self._preprocessor(transformer.transform(query)))
     analyzer = EnglishAnalyzer(Version.LUCENE_CURRENT)
     parsed_query = QueryParser(Version.LUCENE_CURRENT, "content",
                                analyzer).parse(processed_query)

     # The reader is opened last and closed in ``finally`` so that neither
     # a parse failure nor a search failure leaks the index handle.
     reader = IndexReader.open(SimpleFSDirectory(File("index/")))
     try:
         searcher = IndexSearcher(reader)
         searcher.setSimilarity(BM25Similarity())
         # IndexSearcher exposes ``search``; it has no ``get_description``.
         hits = searcher.search(parsed_query, 10)
         return [
             searcher.doc(hit.doc).get("path").encode("utf-8")
             for hit in hits.scoreDocs
         ]
     finally:
         reader.close()
Beispiel #3
0
 def get_type(self, query):
     """Collect words from the notable-type text found for ``query``.

     The lookup is delegated to ``self._query_freebase``. The text of the
     first ``'/common/topic/notable_types'`` value is run through
     ``StringTransformer`` and its word list is returned.

     Args:
         query: Search string. It is concatenated into the log messages,
             so it must be a ``str``.

     Returns:
         list: Words of the notable-type text, or an empty list when the
         lookup gives no topic or the topic carries no such text.
     """
     topic = self._query_freebase(query)

     type_text = None
     if topic is not None:
         try:
             notable = topic['property']['/common/topic/notable_types']
             type_text = notable['values'][0]['text']
         except (KeyError, IndexError):
             # Any missing level of the response means "no parent type".
             type_text = None

     if type_text is None:
         print('parent type not found for query: ' + query)
         return []

     print('found parent type for query: ' + query)
     transformer = StringTransformer()
     # Previously the collected words were built but never returned.
     return list(transformer.transform(type_text).get_words_list())
Beispiel #4
0
 def find(self, query):
     """Return the entries of ``self._service_array`` ordered by the index.

     The query is cleaned with ``StringTransformer`` and
     ``self._query_transformer``, vectorised with ``self._vectorizer`` and
     passed to ``self._kdtree_index.query``, asking for as many neighbours
     as there are entries in ``self._service_array``.

     Args:
         query: Raw query text.

     Returns:
         list: Entries of ``self._service_array`` in the order of the
         indices returned by the index (presumably nearest first).
     """
     cleaned = StringTransformer().transform(query)
     words = self._query_transformer.transform(cleaned).get_words_str()
     dense_query = self._vectorizer.transform([words]).toarray()
     neighbour_rows = self._kdtree_index.query(
         dense_query,
         k=len(self._service_array),
         return_distance=False,
     )
     # Only one query row was submitted, so only the first row is read.
     return [self._service_array[position] for position in neighbour_rows[0]]
Beispiel #5
0
 def find(self, query):
     """Return the entries of ``self._service_array`` ranked against ``query``.

     The query is cleaned, preprocessed and turned into a bag of words with
     ``self._dictionary``, weighted with ``self._tfidf_model`` and looked
     up in ``self._index``.

     Args:
         query: Raw query text.

     Returns:
         list: Entries of ``self._service_array`` ordered by descending
         value from ``self._index``.
     """
     cleaned = StringTransformer().transform(query)
     tokens = self._query_transformer.transform(cleaned).get_words_list()
     bag_of_words = self._dictionary.doc2bow(self._preprocessor(tokens))
     weighted_query = self._tfidf_model[bag_of_words]
     similarities = self._index[weighted_query]
     # Negating the score sorts highest first while keeping ties stable.
     ranked = sorted(enumerate(similarities), key=lambda pair: -pair[1])
     return [self._service_array[position] for position, _ in ranked]
Beispiel #6
0
 def find(self, query):
     """Return the ``self._service_array`` entries that neighbour ``query``.

     The query is cleaned, vectorised with ``self._vectorizer``, reduced
     with ``self._svd`` and passed to ``self._lsi_index.kneighbors``.

     Args:
         query: Raw query text.

     Returns:
         list: Entries of ``self._service_array`` in the order of the
         indices returned by ``kneighbors``.
     """
     cleaned = StringTransformer().transform(query)
     words = self._query_transformer.transform(cleaned).get_words_str()
     vectorised = self._vectorizer.transform([words])
     reduced = self._svd.transform(vectorised.toarray())
     neighbour_rows = self._lsi_index.kneighbors(reduced,
                                                 return_distance=False)
     # Only one query row was submitted, so only the first row is read.
     return [self._service_array[position] for position in neighbour_rows[0]]
Beispiel #7
0
 def find(self, query):
     """Rank the keys of ``self._service_map`` by similarity to ``query``.

     The query is cleaned with ``StringTransformer``, preprocessed, and
     reduced to the words present in ``self._fasttext_model.vocab``. Each
     document in ``self._service_map`` is scored with
     ``self._fasttext_model.n_similarity``.

     Args:
         query: Raw query text.

     Returns:
         list: Keys of ``self._service_map`` ordered by descending score.
         Documents without words score 0. When no query word survives the
         vocabulary filter every document scores 0 and the keys come back
         in the map's own order.
     """
     transformer = StringTransformer()
     query_words = self._preprocessor(
         transformer.transform(query).get_words_list())
     # Drop query words the model does not know.
     vocabulary = self._fasttext_model.vocab
     query_words = [word for word in query_words if word in vocabulary]

     scored = []
     for key, document_words in self._service_map.items():
         # n_similarity raises when either word list is empty, so both
         # the document and the filtered query are checked first.
         if query_words and document_words:
             score = self._fasttext_model.n_similarity(
                 query_words, document_words)
         else:
             score = 0
         scored.append((key, score))

     # Negating the score sorts highest first while keeping ties stable.
     scored.sort(key=lambda item: -item[1])
     return [key for key, _ in scored]
Beispiel #8
0
 def get_description(self, query):
     """Collect words from the leading sentences of the article for ``query``.

     The lookup is delegated to ``self._query_freebase``. The
     ``'/common/document/text'`` value of the first
     ``'/common/topic/article'`` entry is split on ``'.'`` and at most
     ``self._NUMBER_OF_SENTENCES`` of the pieces are run through
     ``StringTransformer``; their word lists are concatenated.

     Args:
         query: Search string. It is concatenated into the log messages,
             so it must be a ``str``.

     Returns:
         list: The collected words, or an empty list when the lookup gives
         no topic or the topic carries no article text.
     """
     topic = self._query_freebase(query)

     article_text = None
     if topic is not None:
         try:
             article = topic['property']['/common/topic/article']['values'][0]
             document = article['property']['/common/document/text']
             article_text = document['values'][0]['value']
         except (KeyError, IndexError):
             # Any missing level of the response means "no description".
             article_text = None

     if article_text is None:
         print('information not found for query: ' + query)
         return []

     print('found information for query: ' + query)
     additional_words = []
     # str.split always returns a list, so no further None check is needed.
     for sentence in article_text.split('.')[:self._NUMBER_OF_SENTENCES]:
         # A fresh transformer per sentence, exactly as before, in case
         # the transformer keeps state between calls.
         transformer = StringTransformer()
         additional_words.extend(
             transformer.transform(sentence).get_words_list())
     return additional_words
Beispiel #9
0
 def find(self, query):
     """Return the entries of ``self._service_array`` ranked against ``query``.

     The query is cleaned, vectorised with ``self._vectorizer``, passed
     through ``self._model.transform`` and compared against ``self._index``
     with ``cosine_similarity``.

     Args:
         query: Raw query text.

     Returns:
         list: Entries of ``self._service_array`` ordered by descending
         cosine similarity.
     """
     cleaned = StringTransformer().transform(query)
     words = self._query_transformer.transform(cleaned).get_words_str()
     vectorised = self._vectorizer.transform([words])
     encoded_query = self._model.transform(vectorised)
     similarity_rows = cosine_similarity(encoded_query, self._index)
     # Only the first row is ranked; negating the score sorts highest
     # first while keeping ties stable.
     ranked = sorted(enumerate(similarity_rows[0]),
                     key=lambda pair: -pair[1])
     return [self._service_array[position] for position, _ in ranked]
Beispiel #10
0
 def find(self, query):
     """Return the entries of ``self._service_array`` ranked against ``query``.

     The query is cleaned, converted with ``self._doc_to_nbow``, given a
     leading axis, passed through ``self._model.transform`` and compared
     against ``self._index`` with ``cosine_similarity``.

     Args:
         query: Raw query text.

     Returns:
         list: Entries of ``self._service_array`` ordered by descending
         cosine similarity.
     """
     cleaned = StringTransformer().transform(query)
     tokens = self._query_transformer.transform(cleaned).get_words_list()
     # The vector gets a leading axis before it is handed to the model.
     batched_query = np.expand_dims(self._doc_to_nbow(tokens), axis=0)
     encoded_query = self._model.transform(batched_query)
     similarity_rows = cosine_similarity(encoded_query, self._index)
     # Only the first row is ranked; negating the score sorts highest
     # first while keeping ties stable.
     ranked = sorted(enumerate(similarity_rows[0]),
                     key=lambda pair: -pair[1])
     return [self._service_array[position] for position, _ in ranked]
Beispiel #11
0
 def find(self, query):
     """Return the neighbours of ``query`` within its predicted cluster.

     The query is cleaned and vectorised, ``self._cluster_index`` predicts
     a label for it, and the ``kneighbors`` search stored under that label
     in ``self._cluster`` yields positions. Those positions are mapped
     through ``self._document_cluster`` to indices into
     ``self._service_array``.

     Args:
         query: Raw query text.

     Returns:
         list: Entries of ``self._service_array`` in the order returned by
         the per-cluster ``kneighbors`` call.
     """
     cleaned = StringTransformer().transform(query)
     words = self._query_transformer.transform(cleaned).get_words_str()
     dense_query = self._vectorizer.transform([words]).toarray()

     label = self._cluster_index.predict(dense_query)[0]
     neighbours = self._cluster[label].kneighbors(
         dense_query, return_distance=False)[0]

     # Element 1 of each cluster member is its index into the service array.
     service_positions = [
         self._document_cluster[label][neighbour][1]
         for neighbour in neighbours
     ]
     return [self._service_array[position] for position in service_positions]