def get_description(self, query):
    """Look up *query* on BabelNet and collect words from its first gloss.

    The gloss text is split into sentences and the first
    ``self._NUMBER_OF_SENTENCES`` of them are tokenized via
    ``StringTransformer``.

    Returns a (possibly empty) list of words.
    """
    additional_words = []
    synset = self._query_babelnet(query)
    # Flattened guard: the original nested four ifs, each with an identical
    # "not found" print in its else branch. Short-circuiting keeps the same
    # evaluation order (no key access happens once a check fails).
    if (synset is None
            or len(synset) == 0
            or len(synset['glosses']) == 0
            or 'gloss' not in synset['glosses'][0]):
        print('information not found for query: ' + query)
        return additional_words
    # str.split() never returns None, so the old "if sentences is not None"
    # branch was dead code and has been removed.
    sentences = synset['glosses'][0]['gloss'].split('.')
    print('Description Result:')
    print(sentences)
    print('found information for query: ' + query)
    for i in range(0, min(len(sentences), self._NUMBER_OF_SENTENCES)):
        transformer = StringTransformer()
        additional_words.extend(
            transformer.transform(sentences[i]).get_words_list())
    return additional_words
def find(self, query):
    """Search the Lucene index for *query* and return matching document paths.

    The query string is transformed, preprocessed, parsed against the
    "content" field and scored with BM25; the paths of the 10 best hits
    are returned as UTF-8 encoded strings.
    """
    transformer = StringTransformer()
    analyzer = EnglishAnalyzer(Version.LUCENE_CURRENT)
    reader = IndexReader.open(SimpleFSDirectory(File("index/")))
    searcher = IndexSearcher(reader)
    searcher.setSimilarity(BM25Similarity())
    processed_query = ' '.join(
        self._preprocessor(transformer.transform(query)))
    query = QueryParser(Version.LUCENE_CURRENT, "content",
                        analyzer).parse(processed_query)
    # BUG FIX: the original called searcher.get_description(query, 10),
    # which is not an IndexSearcher method (copy-paste from a sibling
    # function); search() is the intended Lucene call.
    hits = searcher.search(query, 10)
    result_list = []
    for hit in hits.scoreDocs:
        doc = searcher.doc(hit.doc)
        result_list.append(doc.get("path").encode("utf-8"))
    # Release the index reader — it was previously leaked on every call.
    reader.close()
    return result_list
def get_type(self, query):
    """Look up *query*'s notable type on Freebase and return its words.

    Returns a (possibly empty) list of words tokenized from the topic's
    first notable-type label.
    """
    additional_words = []
    topic = self._query_freebase(query)
    # Merged the None check and the key check: both original branches
    # printed the identical "not found" message.
    if topic is not None and '/common/topic/notable_types' in topic['property']:
        sentences = topic['property']['/common/topic/notable_types']['values'][0]['text']
        if sentences is not None:
            print('found parent type for query: ' + query)
            transformer = StringTransformer()
            additional_words.extend(
                transformer.transform(sentences).get_words_list())
        else:
            print('parent type not found for query: ' + query)
    else:
        print('parent type not found for query: ' + query)
    # BUG FIX: the word list was built but never returned — the function
    # always returned None, unlike its sibling get_description methods.
    return additional_words
def find(self, query):
    """Rank every known service against *query* using the KD-tree index.

    Returns the full service array ordered from nearest to farthest.
    """
    transformed = StringTransformer().transform(query)
    words = self._query_transformer.transform(transformed).get_words_str()
    vector = self._vectorizer.transform([words]).toarray()
    neighbours = self._kdtree_index.query(
        vector, k=len(self._service_array), return_distance=False)
    return [self._service_array[idx] for idx in neighbours[0]]
def find(self, query):
    """Return services ordered by TF-IDF similarity to *query*, best first."""
    transformed = StringTransformer().transform(query)
    tokens = self._preprocessor(
        self._query_transformer.transform(transformed).get_words_list())
    bow_vector = self._dictionary.doc2bow(tokens)
    similarities = self._index[self._tfidf_model[bow_vector]]
    # sorted() is stable, so reverse=True gives the same ordering as
    # negating the score in the key.
    ranked = sorted(enumerate(similarities),
                    key=lambda pair: pair[1], reverse=True)
    return [self._service_array[doc_id] for doc_id, _ in ranked]
def find(self, query):
    """Return the services nearest to *query* in the LSI (SVD-reduced) space."""
    transformed = StringTransformer().transform(query)
    words = self._query_transformer.transform(transformed).get_words_str()
    raw_vector = self._vectorizer.transform([words])
    reduced = self._svd.transform(raw_vector.toarray())
    neighbours = self._lsi_index.kneighbors(reduced, return_distance=False)[0]
    return [self._service_array[idx] for idx in neighbours]
def find(self, query):
    """Rank services by fastText n_similarity to the filtered query words.

    Query words missing from the fastText vocabulary are dropped; a service
    with an empty word list gets similarity 0 so it sorts last.

    Returns service keys ordered from most to least similar.
    """
    transformer = StringTransformer()
    query = self._preprocessor(
        transformer.transform(query).get_words_list())
    # Filter words from the query that aren't in the vocabulary.
    # (Removed the redundant list() wrapper around the comprehension.)
    query = [x for x in query if x in self._fasttext_model.vocab]
    results = []
    # Iterate the dict directly — no mutation happens, so the defensive
    # list(self._service_map.keys()) copy was unnecessary.
    for key in self._service_map:
        # Assign 0 similarity for empty documents, otherwise calculate it.
        if self._service_map[key]:
            results.append((key, self._fasttext_model.n_similarity(
                query, self._service_map[key])))
        else:
            results.append((key, 0))
    results.sort(key=lambda item: -item[1])
    return [key for key, _ in results]
def get_description(self, query):
    """Look up *query* on Freebase and collect words from its article text.

    The article text is split into sentences and the first
    ``self._NUMBER_OF_SENTENCES`` of them are tokenized via
    ``StringTransformer``.

    Returns a (possibly empty) list of words.
    """
    additional_words = []
    topic = self._query_freebase(query)
    # Flattened guard: the original nested three ifs, each with an identical
    # "not found" print. Short-circuiting preserves the evaluation order.
    if (topic is None
            or '/common/topic/article' not in topic['property']
            or '/common/document/text' not in
            topic['property']['/common/topic/article']['values'][0]['property']):
        print('information not found for query: ' + query)
        return additional_words
    article = topic['property']['/common/topic/article']['values'][0]['property']
    # str.split() never returns None, so the old "if sentences is not None"
    # branch was dead code and has been removed.
    sentences = article['/common/document/text']['values'][0]['value'].split('.')
    print('found information for query: ' + query)
    for i in range(0, min(len(sentences), self._NUMBER_OF_SENTENCES)):
        transformer = StringTransformer()
        additional_words.extend(
            transformer.transform(sentences[i]).get_words_list())
    return additional_words
def find(self, query):
    """Return services ranked by cosine similarity in the model's latent space."""
    transformed = StringTransformer().transform(query)
    words = self._query_transformer.transform(transformed).get_words_str()
    raw_vector = self._vectorizer.transform([words])
    encoded = self._model.transform(raw_vector)
    scores = cosine_similarity(encoded, self._index)[0]
    # Stable sort: reverse=True matches the original negated-score key.
    ranked = sorted(enumerate(scores), key=lambda pair: pair[1], reverse=True)
    return [self._service_array[doc_id] for doc_id, _ in ranked]
def find(self, query):
    """Return services ranked by latent-space similarity of the nBOW query."""
    transformed = StringTransformer().transform(query)
    nbow_vector = self._doc_to_nbow(
        self._query_transformer.transform(transformed).get_words_list())
    batch = np.expand_dims(nbow_vector, axis=0)
    encoded = self._model.transform(batch)
    scores = cosine_similarity(encoded, self._index)[0]
    # Stable sort: reverse=True matches the original negated-score key.
    ranked = sorted(enumerate(scores), key=lambda pair: pair[1], reverse=True)
    return [self._service_array[doc_id] for doc_id, _ in ranked]
def find(self, query):
    """Find services by predicting the query's cluster, then taking the
    nearest neighbours inside that cluster.
    """
    transformed = StringTransformer().transform(query)
    words = self._query_transformer.transform(transformed).get_words_str()
    vector = self._vectorizer.transform([words]).toarray()
    label = self._cluster_index.predict(vector)[0]
    neighbours = self._cluster[label].kneighbors(
        vector, return_distance=False)[0]
    # Map each neighbour position to its global service index, then to the
    # service itself.
    service_indexes = [self._document_cluster[label][pos][1]
                       for pos in neighbours]
    return [self._service_array[idx] for idx in service_indexes]