def generate_query_list(self, search_context):
    """
    Produce a ranked list of candidate queries for the simulated agent.

    The topic text and the text returned by ``_get_snip_text`` are joined
    with a single space, passed through ``_check_terms``, and used to
    extract three-term and two-term queries. The candidates (three-term
    queries first) are scored by a QueryRanker backed by the topic
    language model.

    :param search_context: object exposing ``topic.get_topic_text()``; it is
        also handed to ``_generate_topic_language_model`` and
        ``_get_snip_text``.
    :return: whatever ``QueryRanker.get_top_queries(100)`` returns
        (presumably the 100 best-scoring queries -- confirm against
        QueryRanker).
    """
    topic_text = search_context.topic.get_topic_text()

    # The topic language model is created on first use and then kept on the
    # instance, so later calls reuse it.
    if self.topic_lang_model is None:
        self.topic_lang_model = self._generate_topic_language_model(
            search_context)

    source_text = self._check_terms(
        topic_text + ' ' + self._get_snip_text(search_context))

    two_term = BiTermQueryGeneration(
        minlen=3, stopwordfile=self._stopword_file)
    three_term = TriTermQueryGeneration(
        minlen=3, stopwordfile=self._stopword_file)

    # Three-term candidates come first in the combined list.
    candidates = three_term.extract_queries_from_text(source_text)
    candidates = candidates + two_term.extract_queries_from_text(source_text)

    ranker = QueryRanker(smoothed_language_model=self.topic_lang_model)
    ranker.calculate_query_list_probabilities(candidates)
    return ranker.get_top_queries(100)
def generate_query_list(self, search_context):
    """
    Build the candidate queries the simulated agent could issue.

    Text comes from two places: the topic (``topic.get_topic_text()``) and
    ``_get_snip_text(search_context)``. The combined text is filtered by
    ``_check_terms`` and then fed to a tri-term and a bi-term query
    generator. The merged candidates are ranked against the topic language
    model.

    :param search_context: supplies the topic; also passed to
        ``_generate_topic_language_model`` and ``_get_snip_text``.
    :return: the value of ``QueryRanker.get_top_queries(100)``.
    """
    topic_text = search_context.topic.get_topic_text()

    if self.topic_lang_model is None:
        # First call: build the language model once and keep it on the
        # instance for reuse.
        lang_model = self._generate_topic_language_model(search_context)
        self.topic_lang_model = lang_model

    snippet_text = self._get_snip_text(search_context)
    combined_text = self._check_terms(topic_text + ' ' + snippet_text)

    generator_options = {'minlen': 3, 'stopwordfile': self._stopword_file}
    bi_generator = BiTermQueryGeneration(**generator_options)
    tri_generator = TriTermQueryGeneration(**generator_options)

    tri_queries = tri_generator.extract_queries_from_text(combined_text)
    bi_queries = bi_generator.extract_queries_from_text(combined_text)

    ranker = QueryRanker(smoothed_language_model=self.topic_lang_model)
    # Tri-term queries are listed ahead of bi-term queries.
    ranker.calculate_query_list_probabilities(tri_queries + bi_queries)
    return ranker.get_top_queries(100)
def _rank_terms(self, terms, **kwargs):
    """
    Rank the supplied query terms with a QueryRanker.

    As many results are requested from the ranker as there are input terms.

    :param terms: the terms/queries to score.
    :param kwargs: may contain ``topic_language_model``, the language model
        handed to the QueryRanker; ``None`` is used when it is absent.
    :return: the value of ``QueryRanker.get_top_queries(len(terms))``.
    """
    language_model = kwargs.get('topic_language_model')

    term_ranker = QueryRanker(smoothed_language_model=language_model)
    term_ranker.calculate_query_list_probabilities(terms)

    requested = len(terms)
    return term_ranker.get_top_queries(requested)
def generate_query_list(self, topic):
    """
    Produce ranked single-term queries from a topic's content.

    :param topic: object exposing a ``content`` attribute; it is also passed
        to ``_generate_topic_language_model``.
    :return: the value of ``QueryRanker.get_top_queries(100)``.
    """
    content = topic.content
    language_model = self._generate_topic_language_model(topic)

    extractor = SingleQueryGeneration(
        minlen=3,
        stopwordfile=self._stopword_file,
    )
    candidates = extractor.extract_queries_from_text(content)

    # Score every candidate against the topic language model.
    ranker = QueryRanker(smoothed_language_model=language_model)
    ranker.calculate_query_list_probabilities(candidates)
    top_queries = ranker.get_top_queries(100)
    return top_queries
def produce_query_list(self, topic):
    """
    Produce ranked two- and three-term queries from a topic's content.

    :param topic: object exposing a ``content`` attribute; it is also passed
        to ``make_topic_lm``.
    :return: the value of ``QueryRanker.get_top_queries(100)``.
    """
    content = topic.content
    language_model = self.make_topic_lm(topic)

    two_term = BiTermQueryGeneration(minlen=3, stopwordfile=self.stopword_file)
    three_term = TriTermQueryGeneration(minlen=3, stopwordfile=self.stopword_file)

    # Three-term candidates are placed ahead of two-term candidates.
    candidates = three_term.extract_queries_from_text(content)
    candidates = candidates + two_term.extract_queries_from_text(content)

    ranker = QueryRanker(smoothed_language_model=language_model)
    ranker.calculate_query_list_probabilities(candidates)
    return ranker.get_top_queries(100)
def generate_query_list(self, topic):
    """
    Build single-term candidate queries from ``topic.content`` and rank them.

    A language model is generated for the topic on every call and used by
    the QueryRanker to score the extracted queries.

    :param topic: object exposing a ``content`` attribute; it is also passed
        to ``_generate_topic_language_model``.
    :return: the value of ``QueryRanker.get_top_queries(100)``.
    """
    source_text = topic.content
    model = self._generate_topic_language_model(topic)

    single_term = SingleQueryGeneration(
        minlen=3, stopwordfile=self._stopword_file)
    single_term_queries = single_term.extract_queries_from_text(source_text)

    query_scorer = QueryRanker(smoothed_language_model=model)
    query_scorer.calculate_query_list_probabilities(single_term_queries)
    return query_scorer.get_top_queries(100)
def produce_query_list(self, topic):
    """
    Build tri-term and bi-term candidate queries from ``topic.content`` and
    rank them against the topic language model.

    :param topic: object exposing a ``content`` attribute; it is also passed
        to ``make_topic_lm``.
    :return: the value of ``QueryRanker.get_top_queries(100)``.
    """
    source_text = topic.content
    topic_model = self.make_topic_lm(topic)

    generator_options = {'minlen': 3, 'stopwordfile': self.stopword_file}
    bi_generator = BiTermQueryGeneration(**generator_options)
    tri_generator = TriTermQueryGeneration(**generator_options)

    tri_queries = tri_generator.extract_queries_from_text(source_text)
    bi_queries = bi_generator.extract_queries_from_text(source_text)

    ranker = QueryRanker(smoothed_language_model=topic_model)
    # Tri-term queries are listed before bi-term queries.
    ranker.calculate_query_list_probabilities(tri_queries + bi_queries)
    top_queries = ranker.get_top_queries(100)
    return top_queries
def generate_query_list(self, search_context):
    """
    Produce ranked single-term queries from the topic's title and content.

    :param search_context: object exposing ``topic`` (with ``title`` and
        ``content`` attributes); it is also passed to
        ``_generate_topic_language_model``.
    :return: the value of ``QueryRanker.get_top_queries(100)``.
    """
    current_topic = search_context.topic
    # Title and content are joined with a single space to form the source text.
    source_text = "{0} {1}".format(current_topic.title, current_topic.content)

    language_model = self._generate_topic_language_model(search_context)

    extractor = SingleQueryGeneration(
        minlen=3,
        stopwordfile=self._stopword_file,
    )
    candidates = extractor.extract_queries_from_text(source_text)

    ranker = QueryRanker(smoothed_language_model=language_model)
    ranker.calculate_query_list_probabilities(candidates)
    return ranker.get_top_queries(100)
def generate_query_list(self, search_context):
    """
    Produce ranked three- and four-term queries from topic and snippet text.

    The topic text and the text returned by ``_get_snip_text`` are joined
    with a single space and passed through ``_check_terms``. Every
    combination of three, then of four, distinct terms from the result
    becomes a candidate query, and the candidates are ranked against the
    topic language model.

    :param search_context: object exposing ``topic.get_topic_text()``; it is
        also passed to ``_generate_topic_language_model`` and
        ``_get_snip_text``.
    :return: the value of ``QueryRanker.get_top_queries(100)``.
    """
    topic_text = search_context.topic.get_topic_text()

    # The topic language model is created on first use and then kept on the
    # instance for later calls.
    if self.topic_lang_model is None:
        self.topic_lang_model = self._generate_topic_language_model(search_context)

    snip_text = self._get_snip_text(search_context)
    all_text = self._check_terms(topic_text + ' ' + snip_text)

    # Deduplicate the terms and drop the empty tokens that split(' ') yields
    # for leading, trailing or repeated spaces; an empty token would give a
    # malformed query with fewer real terms than intended. Sorting makes the
    # term order (and so the query strings) reproducible across runs, which
    # plain set iteration over strings is not under hash randomisation.
    term_list = sorted({term for term in all_text.split(' ') if term})

    # NOTE: the number of candidates grows combinatorially with the number
    # of distinct terms. The combinations are consumed lazily so that only
    # the joined query strings are held in memory.
    query_list = [
        ' '.join(terms)
        for size in (3, 4)
        for terms in itertools.combinations(term_list, size)
    ]

    query_ranker = QueryRanker(smoothed_language_model=self.topic_lang_model)
    query_ranker.calculate_query_list_probabilities(query_list)
    return query_ranker.get_top_queries(100)
def generate_query_list(self, search_context):
    """
    Produce ranked two-term queries from the topic's title and content.

    :param search_context: object exposing ``topic`` (with ``title`` and
        ``content`` attributes); it is also passed to
        ``_generate_topic_language_model``.
    :return: the value of ``QueryRanker.get_top_queries(100)``.
    """
    current_topic = search_context.topic
    # Title and content are joined with a single space to form the source text.
    source_text = "{0} {1}".format(current_topic.title, current_topic.content)

    language_model = self._generate_topic_language_model(search_context)

    two_term = BiTermQueryGeneration(minlen=3, stopwordfile=self._stopword_file)
    candidates = two_term.extract_queries_from_text(source_text)

    ranker = QueryRanker(smoothed_language_model=language_model)
    ranker.calculate_query_list_probabilities(candidates)
    top_queries = ranker.get_top_queries(100)
    return top_queries
def generate_query_list(self, search_context):
    """
    Produce ranked three- and four-term queries from topic and snippet text.

    The topic text and the text returned by ``_get_snip_text`` are joined
    with a single space and passed through ``_check_terms``. Every
    combination of three, then of four, distinct terms from the result
    becomes a candidate query, and the candidates are ranked against the
    topic language model.

    :param search_context: object exposing ``topic.get_topic_text()``; it is
        also passed to ``_generate_topic_language_model`` and
        ``_get_snip_text``.
    :return: the value of ``QueryRanker.get_top_queries(100)``.
    """
    topic_text = search_context.topic.get_topic_text()

    # The topic language model is created on first use and then kept on the
    # instance for later calls.
    if self.topic_lang_model is None:
        self.topic_lang_model = self._generate_topic_language_model(
            search_context)

    snip_text = self._get_snip_text(search_context)
    all_text = self._check_terms(topic_text + ' ' + snip_text)

    # Deduplicate the terms and drop the empty tokens that split(' ') yields
    # for leading, trailing or repeated spaces; an empty token would give a
    # malformed query with fewer real terms than intended. Sorting makes the
    # term order (and so the query strings) reproducible across runs, which
    # plain set iteration over strings is not under hash randomisation.
    term_list = sorted({term for term in all_text.split(' ') if term})

    # NOTE: the number of candidates grows combinatorially with the number
    # of distinct terms. The combinations are consumed lazily so that only
    # the joined query strings are held in memory.
    query_list = [
        ' '.join(terms)
        for size in (3, 4)
        for terms in itertools.combinations(term_list, size)
    ]

    query_ranker = QueryRanker(
        smoothed_language_model=self.topic_lang_model)
    query_ranker.calculate_query_list_probabilities(query_list)
    return query_ranker.get_top_queries(100)