def generate_query_list(self, search_context):
        """
        Build the ranked candidate queries the simulated agent could issue.

        Despite the historical wording, the argument is a search context, not a
        bare Topic: the topic text is read from ``search_context.topic`` and is
        combined with the text returned by ``self._get_snip_text``. The combined
        text is passed through ``self._check_terms`` and then handed to the
        tri-term and bi-term query generators. The candidates (tri-term first,
        then bi-term) are scored by a QueryRanker against the topic language
        model, and the ranker's top 100 queries are returned.

        The topic language model is created on the first call and kept on
        ``self.topic_lang_model`` for later calls.
        """
        base_text = search_context.topic.get_topic_text()

        # Lazily build the language model; subsequent calls reuse it.
        if self.topic_lang_model is None:
            self.topic_lang_model = self._generate_topic_language_model(search_context)

        snippet_text = self._get_snip_text(search_context)
        source_text = self._check_terms(base_text + ' ' + snippet_text)

        two_term_gen = BiTermQueryGeneration(minlen=3, stopwordfile=self._stopword_file)
        three_term_gen = TriTermQueryGeneration(minlen=3, stopwordfile=self._stopword_file)

        three_term_queries = three_term_gen.extract_queries_from_text(source_text)
        two_term_queries = two_term_gen.extract_queries_from_text(source_text)
        candidates = three_term_queries + two_term_queries

        ranker = QueryRanker(smoothed_language_model=self.topic_lang_model)
        ranker.calculate_query_list_probabilities(candidates)
        return ranker.get_top_queries(100)
Example #2
0
    def generate_query_list(self, search_context):
        """
        Return the top-ranked tri-term and bi-term queries for a search context.

        Text comes from two places: the topic (``search_context.topic``) and
        the snippet text supplied by ``self._get_snip_text``. After being
        filtered by ``self._check_terms`` it is used to extract tri-term and
        bi-term candidate queries, which a QueryRanker scores with the topic
        language model. The ranker's top 100 queries are returned.

        ``self.topic_lang_model`` is populated on first use and reused after.
        """
        topic_part = search_context.topic.get_topic_text()

        if self.topic_lang_model is None:
            # First call: create and remember the topic language model.
            language_model = self._generate_topic_language_model(search_context)
            self.topic_lang_model = language_model

        snippet_part = self._get_snip_text(search_context)
        combined = topic_part + ' ' + snippet_part
        combined = self._check_terms(combined)

        pair_generator = BiTermQueryGeneration(
            minlen=3,
            stopwordfile=self._stopword_file,
        )
        triple_generator = TriTermQueryGeneration(
            minlen=3,
            stopwordfile=self._stopword_file,
        )

        triples = triple_generator.extract_queries_from_text(combined)
        pairs = pair_generator.extract_queries_from_text(combined)

        # Tri-term candidates come first, followed by the bi-term ones.
        all_candidates = triples + pairs

        scorer = QueryRanker(smoothed_language_model=self.topic_lang_model)
        scorer.calculate_query_list_probabilities(all_candidates)
        top_queries = scorer.get_top_queries(100)
        return top_queries
Example #3
0
 def _rank_terms(self, terms, **kwargs):
     """
     Score ``terms`` with a QueryRanker and return them in ranked order.

     The ranker is built from the optional ``topic_language_model`` keyword
     argument (``None`` when it is not supplied). As many results are
     requested from the ranker as there are entries in ``terms``.
     """
     language_model = kwargs.get('topic_language_model')

     scorer = QueryRanker(smoothed_language_model=language_model)
     scorer.calculate_query_list_probabilities(terms)

     wanted = len(terms)
     return scorer.get_top_queries(wanted)
Example #4
0
    def _rank_terms(self, terms, **kwargs):
        """
        Rank the supplied query terms using the topic language model.

        ``topic_language_model`` is read from ``kwargs`` and defaults to
        ``None``. A QueryRanker scores every entry in ``terms`` and is then
        asked for ``len(terms)`` results.
        """
        model = kwargs.get('topic_language_model')
        term_ranker = QueryRanker(smoothed_language_model=model)
        term_ranker.calculate_query_list_probabilities(terms)
        ranked = term_ranker.get_top_queries(len(terms))
        return ranked
    def generate_query_list(self, topic):
        """
        Return the top-ranked queries extracted from a topic's content.

        ``topic.content`` is fed to a SingleQueryGeneration extractor, and the
        resulting candidates are scored by a QueryRanker built from the
        language model returned by ``self._generate_topic_language_model``.
        The ranker's top 100 queries are returned.
        """
        content = topic.content
        language_model = self._generate_topic_language_model(topic)

        extractor = SingleQueryGeneration(minlen=3, stopwordfile=self._stopword_file)
        candidates = extractor.extract_queries_from_text(content)

        ranker = QueryRanker(smoothed_language_model=language_model)
        ranker.calculate_query_list_probabilities(candidates)
        top_queries = ranker.get_top_queries(100)
        return top_queries
Example #6
0
    def produce_query_list(self, topic):
        """
        Return the top-ranked tri-term and bi-term queries for a topic.

        Candidates are extracted from ``topic.content`` by the tri-term and
        bi-term generators, concatenated (tri-term first), and scored by a
        QueryRanker that uses the language model from ``self.make_topic_lm``.
        The ranker's top 100 queries are returned.
        """
        content = topic.content
        language_model = self.make_topic_lm(topic)

        pair_generator = BiTermQueryGeneration(minlen=3, stopwordfile=self.stopword_file)
        triple_generator = TriTermQueryGeneration(minlen=3, stopwordfile=self.stopword_file)

        triples = triple_generator.extract_queries_from_text(content)
        pairs = pair_generator.extract_queries_from_text(content)
        candidates = triples + pairs

        ranker = QueryRanker(smoothed_language_model=language_model)
        ranker.calculate_query_list_probabilities(candidates)
        return ranker.get_top_queries(100)
Example #7
0
    def generate_query_list(self, topic):
        """
        Extract candidate queries from ``topic.content`` and rank them.

        A SingleQueryGeneration extractor produces the candidates. They are
        scored by a QueryRanker configured with the topic language model
        (from ``self._generate_topic_language_model``), and the ranker's
        top 100 queries are returned.
        """
        source_text = topic.content
        model = self._generate_topic_language_model(topic)

        query_source = SingleQueryGeneration(
            minlen=3,
            stopwordfile=self._stopword_file,
        )
        extracted = query_source.extract_queries_from_text(source_text)

        scorer = QueryRanker(smoothed_language_model=model)
        scorer.calculate_query_list_probabilities(extracted)
        best = scorer.get_top_queries(100)
        return best
Example #8
0
    def produce_query_list(self, topic):
        """
        Build and rank tri-term and bi-term queries from a topic's content.

        Both generators read ``topic.content``. Their outputs are joined with
        the tri-term candidates ahead of the bi-term ones, then scored by a
        QueryRanker built on the language model from ``self.make_topic_lm``.
        Returns the ranker's top 100 queries.
        """
        text = topic.content
        topic_model = self.make_topic_lm(topic)

        two_term_gen = BiTermQueryGeneration(
            minlen=3,
            stopwordfile=self.stopword_file,
        )
        three_term_gen = TriTermQueryGeneration(
            minlen=3,
            stopwordfile=self.stopword_file,
        )

        three_term_queries = three_term_gen.extract_queries_from_text(text)
        two_term_queries = two_term_gen.extract_queries_from_text(text)

        ranker = QueryRanker(smoothed_language_model=topic_model)
        ranker.calculate_query_list_probabilities(three_term_queries + two_term_queries)
        return ranker.get_top_queries(100)
Example #9
0
    def generate_query_list(self, search_context):
        """
        Return the top-ranked queries drawn from the topic's title and content.

        The topic is taken from ``search_context.topic``; its title and content
        are joined with a single space and passed to a SingleQueryGeneration
        extractor. The candidates are scored by a QueryRanker that uses the
        language model from ``self._generate_topic_language_model``, and the
        ranker's top 100 queries are returned.
        """
        current_topic = search_context.topic
        source_text = "{0} {1}".format(current_topic.title, current_topic.content)

        language_model = self._generate_topic_language_model(search_context)

        extractor = SingleQueryGeneration(minlen=3, stopwordfile=self._stopword_file)
        candidates = extractor.extract_queries_from_text(source_text)

        ranker = QueryRanker(smoothed_language_model=language_model)
        ranker.calculate_query_list_probabilities(candidates)
        return ranker.get_top_queries(100)
Example #10
0
    def generate_query_list(self, search_context):
        """
        Build every 3- and 4-term query from the topic and snippet vocabulary
        and return the top-ranked ones.

        The topic text (from ``search_context.topic``) and the text returned by
        ``self._get_snip_text`` are combined and passed through
        ``self._check_terms``. The distinct whitespace-separated terms of the
        result are combined into all 3-term and then all 4-term queries, which
        a QueryRanker scores against the topic language model. The ranker's top
        100 queries are returned.

        The topic language model is created on the first call and cached on
        ``self.topic_lang_model``.
        """
        topic_text = search_context.topic.get_topic_text()
        if self.topic_lang_model is None:
            self.topic_lang_model = self._generate_topic_language_model(search_context)

        snip_text = self._get_snip_text(search_context)
        all_text = self._check_terms(topic_text + ' ' + snip_text)

        # split() with no argument discards empty tokens, so stray leading,
        # trailing or repeated whitespace can never become a blank "term"
        # (split(' ') would have produced '' entries in those cases).
        # NOTE: set iteration order is arbitrary, so the order of terms within
        # a generated query is not guaranteed to be stable between runs.
        term_list = list(set(all_text.split()))

        # Join each combination as it is produced instead of first
        # materialising two potentially very large lists of tuples.
        query_list = [
            ' '.join(combo)
            for size in (3, 4)
            for combo in itertools.combinations(term_list, size)
        ]

        query_ranker = QueryRanker(smoothed_language_model=self.topic_lang_model)
        query_ranker.calculate_query_list_probabilities(query_list)
        return query_ranker.get_top_queries(100)
Example #11
0
    def generate_query_list(self, search_context):
        """
        Return the top-ranked bi-term queries for the topic in a search context.

        The topic's title and content (from ``search_context.topic``) are
        joined with a single space and given to a BiTermQueryGeneration
        extractor. A QueryRanker built on the language model from
        ``self._generate_topic_language_model`` scores the candidates, and the
        ranker's top 100 queries are returned.
        """
        current_topic = search_context.topic
        source_text = "{0} {1}".format(current_topic.title, current_topic.content)

        language_model = self._generate_topic_language_model(search_context)

        pair_generator = BiTermQueryGeneration(minlen=3, stopwordfile=self._stopword_file)
        candidates = pair_generator.extract_queries_from_text(source_text)

        ranker = QueryRanker(smoothed_language_model=language_model)
        ranker.calculate_query_list_probabilities(candidates)
        return ranker.get_top_queries(100)
    def generate_query_list(self, search_context):
        """
        Generate all 3- and 4-term queries from the distinct terms of the topic
        and snippet text, and return the top-ranked ones.

        Topic text is read from ``search_context.topic`` and joined with the
        text from ``self._get_snip_text``; the result is passed through
        ``self._check_terms``. Every 3-term combination, followed by every
        4-term combination, of the distinct whitespace-separated terms becomes
        a candidate query. A QueryRanker scores the candidates against the
        topic language model and its top 100 queries are returned.

        ``self.topic_lang_model`` is built lazily on the first call and reused.
        """
        topic_text = search_context.topic.get_topic_text()
        if self.topic_lang_model is None:
            self.topic_lang_model = self._generate_topic_language_model(
                search_context)

        snip_text = self._get_snip_text(search_context)
        all_text = self._check_terms(topic_text + ' ' + snip_text)

        # Argument-less split() ignores runs of whitespace, so no empty-string
        # "term" is produced when the text has leading, trailing or doubled
        # spaces (which split(' ') would turn into '' entries).
        # NOTE: the set removes duplicates but has arbitrary iteration order,
        # so term order inside a query may differ between runs.
        term_list = list(set(all_text.split()))

        # Stream the combinations straight into the joined query strings;
        # the intermediate tuple lists are never needed on their own.
        query_list = [
            ' '.join(combo)
            for size in (3, 4)
            for combo in itertools.combinations(term_list, size)
        ]

        query_ranker = QueryRanker(
            smoothed_language_model=self.topic_lang_model)
        query_ranker.calculate_query_list_probabilities(query_list)
        return query_ranker.get_top_queries(100)