def update_model(self, search_context):
        """
        Rebuild the topic language model from the topic text plus snippet text.

        The snippet text is obtained from the search context via
        self._get_snip_text and then passed through self._check_terms.

        Returns False when updating is switched off or when no snippet text
        is left after the term check; returns True once self.topic_lang_model
        has been replaced.
        """
        if not self.updating:
            return False

        snippets = self._check_terms(self._get_snip_text(search_context))
        if not snippets:
            return False

        # Topic and snippet text are pooled into a single string so that one
        # set of term counts drives the new model.  (An alternative that kept
        # separate title and snippet models and combined them through a
        # BayesLanguageModel with beta=10 was previously sketched here.)
        combined_text = '{0} {1}'.format(
            search_context.topic.get_topic_text(), snippets)
        counts = lm_methods.extract_term_dict_from_text(
            combined_text, self._stopword_file)
        empirical_model = LanguageModel(term_dict=counts)

        # Install the unsmoothed model first; it is only superseded when a
        # background model is available for smoothing.
        self.topic_lang_model = empirical_model
        if self.background_language_model:
            self.topic_lang_model = SmoothedLanguageModel(
                empirical_model, self.background_language_model)

        return True
Example #2
0
    def __update_topic_language_model(self, text_list):
        """
        Rebuild self.topic_language_model from the topic plus new text.

        text_list is an iterable of strings; they are joined with spaces and
        their term counts are merged with the term counts of the topic text.
        The merged counts feed a LanguageModel, which is then wrapped in a
        SmoothedLanguageModel together with self.background_language_model
        and self.mu.
        """
        # The title appears three times in the template, so its terms are
        # counted three times for every single occurrence in the content.
        topic_text = '{title} {title} {title} {content}'.format(
            **self._topic.__dict__)
        snippet_text = ' '.join(text_list)

        term_extractor = SingleQueryGeneration(
            minlen=3, stopwordfile=self._stopword_file)

        term_extractor.extract_queries_from_text(topic_text)
        topic_term_counts = term_extractor.query_count

        # NOTE(review): this assumes extract_queries_from_text rebinds
        # query_count to a fresh dict on each call.  If it mutates the same
        # dict in place, both names alias one object and the merge below
        # double-counts -- confirm against SingleQueryGeneration.
        term_extractor.extract_queries_from_text(snippet_text)
        merged_term_counts = term_extractor.query_count

        # Fold the topic counts into the snippet counts.
        for term in topic_term_counts:
            merged_term_counts[term] = (
                merged_term_counts.get(term, 0) + topic_term_counts[term])

        new_language_model = LanguageModel(term_dict=merged_term_counts)

        self.topic_language_model = SmoothedLanguageModel(
            new_language_model, self.background_language_model, self.mu)

        log.debug("Updating topic {0}".format(self._topic.id))
Example #3
0
    def make_topic_lm(self):
        """
        Build the smoothed topic language model and store it in self.topicLM.

        Term counts are extracted from the topic's content and title with a
        SingleQueryGeneration extractor (minlen=3, the instance's stopword
        file), turned into a LanguageModel and wrapped in a
        SmoothedLanguageModel with self.backgroundLM and the constant 100
        (presumably the smoothing parameter -- confirm).
        """
        # Separate content and title with a space: plain concatenation fuses
        # the last word of the content with the first word of the title into
        # a single bogus term.
        topic_text = self.topic.content + ' ' + self.topic.title

        doc_extractor = SingleQueryGeneration(minlen=3,
                                              stopwordfile=self.stopword_file)
        doc_extractor.extract_queries_from_text(topic_text)
        doc_term_counts = doc_extractor.query_count
        lm = LanguageModel(term_dict=doc_term_counts)
        self.topicLM = SmoothedLanguageModel(lm, self.backgroundLM, 100)
        # A single pre-formatted argument prints the same text under both the
        # Python 2 print statement and the Python 3 print function.
        print("making topic {0}".format(self.topicLM.docLM.total_occurrences))
    def _generate_topic_language_model(self, search_context):
        """
        Build the language model for the search topic.

        Returns an empirical LanguageModel made from the topic text's term
        counts, or a SmoothedLanguageModel wrapping it when
        self.background_language_model is set.
        """
        text = self._make_topic_text(search_context)
        counts = lm_methods.extract_term_dict_from_text(
            text, self._stopword_file)
        empirical_model = LanguageModel(term_dict=counts)

        # Without a background model there is nothing to smooth against.
        if not self.background_language_model:
            return empirical_model

        return SmoothedLanguageModel(
            empirical_model, self.background_language_model)
Example #5
0
    def make_topic_language_model(self):
        """
        Build the smoothed topic language model and store it on the instance.

        Term counts are extracted from the topic's content and title with a
        SingleQueryGeneration extractor (minlen=3, the instance's stopword
        file), turned into a LanguageModel and wrapped in a
        SmoothedLanguageModel with self.background_language_model and the
        constant 100 (presumably the smoothing parameter -- confirm).  The
        result is assigned to self.topic_language_model.
        """
        # Separate content and title with a space: plain concatenation fuses
        # the last word of the content with the first word of the title into
        # a single bogus term.
        topic_text = self._topic.content + ' ' + self._topic.title

        document_extractor = SingleQueryGeneration(
            minlen=3, stopwordfile=self._stopword_file)
        document_extractor.extract_queries_from_text(topic_text)
        document_term_counts = document_extractor.query_count

        language_model = LanguageModel(term_dict=document_term_counts)
        self.topic_language_model = SmoothedLanguageModel(
            language_model, self.background_language_model, 100)
        # A single pre-formatted argument prints the same text under both the
        # Python 2 print statement and the Python 3 print function.
        print("making topic {0}".format(
            self.topic_language_model.docLM.total_occurrences))
Example #6
0
    def make_topic_language_model(self):
        """
        Build the smoothed topic language model and store it on the instance.

        The topic text is the title three times followed by the content.  Its
        term counts feed a LanguageModel, which is wrapped in a
        SmoothedLanguageModel with self.background_language_model and self.mu
        and assigned to self.topic_language_model.
        """
        topic_fields = self._topic.__dict__
        weighted_text = '{title} {title} {title} {content}'.format(
            **topic_fields)

        extractor = SingleQueryGeneration(
            minlen=3, stopwordfile=self._stopword_file)
        extractor.extract_queries_from_text(weighted_text)

        empirical_model = LanguageModel(term_dict=extractor.query_count)
        self.topic_language_model = SmoothedLanguageModel(
            empirical_model, self.background_language_model, self.mu)

        # The debug line reports the model's total term occurrences, not a
        # topic identifier.
        total = self.topic_language_model.docLM.total_occurrences
        log.debug("Making topic {0}".format(total))