def _update_topic_language_model(self, text_list):
    """
    Rebuild the topic language model from three weighted term sources.

    The sources are, in order: term counts extracted from the topic text
    (``self._make_topic_text()``), the topic's background terms
    (``self._topic.background_terms``), and term counts extracted from the
    strings in ``text_list`` joined with single spaces. Each source is
    folded into one dictionary through ``self._combine_dictionaries``
    together with its own weighting attribute.

    The resulting ``LanguageModel`` replaces ``self.topic_language_model``;
    nothing is returned.
    """
    prior_text = self._make_topic_text()
    observed_text = ' '.join(text_list)

    prior_counts = extract_term_dict_from_text(
        prior_text, self._stopword_file)
    background_counts = self._topic.background_terms
    observed_counts = extract_term_dict_from_text(
        observed_text, self._stopword_file)

    # Fold every source into a single dictionary, starting from empty.
    weighted_sources = (
        (prior_counts, self.topic_weighting),
        (background_counts, self.topic_background_weighting),
        (observed_counts, self.document_weighting),
    )
    merged_counts = {}
    for counts, weighting in weighted_sources:
        merged_counts = self._combine_dictionaries(
            merged_counts, counts, weighting)

    # Swap in the freshly built model.
    self.topic_language_model = LanguageModel(term_dict=merged_counts)

    log.debug("Updating topic {0}".format(self._topic.id))
def update_model(self, search_context):
    """
    Rebuild the topic language model from the topic text plus snippet text.

    Returns ``False`` and changes nothing when ``self.updating`` is falsy,
    or when the snippet text obtained through ``self._get_snip_text`` and
    filtered by ``self._check_terms`` is empty. Otherwise a new
    ``LanguageModel`` is built from the topic text followed by the snippet
    text and stored on ``self.topic_lang_model``; when
    ``self.background_language_model`` is truthy the stored model is a
    ``SmoothedLanguageModel`` wrapping it. ``True`` is returned in that case.
    """
    if not self.updating:
        return False

    checked_snippets = self._check_terms(self._get_snip_text(search_context))
    if not checked_snippets:
        return False

    # Topic and snippet text are pooled into one string and counted together.
    # NOTE: the original carried a disabled alternative that built separate
    # title and snippet models and merged them with
    # BayesLanguageModel(..., beta=10).
    pooled_text = '{0} {1}'.format(
        search_context.topic.get_topic_text(), checked_snippets)
    pooled_counts = lm_methods.extract_term_dict_from_text(
        pooled_text, self._stopword_file)

    empirical_model = LanguageModel(term_dict=pooled_counts)
    self.topic_lang_model = empirical_model

    if self.background_language_model:
        self.topic_lang_model = SmoothedLanguageModel(
            empirical_model, self.background_language_model)

    return True
def update_model(self, search_context):
    """
    Refresh ``self.topic_lang_model`` using topic text and snippet text.

    Nothing happens, and ``False`` is returned, if updating is switched off
    (``self.updating`` is falsy) or if ``self._check_terms`` leaves no
    snippet text. Otherwise term counts are extracted from
    "<topic text> <snippet text>", a ``LanguageModel`` is built from them
    and stored, and ``True`` is returned. If
    ``self.background_language_model`` is truthy, the stored model is
    replaced by a ``SmoothedLanguageModel`` built from the new model and
    that background model.
    """
    if not self.updating:
        return False

    raw_snippets = self._get_snip_text(search_context)
    usable_snippets = self._check_terms(raw_snippets)
    if not usable_snippets:
        return False

    topic_part = search_context.topic.get_topic_text()
    combined_text = '{0} {1}'.format(topic_part, usable_snippets)

    # A single count dictionary over the combined text is used. The original
    # also held a commented-out BayesLanguageModel(title, snippet, beta=10)
    # variant built from separate title and snippet models.
    counts = lm_methods.extract_term_dict_from_text(
        combined_text, self._stopword_file)
    base_model = LanguageModel(term_dict=counts)
    self.topic_lang_model = base_model

    background = self.background_language_model
    if background:
        self.topic_lang_model = SmoothedLanguageModel(base_model, background)

    return True
def make_topic_language_model(self):
    """
    Build the topic language model from topic text and background terms.

    Term counts extracted from ``self._make_topic_text()`` and the topic's
    ``background_terms`` are folded into one dictionary through
    ``self._combine_dictionaries``, using ``self.topic_weighting`` and
    ``self.topic_background_weighting`` respectively. The resulting
    ``LanguageModel`` is stored on ``self.topic_language_model``; nothing
    is returned.
    """
    # Counts from the topic text (the original comment describes this as
    # the TREC topic title and description).
    topic_counts = extract_term_dict_from_text(
        self._make_topic_text(), self._stopword_file)

    # Counts supplied with the topic as background.
    background_counts = self._topic.background_terms

    weighted_sources = (
        (topic_counts, self.topic_weighting),
        (background_counts, self.topic_background_weighting),
    )
    merged_counts = {}
    for counts, weighting in weighted_sources:
        merged_counts = self._combine_dictionaries(
            merged_counts, counts, weighting)

    # The merged dictionary is the basis of the language model.
    self.topic_language_model = LanguageModel(term_dict=merged_counts)

    log.debug("Making topic {0}".format(self._topic.id))
def _update_topic_language_model(self, text_list):
    """
    Recompute ``self.topic_language_model`` from topic, background and
    document terms.

    ``text_list`` holds snippet/document strings; they are joined with
    single spaces before term extraction. Topic text comes from
    ``self._make_topic_text()`` and background terms from
    ``self._topic.background_terms``. The three term dictionaries are merged
    via ``self._combine_dictionaries`` with ``self.topic_weighting``,
    ``self.topic_background_weighting`` and ``self.document_weighting``.

    Nothing is returned; the new ``LanguageModel`` is assigned to
    ``self.topic_language_model``.
    """
    stopwords = self._stopword_file

    knowledge_text = self._make_topic_text()
    seen_text = ' '.join(text_list)

    knowledge_counts = extract_term_dict_from_text(knowledge_text, stopwords)
    background_counts = self._topic.background_terms
    seen_counts = extract_term_dict_from_text(seen_text, stopwords)

    # Merge step by step: topic first, then background, then documents.
    merged = self._combine_dictionaries(
        {}, knowledge_counts, self.topic_weighting)
    merged = self._combine_dictionaries(
        merged, background_counts, self.topic_background_weighting)
    merged = self._combine_dictionaries(
        merged, seen_counts, self.document_weighting)

    self.topic_language_model = LanguageModel(term_dict=merged)

    log.debug("Updating topic {0}".format(self._topic.id))
def make_topic_language_model(self):
    """
    Build the topic language model from the topic text alone.

    Term counts are extracted from ``self._make_topic_text()`` using
    ``self._stopword_file`` and wrapped in a ``LanguageModel``, which is
    stored on ``self.topic_language_model``. Nothing is returned.
    """
    term_counts = extract_term_dict_from_text(
        self._make_topic_text(), self._stopword_file)

    # The plain model is stored as-is. The original kept a commented-out
    # alternative: SmoothedLanguageModel(language_model,
    # self.background_language_model, 100).
    self.topic_language_model = LanguageModel(term_dict=term_counts)

    log.debug("Making topic {0}".format(self._topic.id))
def make_topic_language_model(self):
    """
    Create ``self.topic_language_model`` from the topic text.

    The text returned by ``self._make_topic_text()`` is converted into a
    term-count dictionary (using ``self._stopword_file``) and used as the
    ``term_dict`` of a new ``LanguageModel``. Nothing is returned.
    """
    text = self._make_topic_text()
    counts = extract_term_dict_from_text(text, self._stopword_file)
    model = LanguageModel(term_dict=counts)

    # Stored unsmoothed. A SmoothedLanguageModel(language_model,
    # self.background_language_model, 100) wrapper appears in the original
    # only as a commented-out alternative.
    self.topic_language_model = model

    log.debug("Making topic {0}".format(self._topic.id))
def _generate_topic_language_model(self, search_context):
    """
    Return a ``LanguageModel`` for the topic held by ``search_context``.

    The topic's title and content are joined with a single space, term
    counts are extracted from that text using ``self._stopword_file``, and
    the counts become the model's ``term_dict``. Inheriting classes can
    override this method to return a different kind of language model.
    """
    current_topic = search_context.topic
    title_and_content = "{0} {1}".format(
        current_topic.title, current_topic.content)

    # The model is built directly from the term counts of the topic text.
    counts = lm_methods.extract_term_dict_from_text(
        title_and_content, self._stopword_file)
    return LanguageModel(term_dict=counts)
def _generate_topic_language_model(self, search_context):
    """
    Return the language model for the search topic.

    A ``LanguageModel`` is built from the term counts of
    ``self._make_topic_text(search_context)``. It is returned directly when
    ``self.background_language_model`` is falsy; otherwise it is wrapped in
    a ``SmoothedLanguageModel`` together with that background model.
    """
    text = self._make_topic_text(search_context)
    counts = lm_methods.extract_term_dict_from_text(
        text, self._stopword_file)
    empirical_model = LanguageModel(term_dict=counts)

    # Without a background model there is nothing to smooth against.
    if not self.background_language_model:
        return empirical_model

    return SmoothedLanguageModel(
        empirical_model, self.background_language_model)
def _generate_topic_language_model(self, search_context):
    """
    Build and return a term-count ``LanguageModel`` for the current topic.

    The text is "<title> <content>" of ``search_context.topic``; term counts
    are extracted with ``lm_methods.extract_term_dict_from_text`` and
    ``self._stopword_file``. Override in inheriting classes to generate and
    return a different language model.
    """
    subject = search_context.topic
    subject_text = "{0} {1}".format(subject.title, subject.content)
    term_counts = lm_methods.extract_term_dict_from_text(
        subject_text, self._stopword_file)

    # The returned model is built solely from the topic text's term counts.
    model = LanguageModel(term_dict=term_counts)
    return model
def _generate_topic_language_model(self, search_context):
    """
    Create the topic language model, smoothed when a background model exists.

    Term counts are extracted from ``self._make_topic_text(search_context)``
    and turned into a ``LanguageModel``. If
    ``self.background_language_model`` is truthy, the return value is a
    ``SmoothedLanguageModel`` built from the new model and the background
    model; otherwise the new model itself is returned.
    """
    term_counts = lm_methods.extract_term_dict_from_text(
        self._make_topic_text(search_context), self._stopword_file)
    unsmoothed = LanguageModel(term_dict=term_counts)

    background = self.background_language_model
    if background:
        return SmoothedLanguageModel(unsmoothed, background)
    return unsmoothed
def make_topic_language_model(self):
    """
    Produce the topic language model from weighted topic and background terms.

    Topic term counts are extracted from ``self._make_topic_text()``;
    background terms are read from ``self._topic.background_terms``. Both
    are merged with ``self._combine_dictionaries`` (weights:
    ``self.topic_weighting`` and ``self.topic_background_weighting``) and
    the merged dictionary is used to build the ``LanguageModel`` stored on
    ``self.topic_language_model``. Nothing is returned.
    """
    text = self._make_topic_text()

    # Term counts for the topic text (described by the original comment as
    # the TREC topic title and description).
    from_topic = extract_term_dict_from_text(text, self._stopword_file)

    # Term counts from the topic background.
    from_background = self._topic.background_terms

    # Merge in two steps, starting from an empty dictionary.
    merged = self._combine_dictionaries(
        {}, from_topic, self.topic_weighting)
    merged = self._combine_dictionaries(
        merged, from_background, self.topic_background_weighting)

    self.topic_language_model = LanguageModel(term_dict=merged)

    log.debug("Making topic {0}".format(self._topic.id))