コード例 #1
0
 def get_frequency(self, word):
     """
     Looks up how often a word occurs, ignoring letter case.

     The word is lower-cased and its count is read from ``self.word_distribution`` (the Brown corpus counts,
     according to the original documentation of this method).  The outcome of a case-insensitive membership
     test against ``self.words`` is then added to that count, so a word that ``binary_search`` locates there
     contributes one extra occurrence.
     :param word: word to find in the corpus
     :return: stored count for the lower-cased word plus the membership flag described above
     """
     def compare_ignoring_case(left, right):
         # Three-way comparison on the lower-cased forms:
         # 0 when equal, 1 when left sorts after right, -1 otherwise.
         folded_left, folded_right = left.lower(), right.lower()
         if folded_left == folded_right:
             return 0
         return 1 if folded_left > folded_right else -1

     lowered = word.lower()
     # NOTE(review): binary_search is defined elsewhere; presumably it returns -1 when nothing matches -- confirm.
     position = binary_search(self.words, lowered, compare_ignoring_case)
     found_in_word_list = position != -1
     # Adding the flag raises the result by one when the word was found in self.words.
     return self.word_distribution[lowered] + found_in_word_list
コード例 #2
0
 def get_observed_probability(self, state, observation):
     """
     Returns the weight of seeing an observation while the model is in a given state.

     This is not a true probability: an observation that cannot be found in ``self.observations`` is scored by
     ``self.unseen_observation_handler`` instead of by relative frequency.
     :param state: the current state of the model
     :param observation: the observation
     :return: count(observation, state) / count(state) when the observation has been recorded for the state,
     0 when it is a known observation that has not been recorded for this state, or the value supplied by the
     unseen_observation_handler when the observation is not in the list of possible observations
     """
     # NOTE(review): binary_search is defined elsewhere; presumably it returns -1 when nothing matches -- confirm.
     is_unseen = binary_search(self.observations, observation) == -1
     if is_unseen:
         return self.unseen_observation_handler.get_probability(state, observation)

     counts_for_state = self.observation_frequencies[state]
     if observation not in counts_for_state:
         # Known observation, but not recorded under this state.
         return 0
     return counts_for_state[observation] / self.state_counts[state]