def get_frequency(self, word):
    """
    Looks up how often a word occurs, ignoring letter case.

    The result is the entry of ``self.word_distribution`` for the lower-cased word, increased by one when
    the word is also found (case-insensitively) in ``self.words``.

    :param word: word to look up; it is lower-cased before any lookup
    :return: the stored count for the lower-cased word, plus one if the word is present in ``self.words``
    """
    lowered = word.lower()

    def compare_ignoring_case(left, right):
        # Three-way comparison on the lower-cased forms: 0 when equal, 1 when left sorts after right, else -1.
        left_key, right_key = left.lower(), right.lower()
        if left_key == right_key:
            return 0
        return 1 if left_key > right_key else -1

    # binary_search signals "not found" with -1; any other result means the word is listed.
    # NOTE(review): self.words is presumably a sorted word list (the original local was named in_cmu) - confirm.
    position = binary_search(self.words, lowered, compare_ignoring_case)
    is_listed = position != -1

    # Adding the boolean contributes 1 for listed words and 0 otherwise.
    return self.word_distribution[lowered] + is_listed
def get_observed_probability(self, state, observation):
    """
    Computes the weight of an observation given a state.

    The value is not a true probability: an observation that cannot be found in ``self.observations`` is
    delegated to ``self.unseen_observation_handler``, which supplies its own weight.

    :param state: the current state of the model
    :param observation: the observation to score
    :return: the handler's value when the observation is not in ``self.observations``; otherwise the frequency
             recorded for the observation under the state divided by ``self.state_counts[state]``, or 0 when the
             observation has no entry for that state
    """
    # binary_search signals "not found" with -1.
    is_known = binary_search(self.observations, observation) != -1
    if not is_known:
        return self.unseen_observation_handler.get_probability(state, observation)

    per_state_frequencies = self.observation_frequencies[state]
    # Known observation that has no entry for this state gets weight 0.
    if observation not in per_state_frequencies:
        return 0
    return per_state_frequencies[observation] / self.state_counts[state]