class SingleTagProbabilityEstimator(object):
    """Re-score parses and order tags using a P(tag|word) distribution.

    The distribution is loaded from the ``p_t_given_w.intdawg`` file located
    in the dictionary directory given to the constructor.
    """

    def __init__(self, dict_path):
        """Load the conditional probability distribution from ``dict_path``."""
        dawg_file = os.path.join(dict_path, 'p_t_given_w.intdawg')
        self.p_t_given_w = ConditionalProbDistDAWG().load(dawg_file)

    def apply_to_parses(self, word, word_lower, parses):
        """Return ``parses`` with scores derived from P(tag|word_lower).

        Every parse is unpacked as a 5-item
        ``(word, tag, normal_form, score, methods_stack)`` tuple.
        The ``word`` argument itself is not used.

        * An empty ``parses`` is returned unchanged (the same object).
        * When the looked-up probabilities sum to zero, the existing scores
          are multiplied by ``1.0 / sum(map(_score_getter, parses))`` and
          the original order is kept.  This raises ``ZeroDivisionError``
          if that sum is zero.
        * Otherwise each score is replaced with the looked-up probability
          and the result is sorted by ``_score_getter`` in descending order.
        """
        if not parses:
            return parses

        lookup = self.p_t_given_w.prob
        tag_probs = []
        for (_form, tag, _normal_form, _score, _stack) in parses:
            tag_probs.append(lookup(word_lower, tag))

        if sum(tag_probs) == 0:
            # no P(t|w) information is available; return normalized estimate
            scale = 1.0 / sum(map(_score_getter, parses))
            rescaled = []
            for (form, tag, normal_form, score, stack) in parses:
                rescaled.append((form, tag, normal_form, score * scale, stack))
            return rescaled

        # replace score with P(t|w) probability
        rescored = [
            (form, tag, normal_form, prob, stack)
            for (form, tag, normal_form, _score, stack), prob
            in zip(parses, tag_probs)
        ]
        # list.sort is stable, so ties keep their relative input order,
        # exactly as sorted(..., reverse=True) would.
        rescored.sort(key=_score_getter, reverse=True)
        return rescored

    def apply_to_tags(self, word, word_lower, tags):
        """Return ``tags`` as a list ordered by P(tag|word_lower), highest first.

        An empty ``tags`` is returned unchanged (the same object).
        The ``word`` argument is not used.
        """
        if not tags:
            return tags

        def tag_probability(tag):
            return self.p_t_given_w.prob(word_lower, tag)

        return sorted(tags, key=tag_probability, reverse=True)
class ProbabilityEstimator(object):
    """Re-score parses and order tags using a P(tag|word) distribution.

    The distribution is loaded from the ``p_t_given_w.intdawg`` file located
    in the dictionary directory given to the constructor.
    """

    def __init__(self, dict_path):
        """Load the conditional probability distribution from ``dict_path``."""
        dawg_file = os.path.join(dict_path, 'p_t_given_w.intdawg')
        self.p_t_given_w = ConditionalProbDistDAWG().load(dawg_file)

    def apply_to_parses(self, word, word_lower, parses):
        """Return ``parses`` with scores derived from P(tag|word_lower).

        Every parse is unpacked as a 5-item
        ``(word, tag, normal_form, score, methods_stack)`` tuple.
        The ``word`` argument itself is not used.

        * An empty ``parses`` is returned unchanged (the same object).
        * When the looked-up probabilities sum to zero, the existing scores
          are multiplied by ``1.0 / sum(map(_score_getter, parses))`` and
          the original order is kept.  This raises ``ZeroDivisionError``
          if that sum is zero.
        * Otherwise each score is replaced with the looked-up probability
          and the result is sorted by ``_score_getter`` in descending order.
        """
        if not parses:
            return parses

        lookup = self.p_t_given_w.prob
        tag_probs = []
        for (_form, tag, _normal_form, _score, _stack) in parses:
            tag_probs.append(lookup(word_lower, tag))

        if sum(tag_probs) == 0:
            # no P(t|w) information is available; return normalized estimate
            scale = 1.0 / sum(map(_score_getter, parses))
            rescaled = []
            for (form, tag, normal_form, score, stack) in parses:
                rescaled.append((form, tag, normal_form, score * scale, stack))
            return rescaled

        # replace score with P(t|w) probability
        rescored = [
            (form, tag, normal_form, prob, stack)
            for (form, tag, normal_form, _score, stack), prob
            in zip(parses, tag_probs)
        ]
        # list.sort is stable, so ties keep their relative input order,
        # exactly as sorted(..., reverse=True) would.
        rescored.sort(key=_score_getter, reverse=True)
        return rescored

    def apply_to_tags(self, word, word_lower, tags):
        """Return ``tags`` as a list ordered by P(tag|word_lower), highest first.

        An empty ``tags`` is returned unchanged (the same object).
        The ``word`` argument is not used.
        """
        if not tags:
            return tags

        def tag_probability(tag):
            return self.p_t_given_w.prob(word_lower, tag)

        return sorted(tags, key=tag_probability, reverse=True)