Beispiel #1
0
 def next_for_guessed_voc_token(self, prev_tags_set: set, lword: str,
                                anals: list or set,
                                guesser: HashSuffixGuesser) -> dict:
     """Score every candidate tag of a token for each preceding tag context.

     Each tag in ``anals`` is mapped through ``guesser.mapper``.  When the
     mapped index is larger than the tag vocabulary's ``max_index()``, the
     fixed ``UNKOWN_TAG_TRANSITION`` / ``UNKNOWN_TAG_WEIGHT`` scores are
     used.  Otherwise the emission score is
     ``guesser.tag_log_probability(lword, tag)`` minus the log of the
     a-priori probability of the mapped tag (or ``UNKNOWN_TAG_WEIGHT`` when
     the guesser returns ``UNKNOWN_VALUE``), and the transition score is
     looked up per context in the tag transition model.

     :param prev_tags_set: contexts to score against; each element must be
         hashable and expose ``token_list``.
     :param lword: word form handed to the guesser.
     :param anals: candidate tags (note: the annotation ``list or set``
         evaluates to plain ``list``; any iterable of tags works here).
     :param guesser: object providing ``mapper.map`` and
         ``tag_log_probability``.
     :return: ``{prev_tags: {tag: (transition_prob, emission_prob)}}``;
         empty when ``anals`` or ``prev_tags_set`` is empty.
     :raises ValueError: from ``math.log`` if an a-priori probability is
         not positive.
     """
     rrr = dict()
     for tag in anals:
         new_tag = guesser.mapper.map(tag)
         if new_tag > self.model.data.tag_vocabulary.max_index():
             # Mapped tag is outside the known vocabulary: constant scores,
             # identical for every context.
             unknown_scores = (UNKOWN_TAG_TRANSITION, UNKNOWN_TAG_WEIGHT)
             for prev_tags in prev_tags_set:
                 rrr.setdefault(prev_tags, dict())[tag] = unknown_scores
         else:
             apriori_prob = self.model.compiled_data.apriori_tag_probs[
                 new_tag]
             log_apriori_prob = math.log(apriori_prob)
             tag_log_prob = guesser.tag_log_probability(lword, tag)
             if tag_log_prob == UNKNOWN_VALUE:
                 emission_prob = UNKNOWN_TAG_WEIGHT
             else:
                 emission_prob = tag_log_prob - log_apriori_prob
             transition_model = self.model.compiled_data.tag_transition_model
             for prev_tags in prev_tags_set:
                 transition_prob = transition_model.log_prob(
                     prev_tags.token_list, tag)
                 # Every context owns its inner dict.  Sharing one dict
                 # between all contexts made each of them report the
                 # transition score of whichever context was iterated last.
                 rrr.setdefault(prev_tags, dict())[tag] = (
                     transition_prob, emission_prob)
     return rrr
 def next_for_guessed_voc_token(self, prev_tags_set: set,
                                lword: str,
                                anals: list or set,
                                guesser: HashSuffixGuesser) -> dict:
     """Build per-context (transition, emission) scores for candidate tags.

     For every tag in ``anals`` the tag is first mapped with
     ``guesser.mapper.map``:

     * mapped index above ``tag_vocabulary.max_index()``: the pair
       ``(UNKOWN_TAG_TRANSITION, UNKNOWN_TAG_WEIGHT)`` is stored for every
       context;
     * otherwise: the emission score is the guesser's log probability of
       the tag for ``lword`` minus ``log`` of the mapped tag's a-priori
       probability (``UNKNOWN_TAG_WEIGHT`` if the guesser answers
       ``UNKNOWN_VALUE``), and the transition score comes from
       ``tag_transition_model.log_prob(prev_tags.token_list, tag)``.

     :param prev_tags_set: hashable context objects exposing ``token_list``.
     :param lword: word form passed to ``guesser.tag_log_probability``.
     :param anals: iterable of candidate tags (the ``list or set``
         annotation evaluates to ``list`` only).
     :param guesser: supplies ``mapper.map`` and ``tag_log_probability``.
     :return: dict mapping each context to its own
         ``{tag: (transition_prob, emission_prob)}`` dict; empty if either
         ``anals`` or ``prev_tags_set`` has no elements.
     :raises ValueError: propagated from ``math.log`` for a non-positive
         a-priori probability.
     """
     scores_by_context = dict()
     for tag in anals:
         new_tag = guesser.mapper.map(tag)
         if new_tag > self.model.data.tag_vocabulary.max_index():
             # Unknown mapped tag: same constant pair for all contexts.
             for prev_tags in prev_tags_set:
                 per_context = scores_by_context.setdefault(prev_tags, dict())
                 per_context[tag] = (UNKOWN_TAG_TRANSITION,
                                     UNKNOWN_TAG_WEIGHT)
             continue

         apriori_prob = self.model.compiled_data.apriori_tag_probs[new_tag]
         log_apriori_prob = math.log(apriori_prob)
         tag_log_prob = guesser.tag_log_probability(lword, tag)
         if tag_log_prob == UNKNOWN_VALUE:
             emission_prob = UNKNOWN_TAG_WEIGHT
         else:
             emission_prob = tag_log_prob - log_apriori_prob

         for prev_tags in prev_tags_set:
             transition_prob = self.model.compiled_data.tag_transition_model.log_prob(
                 prev_tags.token_list, tag)
             # A separate inner dict per context is required: with a single
             # shared dict the transition score written for the last
             # context silently replaced the score of all earlier ones.
             per_context = scores_by_context.setdefault(prev_tags, dict())
             per_context[tag] = (transition_prob, emission_prob)
     return scores_by_context