def transform_tags(self, pos: int, tag_voc: BaseVocabulary) -> dict:
    """Re-key the entries stored at ``self.anals[pos]`` by vocabulary tag index.

    Each entry's first element is converted to a tag string with
    ``self.anal2tag`` and looked up in ``tag_voc``. A tag string for which
    ``tag_voc.index`` returns ``None`` is registered with
    ``tag_voc.add_element``, so this call may grow the vocabulary.

    :param pos: index into ``self.anals`` selecting the entries to convert.
    :param tag_voc: vocabulary used to look up (and, if missing, add) tags.
    :return: a new dict mapping tag index to the entry's second element;
        when two entries resolve to the same tag, the later one wins.
    """

    def resolve_tag(analysis):
        # Look the tag up first; only register it when it is unknown.
        tag_string = self.anal2tag(analysis)
        known_index = tag_voc.index(tag_string)
        if known_index is not None:
            return known_index
        return tag_voc.add_element(tag_string)

    # NOTE(review): assumes iterating self.anals[pos] yields 2-element
    # (analysis, value) pairs — confirm against where anals is populated.
    return {
        resolve_tag(analysis): value
        for analysis, value in self.anals[pos]
    }
def __init__(
        self,
        tagging_order: int,
        emission_order: int,
        suffix_length: int,
        rare_frequency: int,
        standard_tokens_lexicon: Lexicon,
        spec_tokens_lexicon: Lexicon,
        tag_vocabulary: BaseVocabulary):
    """Store the given settings and register the EOS/BOS tags.

    Every argument is kept unchanged on the instance under an attribute of
    the same name. ``ModelData.EOS_TAG`` and ``ModelData.BOS_TAG`` are then
    added to ``tag_vocabulary`` and the values returned by ``add_element``
    are kept as ``eos_index`` and ``bos_index``.

    :param tagging_order: stored as ``self.tagging_order``.
    :param emission_order: stored as ``self.emission_order``.
    :param suffix_length: stored as ``self.suffix_length``.
    :param rare_frequency: stored as ``self.rare_frequency``.
    :param standard_tokens_lexicon: stored as ``self.standard_tokens_lexicon``.
    :param spec_tokens_lexicon: stored as ``self.spec_tokens_lexicon``.
    :param tag_vocabulary: stored as ``self.tag_vocabulary``; modified by
        this constructor through two ``add_element`` calls.
    """
    # Numeric settings, stored as given.
    self.tagging_order = tagging_order
    self.emission_order = emission_order
    self.suffix_length = suffix_length
    self.rare_frequency = rare_frequency

    # Lexicons and the tag vocabulary are kept by reference, not copied.
    self.standard_tokens_lexicon = standard_tokens_lexicon
    self.spec_tokens_lexicon = spec_tokens_lexicon
    self.tag_vocabulary = tag_vocabulary

    # EOS is registered before BOS; keep this order so that the values
    # returned by add_element stay the same as before.
    self.eos_index = tag_vocabulary.add_element(ModelData.EOS_TAG)
    self.bos_index = tag_vocabulary.add_element(ModelData.BOS_TAG)
def convert(self, word: str, vocab: BaseVocabulary) -> Token:
    """Build a ``Token`` for ``word`` from the result of ``self.analyse``.

    ``self.analyse(word)`` is expected to return an indexable ``(str, int)``
    pair. The integer at index 1 is turned back into a tag string with
    ``vocab.word``; the string at index 0 is passed through unchanged.

    :param word: the word form to analyse.
    :param vocab: vocabulary used to map the tag index to its string.
    :return: ``Token(word, <analysis[0]>, <tag string>)``.
    """
    analysis = self.analyse(word)
    # Index 1 holds the tag's vocabulary index; resolve it to the tag string.
    tag_string = vocab.word(analysis[1])
    # Index 0 is a str (presumably the stem/lemma — confirm against analyse).
    return Token(word, analysis[0], tag_string)