Esempio n. 1
0
def test_build_with_min_count():
    """Check that ``Vocab.build`` honours ``min_count``.

    The counter holds "A" three times and "B" twice; with ``min_count=3``
    the test expects only "A" to appear after the "[PAD]" and "[UNK]"
    entries in the vocabulary's token list.
    """
    # Same counts as tallying the sequence A, A, A, B, B.
    frequencies = Counter({"A": 3, "B": 2})
    expected_tokens = ["[PAD]", "[UNK]", "A"]

    vocabulary = Vocab("token_name", min_count=3)
    vocabulary.build(frequencies)

    assert vocabulary.get_all_tokens() == expected_tokens
Esempio n. 2
0
    def _build_vocab_with_config(self, token_name, token_maker, token_counter):
        """Create a ``Vocab`` from a token maker's config and populate it.

        Args:
            token_name: value stored under the ``"token_name"`` key of
                ``token_maker.vocab_config`` before the vocab is created.
            token_maker: object exposing a ``vocab_config`` mapping whose
                entries are passed to ``Vocab`` as keyword arguments.
            token_counter: token counts handed to the vocab's build method.

        Returns:
            The built ``Vocab`` instance.
        """
        # NOTE: this writes into the token maker's own config mapping,
        # so the "token_name" entry persists after this call.
        token_maker.vocab_config["token_name"] = token_name
        built = Vocab(**token_maker.vocab_config)

        # Use the pretrained-file build only when a pretrained path is set.
        populate = (
            built.build
            if built.pretrained_path is None
            else built.build_with_pretrained_file
        )
        populate(token_counter)
        return built