def test_build_with_min_count():
    """Check that building with ``min_count=3`` keeps only tokens seen 3+ times."""
    # "A" appears three times and "B" twice, so only "A" is expected to
    # survive alongside the "[PAD]" and "[UNK]" entries.
    occurrences = ["A"] * 3 + ["B"] * 2
    token_counter = Counter(occurrences)

    vocab = Vocab("token_name", min_count=3)
    vocab.build(token_counter)

    expected_tokens = ["[PAD]", "[UNK]", "A"]
    assert vocab.get_all_tokens() == expected_tokens
def _build_vocab_with_config(self, token_name, token_maker, token_counter):
    """Create a ``Vocab`` from a token maker's config and build it.

    Args:
        token_name: Value stored under the ``"token_name"`` key of
            ``token_maker.vocab_config`` before the vocab is constructed.
        token_maker: Object exposing a ``vocab_config`` mapping whose items
            are passed to ``Vocab`` as keyword arguments.
        token_counter: Token counts handed to the vocab's build method.

    Returns:
        The built ``Vocab`` instance.

    Note:
        ``token_maker.vocab_config`` is modified in place: its
        ``"token_name"`` entry is overwritten with ``token_name``.
    """
    token_maker.vocab_config["token_name"] = token_name
    vocab = Vocab(**token_maker.vocab_config)

    # Without a pretrained path the vocab is built from the counter alone;
    # otherwise the pretrained-file variant of the build is used.
    build_fn = (
        vocab.build
        if vocab.pretrained_path is None
        else vocab.build_with_pretrained_file
    )
    build_fn(token_counter)
    return vocab