Beispiel #1
0
 def test_add_from_counter_n_most_values_with_default(self):
     vocab = Vocabulary()
     new_token_to_id = Counter({"a": 3, "b": 1, "c": 4, "d": 1})
     vocab.add_from_counter(
         "token_to_id", new_token_to_id, n_most_values=4, add_values=["<SOS>", "<EOS>"],
     )
     self.assertDictEqual(vocab.token_to_id, {"<SOS>": 0, "<EOS>": 1, "c": 2, "a": 3})
def _vocab_from_counters(
    config: PreprocessingConfig, token_counter: Counter, target_counter: Counter, type_counter: Counter
) -> Vocabulary:
    vocab = Vocabulary()
    names_additional_tokens = [SOS, EOS, PAD, UNK] if config.wrap_name else [PAD, UNK]
    vocab.add_from_counter("token_to_id", token_counter, config.subtoken_vocab_max_size, names_additional_tokens)
    target_additional_tokens = [SOS, EOS, PAD, UNK] if config.wrap_target else [PAD, UNK]
    vocab.add_from_counter("label_to_id", target_counter, config.target_vocab_max_size, target_additional_tokens)
    paths_additional_tokens = [SOS, EOS, PAD, UNK] if config.wrap_path else [PAD, UNK]
    vocab.add_from_counter("type_to_id", type_counter, -1, paths_additional_tokens)
    return vocab
Beispiel #3
0
 def test_add_from_counter_all_values(self):
     vocab = Vocabulary()
     new_token_to_id = Counter({"a": 3, "b": 1, "c": 4, "d": 1})
     vocab.add_from_counter("token_to_id", new_token_to_id)
     self.assertDictEqual(vocab.token_to_id, {"c": 0, "a": 1, "b": 2, "d": 3})
Beispiel #4
0
 def test_add_from_counter_raise_error(self):
     vocab = Vocabulary()
     values_counter = Counter({"a": 3, "b": 1, "c": 4, "d": 1})
     with self.assertRaises(ValueError):
         vocab.add_from_counter("unknown_field", values_counter)