Exemple #1
0
 def test_idx2token_cries_for_vocab(self, instances):
     """Expect ``ValueError`` from a lookup on a ``Vocab`` that was never built.

     The ``Vocab`` is constructed from the ``single_instance`` entry of the
     ``instances`` fixture, and ``build_vocab`` is not called before the lookup.
     """
     max_tokens = 100
     unbuilt_vocab = Vocab(
         instances=instances["single_instance"],
         max_num_tokens=max_tokens,
     )
     # NOTE(review): the test name says idx2token, yet the call below is the
     # token -> idx lookup with an int argument -- confirm which lookup was
     # meant to be exercised here.
     with pytest.raises(ValueError):
         unbuilt_vocab.get_idx_from_token(1)
Exemple #2
0
 def test_add_tokens(self, instances, tmpdir):
     """Check that ``add_tokens`` registers new tokens after ``build_vocab``.

     After adding "very" and "much" to a built vocabulary, both tokens must
     be present in ``vocab.vocab`` with the expected entries, and both
     lookup directions (index -> token and token -> index) must agree.
     """
     sentences = instances["single_instance"]
     max_tokens = 100
     store_path = tmpdir.mkdir("tempdir").join("vocab.json")

     vocab = Vocab(
         instances=sentences,
         max_num_tokens=max_tokens,
         store_location=store_path,
     )
     vocab.build_vocab()
     vocab.add_tokens(["very", "much"])

     # Expected (token, index) pairs for the newly added tokens.
     # NOTE(review): indices 7 and 8 presumably follow the entries already
     # produced by build_vocab for this fixture -- confirm against the fixture.
     added = (("very", 7), ("much", 8))

     for token, _ in added:
         assert token in vocab.vocab.keys()

     # Each entry is compared against a (1, index) tuple; the leading 1 looks
     # like a count for the freshly added token -- TODO confirm.
     for token, index in added:
         assert vocab.vocab[token] == (1, index)

     for token, index in added:
         assert vocab.get_token_from_idx(index) == token

     for token, index in added:
         assert vocab.get_idx_from_token(token) == index