def test_save_vocab(self, instances, tmpdir):
    """Check that saving a built vocabulary produces a file on disk.

    A ``Vocab`` is built from the ``"single_instance"`` entry of the
    ``instances`` fixture, written to ``tempdir/vocab.json`` underneath
    ``tmpdir``, and the test then asserts that the path is a regular file.
    """
    token_limit = 100
    builder = Vocab(
        instances=instances["single_instance"],
        max_num_tokens=token_limit,
    )
    builder.build_vocab()

    # Save into a fresh sub-directory of the pytest-provided temp dir.
    target_dir = tmpdir.mkdir("tempdir")
    vocab_path = target_dir.join("vocab.json")
    builder.save_to_file(vocab_path)

    assert os.path.isfile(vocab_path)
def test_load_vocab(self, instances, tmpdir):
    """Check that a vocabulary survives a save/load round trip.

    A ``Vocab`` is built from the ``"single_instance"`` entry of the
    ``instances`` fixture, saved to ``tempdir/vocab.json`` underneath
    ``tmpdir``, and reloaded through ``Vocab.load_from_file``. The reloaded
    vocabulary's length must equal 3 plus the number of entries in the
    builder's ``special_vocab``.
    """
    token_limit = 100
    builder = Vocab(
        instances=instances["single_instance"],
        max_num_tokens=token_limit,
    )
    builder.build_vocab()

    target_dir = tmpdir.mkdir("tempdir")
    vocab_path = target_dir.join("vocab.json")
    builder.save_to_file(vocab_path)

    restored = Vocab.load_from_file(filename=vocab_path)

    # NOTE(review): the literal 3 is presumably the number of distinct
    # tokens in the single-instance fixture -- confirm against the fixture.
    expected_len = 3 + len(builder.special_vocab)
    assert restored.get_vocab_len() == expected_len