def test_filter(self, file, reference_dict, num_present):
    """Check that ``file.filter`` returns exactly the sampled entries.

    A subset of ``reference_dict`` with ``num_present`` entries is sampled,
    the file is filtered with a predicate that accepts only words belonging
    to that subset, and the pairs produced by the filter are collected into
    a dict and compared against the subset.
    """
    expected = sample_dict_subset(reference_dict, num_present)

    # The predicate keeps a word only if it belongs to the sampled subset.
    filtered = dict(file.filter(lambda word: word in expected))

    assert_equiv_word2vec(filtered, expected)
def test_load(self, file, reference_dict):
    """Check ``file.load`` on a fully present query and on one with a miss.

    Ten entries are sampled from ``reference_dict``. Loading exactly those
    words must give a result equivalent to the sample; loading the same
    words plus one extra placeholder word must raise ``KeyError``.
    """
    expected = sample_dict_subset(reference_dict, 10)
    present_words = list(expected.keys())

    # Case 1: every requested word comes from the sampled subset.
    loaded = file.load(present_words)
    assert_equiv_word2vec(loaded, expected)

    # Case 2: one additional word (presumably absent from the file) is
    # appended to the query, and the whole call is expected to fail.
    words_with_missing = present_words + ['<<missing-word>>']
    with pytest.raises(KeyError):
        file.load(words_with_missing)
def generate_find_case(
        word2vec: dict,
        num_present_words: int,
        num_missing_words: int):
    """
    Build a test case for EmbFile.find().

    Returns a ``SimpleNamespace`` with the following attributes:

    - ``query_words``: the words to find (present and missing words mixed
      together by ``shuffled``);
    - ``target_dict``: the expected output word2vec dictionary, sampled from
      ``word2vec`` with ``num_present_words`` entries;
    - ``present_words``: the subset of ``query_words`` that is in the file,
      i.e. the keys of ``target_dict``;
    - ``missing_words``: the subset of ``query_words`` that is NOT in the
      file, generated with the words of ``word2vec`` forbidden.
    """
    # The helper calls are kept in this order on purpose: they may draw from
    # shared random state, so reordering could change the generated case.
    expected = sample_dict_subset(word2vec, num_present_words)
    in_file = list(expected.keys())
    not_in_file = generate_vocab(num_missing_words, forbidden_words=word2vec)
    queries = shuffled([*in_file, *not_in_file])

    return SimpleNamespace(
        query_words=queries,
        target_dict=expected,
        present_words=in_file,
        missing_words=not_in_file,
    )