Exemple #1
0
    def test_filter(self, file, reference_dict, num_present):
        """Check that ``file.filter`` keeps exactly the sampled words.

        A subset of ``reference_dict`` with ``num_present`` entries is drawn
        via ``sample_dict_subset``; the predicate passed to ``file.filter``
        accepts a word only when it belongs to that subset. The filter output
        is collected into a dict and compared to the subset with
        ``assert_equiv_word2vec``.
        """
        expected = sample_dict_subset(reference_dict, num_present)

        # NOTE(review): dict() implies filter() yields (word, vector) pairs
        # -- confirm against the file API.
        kept = dict(file.filter(lambda word: word in expected))
        assert_equiv_word2vec(kept, expected)
Exemple #2
0
 def test_load(self, file, reference_dict):
     """Check ``file.load`` on a fully present query and on one with a gap.

     Ten entries are sampled from ``reference_dict``. Loading exactly those
     words must produce a result equivalent to the sample; adding a single
     word that is expected to be absent must make ``load`` raise
     ``KeyError``.
     """
     expected = sample_dict_subset(reference_dict, 10)
     requested = [*expected.keys()]

     # Case 1: every requested word is available.
     loaded = file.load(requested)
     assert_equiv_word2vec(loaded, expected)

     # Case 2: a single unknown word is appended to the request.
     with pytest.raises(KeyError):
         file.load([*requested, '<<missing-word>>'])
Exemple #3
0
def generate_find_case(word2vec: dict, num_present_words: int,
                       num_missing_words: int):
    """Build a test case for EmbFile.find().

    Args:
        word2vec: reference dictionary from which the present words are
            sampled; also passed to ``generate_vocab`` as the set of
            forbidden words when generating the missing ones.
        num_present_words: size of the subset sampled from ``word2vec``.
        num_missing_words: count passed to ``generate_vocab`` for the words
            that must not appear in ``word2vec``.

    Returns:
        A ``SimpleNamespace`` with the attributes:
            query_words   -- present and missing words combined, passed
                             through ``shuffled``;
            target_dict   -- the sampled subset (expected result);
            present_words -- list of the keys of ``target_dict``;
            missing_words -- the object returned by ``generate_vocab``.
    """
    # The helper calls stay in this order: they are presumably randomized,
    # so reordering could change what a seeded run produces.
    expected = sample_dict_subset(word2vec, num_present_words)
    in_file = list(expected.keys())
    not_in_file = generate_vocab(num_missing_words, forbidden_words=word2vec)

    case = SimpleNamespace()
    case.query_words = shuffled(in_file + list(not_in_file))  # words to find
    case.target_dict = expected  # expected output word2vec dictionary
    case.present_words = in_file  # subset of query_words in the file
    case.missing_words = not_in_file  # subset of query_words NOT in the file
    return case