def test_vocab_prune_vectors(): vocab = Vocab() _ = vocab["cat"] # noqa: F841 _ = vocab["dog"] # noqa: F841 _ = vocab["kitten"] # noqa: F841 data = numpy.ndarray((5, 3), dtype="f") data[0] = 1.0 data[1] = 2.0 data[2] = 1.1 vocab.set_vector("cat", data[0]) vocab.set_vector("dog", data[1]) vocab.set_vector("kitten", data[2]) remap = vocab.prune_vectors(2) assert list(remap.keys()) == ["kitten"] neighbour, similarity = list(remap.values())[0] assert neighbour == "cat", remap assert_allclose(similarity, cosine(data[0], data[2]), atol=1e-6)
def test_vocab_prune_vectors(): vocab = Vocab(vectors_name="test_vocab_prune_vectors") _ = vocab["cat"] # noqa: F841 _ = vocab["dog"] # noqa: F841 _ = vocab["kitten"] # noqa: F841 data = numpy.ndarray((5, 3), dtype="f") data[0] = [1.0, 1.2, 1.1] data[1] = [0.3, 1.3, 1.0] data[2] = [0.9, 1.22, 1.05] vocab.set_vector("cat", data[0]) vocab.set_vector("dog", data[1]) vocab.set_vector("kitten", data[2]) remap = vocab.prune_vectors(2, batch_size=2) assert list(remap.keys()) == ["kitten"] neighbour, similarity = list(remap.values())[0] assert neighbour == "cat", remap assert_allclose(similarity, cosine(data[0], data[2]), atol=1e-4, rtol=1e-3)