Beispiel #1
0
def test_vocab_prune_vectors():
    """Pruning to two vector rows must remap the dropped word to its nearest kept word.

    Three words are given vectors, the table is pruned to two rows, and the
    returned remap is expected to hold a single entry: "kitten", pointing at
    "cat" together with the cosine similarity between their vectors.
    """
    vocab = Vocab()
    # Look each word up before assigning vectors; the results are unused.
    # NOTE(review): presumably this registers the entries in the vocab in a
    # fixed order -- confirm against Vocab.__getitem__.
    _ = vocab["cat"]  # noqa: F841
    _ = vocab["dog"]  # noqa: F841
    _ = vocab["kitten"]  # noqa: F841
    # The vectors must point in different directions. Scalar-filled rows such
    # as [1, 1, 1] and [2, 2, 2] are all parallel, so every pairwise cosine
    # similarity is 1.0 and the choice between "cat" and "dog" as the nearest
    # neighbour of "kitten" would come down to rounding noise.
    data = numpy.asarray(
        [
            [1.0, 1.2, 1.1],  # cat
            [0.3, 1.3, 1.0],  # dog
            [0.9, 1.22, 1.05],  # kitten: deliberately closest to cat
        ],
        dtype="f",
    )
    vocab.set_vector("cat", data[0])
    vocab.set_vector("dog", data[1])
    vocab.set_vector("kitten", data[2])

    remap = vocab.prune_vectors(2)
    assert list(remap.keys()) == ["kitten"]
    neighbour, similarity = list(remap.values())[0]
    assert neighbour == "cat", remap
    # Single-precision inputs: allow for float32 rounding differences between
    # the library's similarity computation and the reference cosine().
    assert_allclose(similarity, cosine(data[0], data[2]), atol=1e-4, rtol=1e-3)
Beispiel #2
0
def test_vocab_prune_vectors():
    """Prune a three-word vector table to two rows and check the remap.

    After pruning, the remap is expected to contain only "kitten", mapped to
    "cat" and to the cosine similarity of the two words' vectors.
    """
    words = ("cat", "dog", "kitten")
    vocab = Vocab(vectors_name="test_vocab_prune_vectors")
    # Look up every word before any vector is assigned; results are unused.
    for word in words:
        _ = vocab[word]  # noqa: F841

    # Five rows are allocated but only the first three are filled and used.
    data = numpy.ndarray((5, 3), dtype="f")
    data[:3] = [
        [1.0, 1.2, 1.1],
        [0.3, 1.3, 1.0],
        [0.9, 1.22, 1.05],
    ]
    # zip() stops after the last word, so the two spare rows are never read.
    for word, row in zip(words, data):
        vocab.set_vector(word, row)

    remap = vocab.prune_vectors(2, batch_size=2)
    assert list(remap.keys()) == ["kitten"]
    neighbour, similarity = next(iter(remap.values()))
    assert neighbour == "cat", remap
    expected = cosine(data[0], data[2])
    assert_allclose(similarity, expected, atol=1e-4, rtol=1e-3)