Beispiel #1
0
def test_document_sparse_attributes_scipy(scipy_sparse_matrix):
    """Store a scipy sparse matrix as ``embedding`` and ``blob`` and read it back.

    For each of the two fields, the dense form of the value read from the
    document must equal the dense form of the matrix that was assigned.
    """
    doc = Document()
    doc.embedding = scipy_sparse_matrix
    doc.blob = scipy_sparse_matrix

    # Same comparison for both fields, in the order they were assigned.
    for field_name in ('embedding', 'blob'):
        stored = getattr(doc, field_name)
        np.testing.assert_array_equal(
            stored.todense(), scipy_sparse_matrix.todense()
        )
Beispiel #2
0
def test_document_pretty_dict():
    """Round-trip a document with a chunk and a match through ``dict(prettify_ndarrays=True)``.

    The chunk and the match are copies of the root document that get their
    own blob and embedding. Values are checked three times: on the live
    objects, in the prettified dict, and on a ``Document`` rebuilt from that
    dict.
    """
    expected_tags = {'hello': 'world'}
    root_blob, root_embedding = [[0, 1, 2], [2, 1, 0]], [1.0, 2.0, 3.0]
    chunk_blob, chunk_embedding = [[3, 4, 5], [5, 4, 3]], [4.0, 5.0, 6.0]
    match_blob, match_embedding = [[6, 7, 8], [8, 7, 6]], [7.0, 8.0, 9.0]

    def check_document(document, blob, embedding):
        # Attribute-level check: arrays are turned into lists before comparing.
        assert document.tags == expected_tags
        assert document.blob.tolist() == blob
        assert document.embedding.tolist() == embedding

    def check_mapping(mapping, blob, embedding):
        # Dict-level check: entries are compared directly against lists.
        assert mapping['blob'] == blob
        assert mapping['embedding'] == embedding
        assert mapping['tags'] == expected_tags

    root = Document(
        blob=np.array(root_blob),
        embedding=np.array(root_embedding),
        tags={'hello': 'world'},
    )
    # Both copies are taken before anything is appended to the root.
    child = Document(root, copy=True)
    child.blob = np.array(chunk_blob)
    child.embedding = np.array(chunk_embedding)
    hit = Document(root, copy=True)
    hit.blob = np.array(match_blob)
    hit.embedding = np.array(match_embedding)
    root.chunks.append(child)
    root.matches.append(hit)

    check_document(root, root_blob, root_embedding)
    check_document(root.chunks[0], chunk_blob, chunk_embedding)
    check_document(root.matches[0], match_blob, match_embedding)

    pretty = root.dict(prettify_ndarrays=True)
    check_mapping(pretty, root_blob, root_embedding)
    check_mapping(pretty['chunks'][0], chunk_blob, chunk_embedding)
    check_mapping(pretty['matches'][0], match_blob, match_embedding)

    rebuilt = Document(pretty)
    check_document(rebuilt, root_blob, root_embedding)
    check_document(rebuilt.chunks[0], chunk_blob, chunk_embedding)
    check_document(rebuilt.matches[0], match_blob, match_embedding)
Beispiel #3
0
def test_document_sparse_attributes_pytorch(torch_sparse_matrix):
    """Store a torch sparse tensor as ``embedding`` and ``blob`` and read it back.

    For each field, the dense form of the value read from the document must
    equal the input tensor densified and converted to a numpy array.
    """
    doc = Document()
    doc.embedding = torch_sparse_matrix
    doc.blob = torch_sparse_matrix

    # Same comparison for both fields, in the order they were assigned.
    for field_name in ('embedding', 'blob'):
        stored = getattr(doc, field_name)
        np.testing.assert_array_equal(
            stored.todense(), torch_sparse_matrix.to_dense().numpy()
        )
Beispiel #4
0
def test_document_sparse_attributes_tensorflow(tf_sparse_matrix):
    """Store a TensorFlow sparse tensor as ``embedding`` and ``blob`` and read it back.

    For each field, the dense form of the value read from the document must
    equal ``tf.sparse.to_dense`` applied to the input tensor.
    """
    # Imported locally, as in the original, so the module loads without TensorFlow.
    import tensorflow as tf

    doc = Document()
    doc.embedding = tf_sparse_matrix
    doc.blob = tf_sparse_matrix

    # Same comparison for both fields, in the order they were assigned.
    for field_name in ('embedding', 'blob'):
        stored = getattr(doc, field_name)
        np.testing.assert_array_equal(
            stored.todense(), tf.sparse.to_dense(tf_sparse_matrix)
        )
Beispiel #5
0
def test_doc_content():
    """Check that ``Document.content`` tracks the most recently set content field.

    A fresh document has ``content`` of ``None``. After ``text``, ``blob`` and
    ``buffer`` are assigned in turn, ``content`` is compared against the value
    that was just assigned.
    """
    d = Document()
    assert d.content is None

    d.text = 'abc'
    assert d.content == 'abc'

    c = np.random.random([10, 10])
    d.blob = c
    np.testing.assert_equal(d.content, c)

    d.buffer = b'123'
    assert d.buffer == b'123'
    # The test previously stopped at the ``buffer`` read-back, so the
    # ``content`` view of a buffer was never checked, unlike text and blob.
    assert d.content == b'123'
Beispiel #6
0
def test_sparse_get_set():
    """Set sparse matrices through ``content`` and ``blob`` and read them back via ``content``.

    Equality is judged by the number of stored entries in the element-wise
    ``!=`` comparison: zero means the two matrices match.
    """

    def mismatch_count(left, right):
        # Number of stored entries in the element-wise inequality result.
        return (left != right).nnz

    doc = Document()
    assert doc.content is None

    first = coo_matrix(np.array([1, 2, 3]))
    doc.content = first
    assert mismatch_count(doc.content, first) == 0

    second = coo_matrix(np.array([3, 2, 1]))
    # Before reassignment the stored content still differs from the second matrix.
    assert mismatch_count(doc.content, second) != 0
    doc.blob = second
    assert mismatch_count(doc.content, second) == 0