Beispiel #1
0
def test_document_sparse_embedding(
    scipy_sparse_matrix,
    return_sparse_ndarray_cls_type,
    return_scipy_class_type,
    return_expected_type,
):
    """Read a sparse embedding back through a selected ``SparseNdArray`` class.

    A scipy sparse matrix is assigned as the Document's embedding and then
    fetched with ``get_sparse_embedding``. ``return_sparse_ndarray_cls_type``
    ('scipy', 'torch' or 'tf') picks which ``SparseNdArray`` class is handed
    over; for any other value ``None`` is passed. Only the 'scipy' branch
    forwards ``return_scipy_class_type`` as ``sp_format``.
    """
    doc = Document()
    doc.embedding = scipy_sparse_matrix

    backend = return_sparse_ndarray_cls_type
    sparse_cls = None
    extra_kwargs = {}
    # Import only the backend that was requested.
    if backend == 'scipy':
        from jina.types.ndarray.sparse.scipy import SparseNdArray as sparse_cls

        extra_kwargs = {'sp_format': return_scipy_class_type}
    elif backend == 'torch':
        from jina.types.ndarray.sparse.pytorch import SparseNdArray as sparse_cls
    elif backend == 'tf':
        from jina.types.ndarray.sparse.tensorflow import (
            SparseNdArray as sparse_cls,
        )

    result = doc.get_sparse_embedding(
        sparse_ndarray_cls_type=sparse_cls, **extra_kwargs
    )

    assert result is not None
    assert isinstance(result, return_expected_type)
    if backend == 'torch':
        # Only checked for the torch backend.
        assert result.is_sparse
Beispiel #2
0
def test_document_sparse_attributes_scipy(scipy_sparse_matrix):
    """Check that a scipy sparse matrix survives as ``embedding`` and ``blob``.

    Both attributes are set to the same matrix and their dense forms are
    compared against the dense form of the input.
    """
    doc = Document()
    doc.embedding = scipy_sparse_matrix
    doc.blob = scipy_sparse_matrix

    for attribute in ('embedding', 'blob'):
        stored = getattr(doc, attribute)
        np.testing.assert_array_equal(
            stored.todense(), scipy_sparse_matrix.todense()
        )
Beispiel #3
0
def test_document_pretty_dict():
    """Round-trip a Document with one chunk and one match through ``dict``.

    The tags, blob and embedding of the root document, its chunk and its
    match are verified three times: on the original object, on the mapping
    returned by ``dict(prettify_ndarrays=True)``, and on a Document rebuilt
    from that mapping.
    """
    expected_tags = {'hello': 'world'}
    root_blob, root_embedding = [[0, 1, 2], [2, 1, 0]], [1.0, 2.0, 3.0]
    chunk_blob, chunk_embedding = [[3, 4, 5], [5, 4, 3]], [4.0, 5.0, 6.0]
    match_blob, match_embedding = [[6, 7, 8], [8, 7, 6]], [7.0, 8.0, 9.0]

    def check_document(document, blob, embedding):
        # Attribute-level view of a single Document.
        assert document.tags == expected_tags
        assert document.blob.tolist() == blob
        assert document.embedding.tolist() == embedding

    def check_tree(root):
        # Root first, then its first chunk, then its first match.
        check_document(root, root_blob, root_embedding)
        check_document(root.chunks[0], chunk_blob, chunk_embedding)
        check_document(root.matches[0], match_blob, match_embedding)

    def check_entry(entry, blob, embedding):
        # Key-level view of a single prettified mapping.
        assert entry['blob'] == blob
        assert entry['embedding'] == embedding
        assert entry['tags'] == expected_tags

    doc = Document(
        blob=np.array(root_blob),
        embedding=np.array(root_embedding),
        tags={'hello': 'world'},
    )

    # Chunk and match start as copies of the root, then get their own arrays.
    chunk = Document(doc, copy=True)
    chunk.blob = np.array(chunk_blob)
    chunk.embedding = np.array(chunk_embedding)
    match = Document(doc, copy=True)
    match.blob = np.array(match_blob)
    match.embedding = np.array(match_embedding)
    doc.chunks.append(chunk)
    doc.matches.append(match)
    check_tree(doc)

    pretty = doc.dict(prettify_ndarrays=True)
    check_entry(pretty, root_blob, root_embedding)
    check_entry(pretty['chunks'][0], chunk_blob, chunk_embedding)
    check_entry(pretty['matches'][0], match_blob, match_embedding)

    rebuilt = Document(pretty)
    check_tree(rebuilt)
Beispiel #4
0
def test_document_sparse_attributes_pytorch(torch_sparse_matrix):
    """Check that a torch sparse tensor survives as ``embedding`` and ``blob``.

    Both attributes are set to the same tensor; the dense form of each stored
    value is compared with the tensor densified and converted to numpy.
    """
    doc = Document()
    doc.embedding = torch_sparse_matrix
    doc.blob = torch_sparse_matrix

    for attribute in ('embedding', 'blob'):
        stored = getattr(doc, attribute)
        np.testing.assert_array_equal(
            stored.todense(), torch_sparse_matrix.to_dense().numpy()
        )
Beispiel #5
0
def test_document_sparse_attributes_tensorflow(tf_sparse_matrix):
    """Check that a TF sparse tensor survives as ``embedding`` and ``blob``.

    Both attributes are set to the same tensor; the dense form of each stored
    value is compared with ``tf.sparse.to_dense`` applied to the input.
    """
    import tensorflow as tf

    doc = Document()
    doc.embedding = tf_sparse_matrix
    doc.blob = tf_sparse_matrix

    for attribute in ('embedding', 'blob'):
        stored = getattr(doc, attribute)
        np.testing.assert_array_equal(
            stored.todense(), tf.sparse.to_dense(tf_sparse_matrix)
        )
Beispiel #6
0
def test_empty_sparse_array():
    """Store an all-zero CSR matrix as an embedding and read it back.

    The value read back must be a ``coo_matrix`` with no element differing
    from the matrix that was assigned.
    """
    all_zero = csr_matrix([[0, 0, 0, 0, 0]])
    document = Document()
    document.embedding = all_zero

    assert isinstance(document.embedding, coo_matrix)
    # Element-wise inequality yields a sparse matrix; no stored entries
    # means no position differs.
    difference = document.embedding != all_zero
    assert difference.nnz == 0
Beispiel #7
0
def test_document_sparse_embedding(scipy_sparse_matrix):
    """Check ``sparse_embedding`` after assigning a scipy sparse embedding.

    The attribute must be set and must be a ``coo_matrix``.
    """
    document = Document()
    document.embedding = scipy_sparse_matrix

    assert document.sparse_embedding is not None
    assert isinstance(document.sparse_embedding, coo_matrix)