def test_document_sparse_attributes_scipy(scipy_sparse_matrix):
    """Store a scipy sparse matrix as embedding and blob and read both back.

    Both attributes are assigned first; afterwards each one is densified
    and compared against the dense form of the input matrix.
    """
    doc = Document()
    sparse_fields = ('embedding', 'blob')
    for field in sparse_fields:
        setattr(doc, field, scipy_sparse_matrix)

    expected = scipy_sparse_matrix.todense()
    for field in sparse_fields:
        stored = getattr(doc, field)
        np.testing.assert_array_equal(stored.todense(), expected)
def test_document_pretty_dict():
    """Round-trip a document tree through ``dict(prettify_ndarrays=True)``.

    A root document gets one chunk and one match, each created as a copy of
    the root and then given its own blob and embedding. The test checks the
    live tree, the prettified dict (arrays compared as plain lists), and a
    document rebuilt from that dict.
    """
    expected_tags = {'hello': 'world'}
    root_blob, root_embedding = [[0, 1, 2], [2, 1, 0]], [1.0, 2.0, 3.0]
    chunk_blob, chunk_embedding = [[3, 4, 5], [5, 4, 3]], [4.0, 5.0, 6.0]
    match_blob, match_embedding = [[6, 7, 8], [8, 7, 6]], [7.0, 8.0, 9.0]

    def check_document(document, blob, embedding):
        # Verify one Document instance: tags, then blob, then embedding.
        assert document.tags == expected_tags
        assert document.blob.tolist() == blob
        assert document.embedding.tolist() == embedding

    def check_tree(root):
        # Verify the root plus its first chunk and first match.
        check_document(root, root_blob, root_embedding)
        check_document(root.chunks[0], chunk_blob, chunk_embedding)
        check_document(root.matches[0], match_blob, match_embedding)

    def check_plain(entry, blob, embedding):
        # Verify one dict entry produced by the prettified export.
        assert entry['blob'] == blob
        assert entry['embedding'] == embedding
        assert entry['tags'] == expected_tags

    doc = Document(
        blob=np.array(root_blob),
        embedding=np.array(root_embedding),
        tags={'hello': 'world'},
    )

    chunk = Document(doc, copy=True)
    chunk.blob = np.array(chunk_blob)
    chunk.embedding = np.array(chunk_embedding)

    match = Document(doc, copy=True)
    match.blob = np.array(match_blob)
    match.embedding = np.array(match_embedding)

    doc.chunks.append(chunk)
    doc.matches.append(match)

    check_tree(doc)

    d = doc.dict(prettify_ndarrays=True)
    check_plain(d, root_blob, root_embedding)
    check_plain(d['chunks'][0], chunk_blob, chunk_embedding)
    check_plain(d['matches'][0], match_blob, match_embedding)

    check_tree(Document(d))
def test_document_sparse_attributes_pytorch(torch_sparse_matrix):
    """Store a torch sparse tensor as embedding and blob and read both back.

    Both attributes are assigned first; afterwards each one is densified
    and compared against the dense numpy form of the input tensor.
    """
    doc = Document()
    sparse_fields = ('embedding', 'blob')
    for field in sparse_fields:
        setattr(doc, field, torch_sparse_matrix)

    expected = torch_sparse_matrix.to_dense().numpy()
    for field in sparse_fields:
        stored = getattr(doc, field)
        np.testing.assert_array_equal(stored.todense(), expected)
def test_document_sparse_attributes_tensorflow(tf_sparse_matrix):
    """Store a TensorFlow sparse tensor as embedding and blob and read both back.

    Both attributes are assigned first; afterwards each one is densified
    and compared against ``tf.sparse.to_dense`` of the input tensor.
    """
    # Imported locally so the module can be collected without TensorFlow.
    import tensorflow as tf

    doc = Document()
    sparse_fields = ('embedding', 'blob')
    for field in sparse_fields:
        setattr(doc, field, tf_sparse_matrix)

    expected = tf.sparse.to_dense(tf_sparse_matrix)
    for field in sparse_fields:
        stored = getattr(doc, field)
        np.testing.assert_array_equal(stored.todense(), expected)
def test_doc_content():
    """Check that ``content`` follows whichever content field was set last.

    Starts from an empty document (``content`` is ``None``), then assigns
    text, a blob and a buffer in turn, asserting after each assignment that
    ``content`` returns the value just stored.
    """
    d = Document()
    assert d.content is None

    d.text = 'abc'
    assert d.content == 'abc'

    c = np.random.random([10, 10])
    d.blob = c
    np.testing.assert_equal(d.content, c)

    d.buffer = b'123'
    assert d.buffer == b'123'
    # The text and blob steps verify `content`; the buffer step previously
    # only checked the `buffer` attribute itself, leaving the `content`
    # accessor untested for buffers.
    assert d.content == b'123'
def test_sparse_get_set():
    """Set sparse matrices via ``content`` and ``blob`` and read them via ``content``.

    Equality is judged by counting the non-zero entries of the element-wise
    ``!=`` comparison: zero means the stored matrix matches.
    """
    doc = Document()
    assert doc.content is None

    def mismatches(reference):
        # Number of positions where the stored content differs from reference.
        return (doc.content != reference).nnz

    first = coo_matrix(np.array([1, 2, 3]))
    doc.content = first
    assert mismatches(first) == 0

    second = coo_matrix(np.array([3, 2, 1]))
    # Before reassignment the stored content must still differ from `second`.
    assert mismatches(second) != 0

    doc.blob = second
    assert mismatches(second) == 0