Beispiel #1
0
def test_non_empty_fields():
    """Check what ``non_empty_fields`` reports for three kinds of documents."""
    # A document constructed with a score lists both the id and the score.
    scored_doc = Document(score=NamedScore(value=42))
    assert scored_doc.non_empty_fields == ('id', 'score')

    # A default-constructed document only lists its id.
    default_doc = Document()
    assert default_doc.non_empty_fields == ('id', )

    # An explicitly empty id leaves nothing to list.
    blank_doc = Document(id='')
    assert not blank_doc.non_empty_fields
def test_graph_count_invariance(graph, expected_output):
    """Add the same edge twice and compare the edge count.

    :param graph: graph object under test (supplied by the caller/fixture).
    :param expected_output: edge count expected after both insertions.
    """
    source = Document(text='Document0')
    target = Document(text='Document1')

    # The identical (source, target) edge is inserted two times in a row.
    for _ in range(2):
        graph.add_edge(source, target)

    assert graph.num_edges == expected_output
def test_graph_edge_behaviour_creation(graph, expected_output):
    """Add an edge in both directions and compare the edge count.

    :param graph: graph object under test (supplied by the caller/fixture).
    :param expected_output: edge count expected after both insertions.
    """
    first = Document(text='Document0')
    second = Document(text='Document1')

    # first -> second, then the reversed pair second -> first.
    for tail, head in ((first, second), (second, first)):
        graph.add_edge(tail, head)

    assert graph.num_edges == expected_output
Beispiel #4
0
def test_document_sparse_attributes_pytorch(torch_sparse_matrix):
    """Store a torch sparse matrix as embedding and blob, then read it back.

    :param torch_sparse_matrix: sparse matrix exposing ``to_dense().numpy()``.
    """
    doc = Document()
    doc.embedding = torch_sparse_matrix
    doc.blob = torch_sparse_matrix

    # Both attributes must densify to the same values as the input matrix.
    for field in ('embedding', 'blob'):
        stored = getattr(doc, field)
        np.testing.assert_array_equal(
            stored.todense(), torch_sparse_matrix.to_dense().numpy())
Beispiel #5
0
 def build_document(chunk=None):
     """Wrap ``chunk`` in a new Document and refresh both content hashes.

     Order matters: the chunk hash is computed first (leaving out
     ``parent_id``, ``id`` and ``content_hash``), only then is the chunk's
     ``parent_id`` set to 0, and finally the parent hash is computed over
     the ``chunks`` field alone.

     :param chunk: object appended as the only chunk of the new Document.
     :return: the newly created parent Document.
     """
     parent = Document()
     parent.chunks.append(chunk)
     skipped = ('parent_id', 'id', 'content_hash')
     parent.chunks[0].update_content_hash(exclude_fields=skipped)
     parent.chunks[0].parent_id = 0
     parent.update_content_hash(include_fields=('chunks',), exclude_fields=None)
     return parent
Beispiel #6
0
def test_add_single_edge_from_id_strings_non_existing_nodes():
    """``add_single_edge`` called with ids of unadded docs must assert."""
    graph = GraphDocument()
    source = Document(text='Document0')
    target = Document(text='Document1')
    edge_features = {'text': 'I connect Doc0 and Doc1'}

    # Neither document was added to the graph; only their ids are passed.
    with pytest.raises(AssertionError):
        graph.add_single_edge(source.id, target.id, features=edge_features)
Beispiel #7
0
def test_uri_get_set():
    """Exercise the ``uri`` setter/getter and its interplay with ``content``."""
    doc = Document()

    image_url = 'https://abc.com/a.jpg'
    doc.uri = image_url
    assert doc.uri == image_url
    # After assigning the ``.jpg`` URI the mime type must be image/jpeg.
    assert doc.mime_type == 'image/jpeg'

    plain = 'abcdefg'
    doc.uri = plain
    assert doc.uri == plain

    # Assigning the same string through ``content`` must end up in ``text``
    # and leave ``uri`` falsy.
    doc.content = plain
    assert doc.text == plain
    assert not doc.uri
Beispiel #8
0
def test_add_remove_node_deprecated():
    """Nodes added via ``add_node`` are gone again after ``remove_node``."""
    graph = GraphDocument()
    nodes = [Document(id='1'), Document(id='2')]

    for node in nodes:
        graph.add_node(node)
    assert len(graph.nodes) == 2

    # Remove in the same order the nodes were added.
    for node in nodes:
        graph.remove_node(node)
    assert len(graph.nodes) == 0
Beispiel #9
0
def test_pydatic_document_to_jina_document():
    """Build a Document from the JSON and the dict export of the model."""
    model_cls = PROTO_TO_PYDANTIC_MODELS.DocumentProto

    # First the ``.json()`` export, then the ``.dict()`` export; a fresh
    # model instance is created for each, as in a direct call chain.
    exporters = (lambda model: model.json(), lambda model: model.dict())
    for export in exporters:
        converted = Document(export(model_cls(text='abc')))
        assert converted.text == 'abc'
        assert converted.content == 'abc'
Beispiel #10
0
def test_manual_update_edges_features(graph, expected_output):
    """Write an edge feature straight into ``_pb_body`` and read it back.

    :param graph: graph object under test (supplied by the caller/fixture).
    :param expected_output: accepted but not used by this test.
    """
    source = Document(text='Document0')
    target = Document(text='Document1')
    graph.add_single_edge(source, target)

    # The key is derived from the two document ids.
    key = graph._get_edge_key(source.id, target.id)
    graph._pb_body.graph.edge_features[key] = {'number_value': 1234}

    stored = graph._pb_body.graph.edge_features[key]
    assert stored['number_value'] == 1234
Beispiel #11
0
def test_doc_content():
    """Walk a Document through text, blob and buffer assignments."""
    doc = Document()
    # A fresh document has no content at all.
    assert doc.content is None

    doc.text = 'abc'
    assert doc.content == 'abc'

    # After setting the blob, ``content`` must equal the assigned array.
    array = np.random.random([10, 10])
    doc.blob = array
    np.testing.assert_equal(doc.content, array)

    doc.buffer = b'123'
    assert doc.buffer == b'123'
Beispiel #12
0
def test_document_sparse_attributes_tensorflow(tf_sparse_matrix):
    """Store a TF sparse matrix as embedding and blob, then read it back.

    :param tf_sparse_matrix: sparse tensor accepted by ``tf.sparse.to_dense``.
    """
    import tensorflow as tf

    doc = Document()
    doc.embedding = tf_sparse_matrix
    doc.blob = tf_sparse_matrix

    # Both attributes must densify to the same values as the input tensor.
    for field in ('embedding', 'blob'):
        stored = getattr(doc, field)
        np.testing.assert_array_equal(
            stored.todense(), tf.sparse.to_dense(tf_sparse_matrix))
def test_added_edges_in_edge_features(graph, expected_output):
    """An edge added without features must map to ``None`` in ``edge_features``.

    :param graph: graph object under test (supplied by the caller/fixture).
    :param expected_output: accepted but not used by this test.
    """
    source = Document(text='Document0')
    target = Document(text='Document1')
    graph.add_edge(source, target)

    # Here the key is requested with the documents themselves, not their ids.
    key = graph._get_edge_key(source, target)

    assert key in graph.edge_features
    assert graph.edge_features[key] is None
Beispiel #14
0
def test_sparse_get_set():
    """Set sparse matrices through ``content`` and ``blob`` and compare."""
    doc = Document()
    assert doc.content is None

    # ``(a != b).nnz == 0`` means the two sparse matrices agree everywhere.
    first = coo_matrix(np.array([1, 2, 3]))
    doc.content = first
    assert (doc.content != first).nnz == 0

    second = coo_matrix(np.array([3, 2, 1]))
    # Before reassignment the stored content still differs from ``second``.
    assert (doc.content != second).nnz != 0
    doc.blob = second
    assert (doc.content != second).nnz == 0
Beispiel #15
0
def test_graph_add_multiple_nodes():
    """``add_nodes`` with four documents yields four nodes and no edges."""
    graph = GraphDocument()

    texts = ('Document0', 'Document1', 'Document2', 'Document3')
    graph.add_nodes([Document(text=text) for text in texts])

    assert graph.num_nodes == 4
    assert graph.num_edges == 0
Beispiel #16
0
def test_doc_update_fields():
    """Set several fields at once through ``Document.update`` and verify."""
    doc = Document()
    embedding = np.random.random([10, 10])
    tags = {'tags': 'string', 'tag-tag': {'tags': 123.45}}
    location = [12, 34, 56]
    modality = 'text-mod'

    doc.update(
        embedding=embedding,
        tags=tags,
        location=location,
        modality=modality,
    )

    np.testing.assert_equal(doc.embedding, embedding)
    assert list(doc.location) == location
    assert doc.modality == modality
    # Tags are converted with ``MessageToDict`` before the dict comparison.
    assert MessageToDict(doc.tags) == tags
Beispiel #17
0
def test_doc_score():
    """Assign a ``NamedScore`` to a Document and read its fields back."""
    from jina.types.score import NamedScore

    doc = Document(text='text')
    # The score references the document's own id.
    doc.score = NamedScore(op_name='operation', value=10.0, ref_id=doc.id)

    assert doc.score.op_name == 'operation'
    assert doc.score.value == 10.0
    assert doc.score.ref_id == doc.id
Beispiel #18
0
def test_tags_document():
    """Unknown keyword fields of the pydantic model must show up in ``tags``."""
    model_cls = PROTO_TO_PYDANTIC_MODELS.DocumentProto

    # Only an unknown field: it becomes the sole tag.
    extra_only = model_cls(hello='world')
    assert extra_only.tags == {'hello': 'world'}
    assert MessageToDict(Document(extra_only.dict()).tags) == {'hello': 'world'}

    # Unknown field plus explicit tags: both appear in the resulting tags.
    merged = model_cls(hello='world', tags={'key': 'value'})
    expected = {'hello': 'world', 'key': 'value'}
    assert merged.tags == expected
    assert MessageToDict(Document(merged.dict()).tags) == expected
Beispiel #19
0
def test_doc_plot():
    """Build a small chunk/match tree and request its mermaid URL as svg."""
    specs = (
        ('🐲', [0, 0], 'Azure Dragon', 'East'),
        ('🐦', [1, 0], 'Vermilion Bird', 'South'),
        ('🐢', [0, 1], 'Black Tortoise', 'North'),
        ('🐯', [1, 1], 'White Tiger', 'West'),
    )
    docs = [
        Document(id=doc_id,
                 embedding=np.array(coords),
                 tags={'guardian': guardian, 'position': position})
        for doc_id, coords, guardian, position in specs
    ]
    dragon, bird, tortoise, tiger = docs

    # dragon -> chunk bird -> chunk tortoise; tiger is a match of dragon.
    dragon.chunks.append(bird)
    dragon.chunks[0].chunks.append(tortoise)
    dragon.matches.append(tiger)

    assert dragon._mermaid_to_url('svg')
Beispiel #20
0
def test_query_match_array_sort_scores():
    """Sort a query's matches in place by their ``euclid`` score value."""

    def euclid_value(match):
        # Sort key: the numeric value stored under the 'euclid' score.
        return match.scores['euclid'].value

    query = Document()
    # Ten matches with ids 0..9 and scores 10 down to 1.
    query.matches = [
        Document(id=i, copy=True, scores={'euclid': 10 - i}) for i in range(10)
    ]
    assert query.matches[0].id == '0'
    assert query.matches[0].scores['euclid'].value == 10

    query.matches.sort(key=euclid_value)

    # Ascending order puts the smallest score (id 9) first.
    assert query.matches[0].id == '9'
    assert query.matches[0].scores['euclid'].value == 1
Beispiel #21
0
def get_test_doc():
    """Create two sample Documents, each with one chunk.

    :return: tuple ``(first, second)`` of the two Documents.
    """
    first = Document(
        id='🐲',
        content='hello-world',
        tags={'a': 'b'},
        embedding=np.array([1, 2, 3]),
        chunks=[Document(id='🐢')],
    )
    second = Document(
        id='🐦',
        content='goodbye-world',
        tags={'c': 'd'},
        embedding=np.array([4, 5, 6]),
        chunks=[Document(id='🐯')],
    )
    return first, second
Beispiel #22
0
def test_add_remove_edge_deprecated():
    """Removing an edge must keep both nodes but drop the edge count to 0."""
    graph = GraphDocument()
    source = Document(text='Document0')
    target = Document(text='Document1')
    edge_features = {'text': 'I connect Doc0 and Doc1'}

    graph.add_edge(source, target, features=edge_features)
    assert graph.num_nodes == 2
    assert graph.num_edges == 1

    graph.remove_edge(source, target)
    # The nodes stay in the graph; only the edge is gone.
    assert graph.num_nodes == 2
    assert graph.num_edges == 0
Beispiel #23
0
def eval_request():
    """Build a 'search' request holding five Documents.

    Each document id is the two-character string ``0<n>`` (n = 1..5)
    repeated eight times.

    :return: the populated ``Request``.
    """
    request = Request()
    request.request_type = 'search'
    # Docs 1-4, plus doc 5 which (per the original author's note) will be
    # missing from the KV indexer.
    for idx in range(5):
        doc = Document()
        doc.id = f'0{str(idx + 1)}' * 8
        request.docs.append(doc)
    return request
Beispiel #24
0
def test_doc_update_fields():
    """Set several fields at once through ``set_attributes`` and verify."""
    doc = Document()
    embedding = np.random.random([10, 10])
    tags = {'tags': 'string', 'tag-tag': {'tags': 123.45}}
    location = [12, 34, 56]
    modality = 'text-mod'
    weight = 2.0

    doc.set_attributes(
        embedding=embedding,
        tags=tags,
        location=location,
        modality=modality,
        weight=weight,
    )

    np.testing.assert_equal(doc.embedding, embedding)
    assert list(doc.location) == location
    assert doc.modality == modality
    # Tags are compared directly against the plain dict here.
    assert doc.tags == tags
    assert doc.weight == weight
Beispiel #25
0
def test_set_get_mime():
    """Each of three JPEG spellings must read back as 'image/jpeg'."""
    # Bare extension, long extension and dotted extension.
    for alias in ('jpg', 'jpeg', '.jpg'):
        doc = Document()
        doc.mime_type = alias
        assert doc.mime_type == 'image/jpeg'
def graph():
    """Build a ``GraphDocument`` with four documents and five labelled edges.

    :return: the populated graph.
    """
    result = GraphDocument()

    doc0 = Document(text='Document0')
    doc1 = Document(text='Document1')
    doc2 = Document(text='Document2')
    doc3 = Document(text='Document3')

    # (tail, head, edge text) in insertion order.
    edges = (
        (doc0, doc1, 'I connect Doc0 and Doc1'),
        (doc0, doc2, 'I connect Doc0 and Doc2'),
        (doc2, doc1, 'I connect Doc2 and Doc1'),
        (doc1, doc3, 'I connect Doc1 and Doc3'),
        (doc2, doc3, 'I connect Doc2 and Doc3'),
    )
    for tail, head, text in edges:
        result.add_edge(tail, head, features={'text': text})
    return result
Beispiel #27
0
def test_doc_arbitrary_dict(from_str):
    """Unknown top-level keys of the input must end up in ``tags``.

    :param from_str: when truthy, the dict is JSON-encoded before it is
        handed to ``Document``; otherwise the dict is passed as-is.
    """

    def build(payload):
        # Optionally serialise the payload, then construct the Document.
        return Document(json.dumps(payload) if from_str else payload)

    # Known field ``id``, explicit ``tags`` and one unknown key.
    with_id = build({'id': '123', 'hello': 'world', 'tags': {'good': 'bye'}})
    assert with_id.id == '123'
    assert with_id.tags['hello'] == 'world'
    assert with_id.tags['good'] == 'bye'

    # Only unknown keys.
    flat = build({'hello': 'world', 'good': 'bye'})
    assert flat.tags['hello'] == 'world'
    assert flat.tags['good'] == 'bye'
def empty_documents():
    """Create 20 content-less Documents with ids derived from 100..119.

    Each id is the index left-padded with zeros to 16 characters.

    :return: a ``DocumentSet`` wrapping the created Documents.
    """
    collected = []
    for idx in range(100, 120):
        with Document() as doc:
            doc.id = f'{idx:0>16}'
            collected.append(doc)
    return DocumentSet(collected)
def deleted_documents():
    """Create three content-less Documents with ids derived from 0..2.

    Each id is the index left-padded with zeros to 16 characters.

    :return: a ``DocumentSet`` wrapping the created Documents.
    """
    collected = []
    for idx in range(3):
        with Document() as doc:
            doc.id = f'{idx:0>16}'
            collected.append(doc)
    return DocumentSet(collected)
 def documents(embedding_cls_type, text_prefix='', num_docs=5):
     """Create Documents whose embedding uses the requested container type.

     A random dense vector of length 10 is drawn for every document and is
     then stored as-is or converted, depending on ``embedding_cls_type``.
     Any value other than the five handled below leaves the embedding unset.

     :param embedding_cls_type: one of ``'dense'``, ``'scipy_csr'``,
         ``'scipy_coo'``, ``'torch'`` or ``'tf'``.
     :param text_prefix: string placed before the index in each doc's text.
     :param num_docs: number of Documents to create.
     :return: a ``DocumentArray`` wrapping the created Documents.
     """
     collected = []
     for idx in range(num_docs):
         with Document(text=f'{text_prefix}{idx}') as doc:
             doc.id = f'{idx:0>16}'
             vector = np.random.random([10])
             if embedding_cls_type == 'dense':
                 doc.embedding = vector
             elif embedding_cls_type == 'scipy_csr':
                 doc.embedding = scipy.sparse.csr_matrix(vector)
             elif embedding_cls_type == 'scipy_coo':
                 doc.embedding = scipy.sparse.coo_matrix(vector)
             elif embedding_cls_type == 'torch':
                 # Go through a scipy COO matrix to get indices and values.
                 coo = scipy.sparse.coo_matrix(vector)
                 doc.embedding = torch.sparse_coo_tensor(
                     np.vstack((coo.row, coo.col)),
                     coo.data,
                     coo.shape,
                 )
             elif embedding_cls_type == 'tf':
                 coo = scipy.sparse.coo_matrix(vector)
                 # The dense shape is fixed to one row of ten columns.
                 doc.embedding = tf.SparseTensor(
                     indices=list(zip(coo.row, coo.col)),
                     values=coo.data,
                     dense_shape=[1, 10],
                 )
         collected.append(doc)
     return DocumentArray(collected)