Beispiel #1
0
def test_chunks_fail(document_factory, document, groundtruth):
    """Consuming ``pair.chunks`` must raise ``AssertionError`` on a mismatch.

    One extra chunk is appended to ``groundtruth`` only, so the document and
    its groundtruth no longer have the same number of chunks (this assumes the
    fixtures start out with equal chunk counts -- confirm against the
    fixture definitions).
    """
    extra_chunk = document_factory.create('test chunk 4')
    groundtruth.chunks.append(extra_chunk)
    mismatched_pair = DocGroundtruthPair(doc=document, groundtruth=groundtruth)
    with pytest.raises(AssertionError):
        # Drain the iterable completely; the error is expected while
        # the chunk pairs are being produced.
        list(mismatched_pair.chunks)
Beispiel #2
0
def test_docgroundtruth_pair():
    """Check that a pair iterates chunks and matches of both documents.

    Two protobuf documents each receive three matches and three chunks.
    Every chunk pair must report granularity 1 on both sides, every match
    pair must report adjacency 1 on both sides, and exactly three pairs of
    each kind must be produced.
    """
    expected_children = 3

    def populate(proto: jina_pb2.DocumentProto, count):
        # Each added match gets the parent's adjacency plus one.
        for _ in range(count):
            new_match = proto.matches.add()
            new_match.adjacency = proto.adjacency + 1
        # Each added chunk gets the parent's granularity plus one.
        for _ in range(count):
            new_chunk = proto.chunks.add()
            new_chunk.granularity = proto.granularity + 1
        return proto

    doc = populate(jina_pb2.DocumentProto(), expected_children)
    gt = populate(jina_pb2.DocumentProto(), expected_children)

    pair = DocGroundtruthPair(doc, gt)

    # enumerate(start=1) leaves the number of yielded pairs in the counter;
    # the counter stays 0 if nothing is yielded at all.
    chunk_pairs_seen = 0
    for chunk_pairs_seen, chunk_pair in enumerate(pair.chunks, start=1):
        assert chunk_pair.doc.granularity == 1
        assert chunk_pair.groundtruth.granularity == 1

    match_pairs_seen = 0
    for match_pairs_seen, match_pair in enumerate(pair.matches, start=1):
        assert match_pair.doc.adjacency == 1
        assert match_pair.groundtruth.adjacency == 1

    assert chunk_pairs_seen == 3
    assert match_pairs_seen == 3
def ground_truth_pairs():
    """Build ten document/groundtruth pairs with fixed two-element embeddings.

    Every pair wraps a fresh ``Document`` whose embedding is ``[1, 1]`` and a
    fresh groundtruth ``Document`` whose embedding is ``[2, 2]``.

    Returns:
        A list of ten ``DocGroundtruthPair`` objects.
    """
    num_docs = 10
    return [
        DocGroundtruthPair(
            doc=Document(embedding=np.array([1, 1])),
            groundtruth=Document(embedding=np.array([2, 2])),
        )
        for _ in range(num_docs)
    ]
def ground_truth_pairs():
    """Build ten document/groundtruth pairs whose documents carry ten matches.

    Both documents of every pair are fresh ``DocumentProto`` messages. The
    match added at position ``i`` is assigned ``i`` as its ``'id'`` tag and
    as its score value.

    Returns:
        A list of ten ``DocGroundtruthPair`` objects.
    """
    num_docs = 10

    def proto_with_matches(num_matches) -> jina_pb2.DocumentProto:
        # Create an empty proto and attach ``num_matches`` numbered matches.
        proto = jina_pb2.DocumentProto()
        for position in range(num_matches):
            match = proto.matches.add()
            match.tags['id'] = position
            match.score.value = position
        return proto

    return [
        DocGroundtruthPair(
            doc=proto_with_matches(num_docs),
            groundtruth=proto_with_matches(num_docs),
        )
        for _ in range(num_docs)
    ]
Beispiel #5
0
def test_chunks_success(document, groundtruth):
    """``pair.chunks`` must be a generator that can be consumed without error."""
    matching_pair = DocGroundtruthPair(doc=document, groundtruth=groundtruth)
    assert isinstance(matching_pair.chunks, types.GeneratorType)
    # Access the attribute a second time and drain it; any exception raised
    # while producing chunk pairs fails the test.
    list(matching_pair.chunks)
Beispiel #6
0
def test_init(document, groundtruth):
    """A pair built from the two fixtures must be constructible and truthy."""
    pair = DocGroundtruthPair(doc=document, groundtruth=groundtruth)
    assert pair
Beispiel #7
0
 def create(self):
     """Return a ``DocGroundtruthPair`` from two freshly created documents.

     The document comes from ``doc_with_field_type.create()`` and the
     groundtruth from ``groundtruth_with_field_type.create()``; both names
     are resolved from the enclosing scope, which is not visible here.
     """
     doc = doc_with_field_type.create()
     groundtruth = groundtruth_with_field_type.create()
     return DocGroundtruthPair(doc=doc, groundtruth=groundtruth)