Beispiel #1
0
def test_docgroundtruth_pair():
    """Check that ``DocGroundtruthPair`` pairs up chunks and matches.

    A document and a groundtruth each receive three matches and three
    chunks. Iterating ``pair.chunks`` and ``pair.matches`` must then yield
    three items each, and every item must expose a ``doc`` and a
    ``groundtruth`` whose granularity (chunks) or adjacency (matches) is 1.
    """

    def add_matches(doc: jina_pb2.Document, num_matches):
        # Every new match gets the parent's adjacency plus one.
        for _ in range(num_matches):
            new_match = doc.matches.add()
            new_match.adjacency = doc.adjacency + 1

    def add_chunks(doc: jina_pb2.Document, num_chunks):
        # Every new chunk gets the parent's granularity plus one.
        for _ in range(num_chunks):
            new_chunk = doc.chunks.add()
            new_chunk.granularity = doc.granularity + 1

    document = jina_pb2.Document()
    groundtruth = jina_pb2.Document()
    # Matches first, then chunks; document before groundtruth each time.
    for populate in (add_matches, add_chunks):
        populate(document, 3)
        populate(groundtruth, 3)

    pair = DocGroundtruthPair(document, groundtruth)

    # The counters start at 0 so an empty iteration is still reported as 0.
    chunk_total = 0
    for chunk_total, chunk_pair in enumerate(pair.chunks, start=1):
        assert chunk_pair.doc.granularity == 1
        assert chunk_pair.groundtruth.granularity == 1

    match_total = 0
    for match_total, match_pair in enumerate(pair.matches, start=1):
        assert match_pair.doc.adjacency == 1
        assert match_pair.groundtruth.adjacency == 1

    assert chunk_total == 3
    assert match_total == 3
Beispiel #2
0
def ground_truth_pairs():
    """Return ten ``DocGroundtruthPair`` objects with fixed embeddings.

    In every pair the document embedding is set to ``[1, 1]`` and the
    groundtruth embedding to ``[2, 2]`` through the ``NdArray`` wrapper.
    """

    def build_pair():
        document = jina_pb2.DocumentProto()
        groundtruth = jina_pb2.DocumentProto()
        NdArray(document.embedding).value = np.array([1, 1])
        NdArray(groundtruth.embedding).value = np.array([2, 2])
        return DocGroundtruthPair(doc=document, groundtruth=groundtruth)

    pair_count = 10
    return [build_pair() for _ in range(pair_count)]
def ground_truth_pairs():
    """Return ten ``DocGroundtruthPair`` objects with fixed embeddings.

    In every pair the document embedding is copied from ``array2pb`` applied
    to ``[1, 1]`` and the groundtruth embedding from ``array2pb`` applied to
    ``[2, 2]``.
    """

    def build_pair():
        document = jina_pb2.Document()
        groundtruth = jina_pb2.Document()
        doc_vector = np.array([1, 1])
        document.embedding.CopyFrom(array2pb(doc_vector))
        gt_vector = np.array([2, 2])
        groundtruth.embedding.CopyFrom(array2pb(gt_vector))
        return DocGroundtruthPair(doc=document, groundtruth=groundtruth)

    pair_count = 10
    return [build_pair() for _ in range(pair_count)]
def ground_truth_pairs():
    """Return ten ``DocGroundtruthPair`` objects whose members carry matches.

    Both the document and the groundtruth of every pair receive ten matches;
    each match stores its position (0..9) under the ``'id'`` tag.
    """
    total = 10

    def populate_matches(document: jina_pb2.Document, count):
        # Tag every freshly added match with its running index.
        for match_id in range(count):
            document.matches.add().tags['id'] = match_id

    def build_pair():
        doc, gt = jina_pb2.Document(), jina_pb2.Document()
        for proto in (doc, gt):
            populate_matches(proto, total)
        return DocGroundtruthPair(doc=doc, groundtruth=gt)

    return [build_pair() for _ in range(total)]
 def create(self):
     """Build a ``DocGroundtruthPair`` from two freshly created members.

     ``doc_with_field_type`` and ``groundtruth_with_field_type`` are not
     defined in this method; they are looked up in an enclosing scope that
     is not visible here. Each one's ``create()`` is called, document first.
     """
     document = doc_with_field_type.create()
     groundtruth = groundtruth_with_field_type.create()
     return DocGroundtruthPair(doc=document, groundtruth=groundtruth)