Example #1
0
def create_chunk_matches_to_score():
    """Build a two-level document tree for ranker tests.

    doc (id: 100, granularity=0)
    |- chunk 10 -> matches 11, 12 (parent_id 1, scores 2, 3)
    |- chunk 20 -> matches 21, 22 (parent_id 2, scores 4, 5)
    """
    doc = Document()
    doc.id = '1'
    doc.granularity = 0
    num_matches = 2
    for parent_id in (1, 2):
        chunk = Document()
        chunk.id = str(parent_id * 10)
        chunk.granularity = doc.granularity + 1
        first_score = parent_id * 2
        for value in range(first_score, first_score + num_matches):
            match = Document()
            match.granularity = chunk.granularity
            match.parent_id = str(parent_id)
            match.score = NamedScore(value=value, ref_id=chunk.id)
            match.id = str(10 * parent_id + value)
            match.length = 4
            chunk.matches.append(match)
        doc.chunks.append(chunk)
    return doc
Example #2
0
def create_document_to_score():
    """Build doc 1 with chunks 2 and 3; each chunk carries two scored matches.

    chunk 2 -> matches (id 4, parent 40, score 4), (id 5, parent 50, score 5)
    chunk 3 -> matches (id 6, parent 60, score 6), (id 7, parent 70, score 7)
    """
    doc = Document()
    doc.id = '1'
    for chunk_idx in range(2):
        chunk = Document()
        chunk.id = str(chunk_idx + 2)
        for offset in range(2):
            match_id = 2 * int(chunk.id) + offset
            match = Document()
            match.id = str(match_id)
            match.parent_id = str(10 * match_id)
            match.length = match_id
            # score mirrors the match id; consumed by MaxRanker and MinRanker
            match.score = NamedScore(value=match_id, ref_id=chunk.id)
            match.tags['price'] = match.score.value
            match.tags['discount'] = DISCOUNT_VAL
            chunk.matches.append(match)
        doc.chunks.append(chunk)
    return doc
Example #3
0
def random_queries(num_docs, chunks_per_doc=5):
    """Yield ``num_docs`` query documents, each with ``chunks_per_doc`` chunks.

    Chunk ids continue sequentially after the doc id range.
    """
    for doc_idx in range(num_docs):
        query = Document()
        query.id = doc_idx
        for chunk_idx in range(chunks_per_doc):
            chunk = Document()
            chunk.id = num_docs + doc_idx * chunks_per_doc + chunk_idx
            query.chunks.add(chunk)
        yield query
Example #4
0
def get_docs_to_delete(doc_id_to_chunk_ids):
    """Yield one Document per mapping entry, carrying that entry's chunks.

    The generated id is the entry index repeated 16 times (e.g. '0' * 16);
    the mapping's keys are never used, only its values (chunk lists).
    """
    # iterate values only: the dict key was bound but unused in the original
    for i, chunks in enumerate(doc_id_to_chunk_ids.values()):
        document = Document()
        document.id = f'{i}' * 16  # f-string already yields str; str() was redundant
        for chunk in chunks:
            document.chunks.append(chunk)
        yield document
Example #5
0
def create_document(doc_id, text, weight, length):
    """Create a Document with a utf-8 encoded buffer, weight and length."""
    doc = Document()
    doc.id = str(doc_id)
    doc.weight = weight
    doc.length = length
    doc.buffer = text.encode('utf8')
    return doc
Example #6
0
def random_docs(start, end, embed_dim=10):
    """Yield docs for ids in [start, end): random text, random embedding."""
    for idx in range(start, end):
        doc = Document()
        doc.id = f'{idx:0>16}'
        doc.tags['id'] = idx
        letters = [random.choice(string.ascii_lowercase) for _ in range(10)]
        doc.text = ''.join(letters).encode('utf8')
        doc.embedding = np.random.random([embed_dim])
        yield doc
Example #7
0
def index_docs():
    """Return 100 docs with zero-padded 16-char ids and embedding [idx, idx]."""
    docs = []
    for idx in range(100):
        doc = Document()
        doc.id = f'{idx:0>16}'
        # fix: original had a duplicated `doc.embedding = doc.embedding = ...`
        doc.embedding = np.array([idx, idx])
        docs.append(doc)
    return docs
Example #8
0
def test_broken_document(segment_driver, text_segmenter_executor):
    """Segmenting a document whose id is an int must raise AttributeError."""
    segment_driver.attach(executor=text_segmenter_executor, runtime=None)

    broken = Document()
    broken.id = 1
    broken.text = 'invalid'

    with pytest.raises(AttributeError):
        segment_driver._apply_all([DocumentArray([broken])])
Example #9
0
def random_docs(start, end, embed_dim=10, jitter=1, has_content=True):
    """Yield docs with 16-char repeated-digit ids; optionally add content.

    With ``has_content`` the doc gets a random lowercase text and an
    embedding of size ``embed_dim`` plus a random jitter.
    """
    for idx in range(start, end):
        doc = Document()
        doc.id = f'{idx}' * 16
        if has_content:
            doc.tags['id'] = idx
            chars = (random.choice(string.ascii_lowercase) for _ in range(10))
            doc.text = ''.join(chars).encode('utf8')
            dim = embed_dim + np.random.randint(0, jitter)
            doc.embedding = np.random.random([dim])
        yield doc
Example #10
0
 def __init__(self, *args, **kwargs):
     """Pre-populate ``self.db`` with four serialized single-value documents.

     Document i (for i in 1..4) gets id ``str(i) * 16`` and embedding
     ``[int(id)]``; the db maps ``id2hash(doc.id)`` to the serialized doc.
     """
     super().__init__(*args, **kwargs)
     self.db = {}
     # fix: replace four copy-pasted doc-construction blocks with one loop
     for i in range(1, 5):
         doc = Document()
         doc.id = str(i) * 16
         doc.embedding = np.array([int(doc.id)])
         self.db[id2hash(doc.id)] = doc.SerializeToString()
Example #11
0
def test_broken_document():
    """The segment driver must raise AttributeError on an int-typed doc id."""
    driver = SimpleSegmentDriver()
    executor = MockSegmenter()
    driver.attach(executor=executor, runtime=None)

    broken = Document()
    broken.id = 1
    broken.text = 'invalid'

    with pytest.raises(AttributeError):
        driver._apply_all([DocumentSet([broken])])
Example #12
0
def random_docs(start, end):
    """Return a list of docs with random lowercase text and random embeddings."""
    documents = []
    for idx in range(start, end):
        doc = Document()
        doc.id = idx
        doc.tags['id'] = idx
        letters = (random.choice(string.ascii_lowercase) for _ in range(10))
        doc.text = ''.join(letters).encode('utf8')
        doc.embedding = np.random.random([10 + np.random.randint(0, 1)])
        documents.append(doc)
    return documents
Example #13
0
def test_broken_document():
    """Driver must reject a document whose id is an int instead of a string."""
    driver = SimpleSegmentDriver()
    executor = MockSegmenter()
    driver.attach(executor=executor, pea=None)

    bad_doc = Document()
    bad_doc.id = 1
    bad_doc.text = 'invalid'
    bad_doc.length = 2

    # sanity check: length assignment itself works fine
    assert bad_doc.length == 2

    with pytest.raises(AttributeError):
        driver._apply_all([bad_doc])
Example #14
0
def create_document_to_search():
    """Build doc '0'*16 with five chunks whose ids are '1'*16 .. '5'*16.

    Chunk 5 is expected to be missing from the KV indexer.
    """
    doc = Document()
    doc.id = '0' * 16
    for chunk_num in range(1, 6):
        chunk = doc.add_chunk()
        chunk.id = str(chunk_num) * 16
    return doc
def create_document_to_score_same_depth_level():
    """Build doc 1 with four same-depth matches.

    (id, parent_id, score, weight):
    (2, 20, 30, 3), (3, 20, 40, 4), (4, 30, 20, 2), (5, 30, 10, 1)
    """
    doc = Document()
    doc.id = 1

    specs = [
        (2, 20, 30, 3),
        (3, 20, 40, 4),
        (4, 30, 20, 2),
        (5, 30, 10, 1),
    ]
    for match_id, parent_id, score_value, weight in specs:
        match = Document()
        match.id = match_id
        match.parent_id = parent_id
        match.weight = weight
        match.score = NamedScore(value=score_value, ref_id=doc.id)
        doc.matches.append(match)
    return doc
Example #16
0
def create_document_to_search():
    """Build a doc with ten chunks; chunk c has id str(c)*16 and embedding [c]."""
    doc = Document()
    for idx in range(10):
        chunk = Document()
        chunk.id = str(idx) * 16
        chunk.embedding = np.array([idx])
        doc.chunks.append(chunk)
    return doc
Example #17
0
def document():
    """Build doc '0'*16 with chunks '1'*16 .. '5'*16.

    Chunk 5 is expected to be missing from the KV indexer.
    """
    doc = Document()
    doc.id = '0' * 16
    for num in range(1, 6):
        with Document() as chunk:
            chunk.id = str(num) * 16
        doc.chunks.add(chunk)
    return doc
Example #18
0
def create_document_to_search_with_matches_on_chunks():
    """Build doc '0'*16 with one chunk ('1'*16) carrying five matches.

    Match ids are '2'*16 .. '6'*16, each with score 1.0; matches 5 and 6
    are expected to be missing from the KV indexer.
    """
    doc = Document()
    doc.id = '0' * 16
    chunk = doc.add_chunk()
    chunk.id = '1' * 16
    for m in range(5):
        # fix: drop the unused `match =` binding; add_match mutates the chunk
        chunk.add_match(doc_id=str(m + 2) * 16, score_value=1.)
    return doc
Example #19
0
def create_document_to_score():
    """Build doc '1'*16 with matches ids 2..5 scoring 3, 6, 1, 8."""
    doc = Document()
    doc.id = '1' * 16
    doc.length = 5
    specs = [(2, 3), (3, 6), (4, 1), (5, 8)]
    for match_id, value in specs:
        with Document() as match:
            match.id = str(match_id) * 16
            match.length = value
            match.score.value = value
            doc.matches.append(match)
    return doc
Example #20
0
def create_document_to_search_with_matches_on_chunks():
    """Build doc '0'*16 with one chunk ('1'*16) holding five scored matches.

    Match ids are '2'*16 .. '6'*16 with score 1.0; matches 5 and 6 are
    expected to be missing from the KV indexer.
    """
    doc = Document()
    doc.id = '0' * 16
    chunk = doc.chunks.append()
    chunk.id = '1' * 16
    for match_num in range(2, 7):
        match = Document(id=str(match_num) * 16)
        match.score.value = 1.
        chunk.matches.append(match)
    return doc
def create_document_to_score():
    """Build doc '1'*20 with four matches carrying scores and weights.

    (id, score, weight): (2, 3, 16), (3, 6, 24), (4, 1, 8), (5, 8, 16)
    """
    doc = Document()
    doc.id = '1' * 20
    specs = [
        (2, 3, 16),
        (3, 6, 24),
        (4, 1, 8),
        (5, 8, 16),
    ]
    for match_id, score_value, weight in specs:
        with Document() as match:
            match.id = match_id
            match.score = NamedScore(value=score_value, ref_id=doc.id)
            match.weight = weight
            doc.matches.append(match)
    return doc
Example #22
0
def evaluate_docs():
    """Build 100 evaluation docs.

    Each has a zero-padded 16-char id, tag ``groundtruth`` = False and
    text ``'aaa'``.
    """
    docs = []
    for idx in range(100):
        doc = Document()
        doc.id = f'{idx:0>16}'
        doc.tags['groundtruth'] = False
        doc.text = 'aaa'
        docs.append(doc)
    return docs
Example #23
0
def index_groundtruth():
    """Build groundtruth docs (tag ``groundtruth`` = True, text ``'aa'``).

    Covers ids 0..99 except 5, 10 and 50, for which no groundtruth exists.
    """
    skipped = {5, 10, 50}
    docs = []
    for idx in range(100):
        if idx in skipped:
            continue
        doc = Document()
        doc.id = f'{idx:0>16}'
        doc.tags['groundtruth'] = True
        doc.text = 'aa'
        docs.append(doc)
    return docs
Example #24
0
 def documents(embedding_cls_type):
     """Build a doc with ten chunks whose embeddings use the requested backend.

     ``embedding_cls_type`` selects one of: 'dense' (numpy), 'scipy_csr',
     'scipy_coo', 'torch' (sparse COO tensor) or 'tf' (SparseTensor).
     """
     doc = Document()
     for idx in range(10):
         chunk = Document()
         chunk.id = str(idx) * 16
         dense = np.random.random([10])
         if embedding_cls_type == 'dense':
             chunk.embedding = dense
         elif embedding_cls_type == 'scipy_csr':
             chunk.embedding = scipy.sparse.csr_matrix(dense)
         elif embedding_cls_type == 'scipy_coo':
             chunk.embedding = scipy.sparse.coo_matrix(dense)
         elif embedding_cls_type == 'torch':
             coo = scipy.sparse.coo_matrix(dense)
             stacked = np.vstack((coo.row, coo.col))
             chunk.embedding = torch.sparse_coo_tensor(
                 stacked,
                 coo.data,
                 coo.shape,
             )
         elif embedding_cls_type == 'tf':
             coo = scipy.sparse.coo_matrix(dense)
             pairs = [(r, c) for r, c in zip(coo.row, coo.col)]
             chunk.embedding = tf.SparseTensor(
                 indices=pairs,
                 values=coo.data,
                 dense_shape=[1, 10],
             )
         doc.chunks.append(chunk)
     return doc
Example #25
0
def create_document(doc_id, text, weight):
    """Create a Document with a utf-8 encoded buffer and the given weight."""
    doc = Document()
    doc.id = str(doc_id)
    doc.weight = weight
    doc.buffer = text.encode('utf8')
    return doc
Example #26
0
def get_docs_to_index(contents):
    """Yield a Document per content string.

    The id of document i is the digit i repeated 16 times (e.g. '0' * 16).
    """
    for i, content in enumerate(contents):
        document = Document()
        document.id = f'{i}' * 16  # f-string already yields str; str() was redundant
        document.text = content
        yield document