Beispiel #1
0
def test_content_hash_not_dependent_on_chunks_or_matches():
    """Check that attaching chunks or matches leaves the content hash unchanged.

    Four documents share the same content; two of them additionally carry
    three chunks or three matches. After ``update_content_hash()`` every
    hash is expected to equal the hash of the plain baseline document.
    """
    shared_content = 'one'

    # Baseline: content only, nothing attached.
    baseline = Document()
    baseline.content = shared_content
    baseline.update_content_hash()

    # A second plain document with the same content must hash identically.
    duplicate = Document()
    duplicate.content = shared_content
    duplicate.update_content_hash()
    assert baseline.content_hash == duplicate.content_hash

    # Same content plus three chunks; each chunk is filled inside its own
    # `with` block and appended once that block has exited.
    chunked = Document()
    chunked.content = shared_content
    for _ in range(3):
        with Document() as chunk:
            chunk.content = 'some chunk'
        chunked.chunks.append(chunk)
    chunked.update_content_hash()
    assert baseline.content_hash == chunked.content_hash

    # Same content plus three matches, built the same way as the chunks.
    matched = Document()
    matched.content = shared_content
    for _ in range(3):
        with Document() as match:
            match.content = 'some match'
        matched.matches.append(match)
    matched.update_content_hash()
    assert baseline.content_hash == matched.content_hash
Beispiel #2
0
def test_uri_get_set():
    """Check the ``uri`` getter/setter and its interplay with ``content``.

    Expectations encoded by the assertions below:
    * a URL ending in ``.jpg`` is stored as-is and yields the
      ``image/jpeg`` mime type;
    * an arbitrary string can be stored as the URI;
    * assigning that string to ``content`` makes it readable through
      ``text`` and leaves ``uri`` falsy.
    """
    image_url = 'https://abc.com/a.jpg'
    plain_string = 'abcdefg'

    doc = Document()

    # URL round-trip and the mime type reported for it.
    doc.uri = image_url
    assert doc.uri == image_url
    assert doc.mime_type == 'image/jpeg'

    # Overwriting the URI with a non-URL string.
    doc.uri = plain_string
    assert doc.uri == plain_string

    # Assigning string content: readable via `text`, and `uri` is no longer set.
    doc.content = plain_string
    assert doc.text == plain_string
    assert not doc.uri
Beispiel #3
0
def test_sparse_get_set():
    """Check that sparse matrices round-trip through ``content`` and ``blob``.

    Equality is checked via ``(a != b).nnz``: the test treats a count of
    zero as "same matrix" and a non-zero count as "different matrix".
    """
    doc = Document()
    # A freshly created document is expected to have no content.
    assert doc.content is None

    # Store the first matrix through `content` and read it back.
    first = coo_matrix(np.array([1, 2, 3]))
    doc.content = first
    diff_vs_first = doc.content != first
    assert diff_vs_first.nnz == 0

    # A different matrix must not compare equal to what is stored.
    second = coo_matrix(np.array([3, 2, 1]))
    diff_vs_second = doc.content != second
    assert diff_vs_second.nnz != 0

    # Storing through `blob` must be visible when reading `content`.
    doc.blob = second
    diff_after_blob = doc.content != second
    assert diff_after_blob.nnz == 0