Esempio n. 1
0
def memmap_with_text_and_embedding(tmpdir):
    """Yield a ``DocumentArrayMemmap`` filled with 100 text+embedding documents.

    Every document carries the text ``'random text <n>'`` (``n`` running from
    0 to 99) and a random 512-element embedding.  The array is cleared when
    the generator is resumed after the ``yield``.

    :param tmpdir: location handed to ``DocumentArrayMemmap``
    """
    memmap = DocumentArrayMemmap(tmpdir)
    for position in range(100):
        vector = np.random.rand(512)
        memmap.append(Document(text=f'random text {position}', embedding=vector))

    # hand the populated array to the consumer, then wipe it on resume
    yield memmap
    memmap.clear()
Esempio n. 2
0
def test_issue_3527_delete_and_match(tmpdir):
    """Check that ``match`` against a memmap returns the surviving document.

    Document ``'a'`` is appended and deleted, document ``'c'`` is appended
    with the same embedding, and a single query document is matched against
    the memmap.  The first match must be ``'c'``.

    :param tmpdir: location handed to ``DocumentArrayMemmap``
    """

    def as_float32(values):
        # build a fresh array on every call, as the inline literals did
        return np.array(values, dtype=np.float32)

    memmap = DocumentArrayMemmap(tmpdir)

    removed = Document(id='a', embedding=as_float32([1, 2, 3]))
    memmap.append(removed)
    del memmap['a']

    kept = Document(id='c', embedding=as_float32([1, 2, 3]))
    memmap.append(kept)

    query = Document(embedding=as_float32([5, 6, 7]))
    queries = DocumentArray([query])
    queries.match(memmap)

    best_match = queries[0].matches[0]
    assert best_match.id == 'c'
Esempio n. 3
0
def test_batch_iterator_dam(tmpdir):
    """Check that ``batch_iterator`` walks a memmap in insertion order.

    Four documents with ids 0..3 are stored, then read back in batches of
    two; the ids must come out as 0, 1, 2, 3 with nothing left over.

    :param tmpdir: location handed to ``DocumentArrayMemmap``
    """
    memmap = DocumentArrayMemmap(tmpdir)
    for doc_id in range(4):
        memmap.append(Document(id=doc_id))

    batches = batch_iterator(memmap, 2)
    remaining_ids = iter(range(4))
    for chunk in batches:
        for item in chunk:
            actual_id = int(item.id)
            assert actual_id == next(remaining_ids)

    # every expected id must have been consumed by the loop above
    with pytest.raises(StopIteration):
        next(remaining_ids)
Esempio n. 4
0
def test_memmap_append_extend(tmpdir):
    """Check ``append`` and ``extend`` keep length and content in step.

    The first 40 of 100 random documents are appended one by one and the
    remaining ones are added with a single ``extend``; after each phase the
    stored documents must have the same ``proto`` as the source documents.

    :param tmpdir: location handed to ``DocumentArrayMemmap``
    """

    def assert_same_protos(expected, stored):
        # pairwise comparison; stops at the shorter of the two sequences
        for source_doc, stored_doc in zip(expected, stored):
            assert source_doc.proto == stored_doc.proto

    memmap = DocumentArrayMemmap(tmpdir)
    all_docs = list(random_docs(100))
    assert len(memmap) == 0

    head = all_docs[:40]
    for doc in head:
        memmap.append(doc)
    assert len(memmap) == 40
    assert_same_protos(all_docs[:40], memmap)

    tail = all_docs[40:]
    memmap.extend(tail)
    assert len(memmap) == 100
    assert_same_protos(all_docs, memmap)
Esempio n. 5
0
def test_memmap_mutate(tmpdir):
    """Check appended documents are readable from a second memmap instance.

    Two documents are appended through one ``DocumentArrayMemmap``; a second
    instance created on the same ``tmpdir`` must report both of them, and
    ``clear`` on the first instance must leave it with length zero.

    :param tmpdir: location shared by both ``DocumentArrayMemmap`` instances
    """
    writer = DocumentArrayMemmap(tmpdir)

    hello = Document(text='hello')
    writer.append(hello)
    assert writer[0] == hello

    world = Document(text='world')
    writer.append(world)
    assert writer[1] == world

    # a second instance created on the same directory
    reader = DocumentArrayMemmap(tmpdir)
    assert len(reader) == 2
    assert reader[0] == hello
    assert reader[1] == world

    writer.clear()
    assert len(writer) == 0
Esempio n. 6
0
def test_buffer_dam_lru(tmpdir):
    """Check the buffer pool drops the least recently used document.

    With ``buffer_pool_size=5`` and five documents stored, the first document
    is read so it becomes the most recent entry.  Appending a sixth document
    must then push out the second document while the first and the newly
    appended one stay in the pool.

    :param tmpdir: location handed to ``DocumentArrayMemmap``
    """
    memmap = DocumentArrayMemmap(tmpdir, buffer_pool_size=5)
    candidates = list(random_docs(6))
    memmap.extend(candidates[:5])

    # reading index 0 makes the first doc the most recently used,
    # which leaves the second doc as the least recently used
    touched = memmap[0]
    doc_map = memmap.buffer_pool.doc_map
    newest_id = next(reversed(doc_map.keys()))
    assert newest_id == touched.id
    oldest_id = next(iter(doc_map.keys()))
    assert oldest_id == candidates[1].id

    lru_doc = candidates[1]

    assert touched.id == candidates[0].id
    newcomer = candidates[5]
    memmap.append(newcomer)

    # the touched doc survives, the LRU doc is gone, the newcomer is present
    assert touched.id in memmap.buffer_pool
    assert lru_doc.id not in memmap.buffer_pool
    assert newcomer.id in memmap.buffer_pool
Esempio n. 7
0
def memmap_for_split(tmpdir):
    """Build a ``DocumentArrayMemmap`` of five documents tagged by category.

    The ``category`` tag values are appended in the order c, c, b, a, a.

    :param tmpdir: location handed to ``DocumentArrayMemmap``
    :return: the populated ``DocumentArrayMemmap``
    """
    memmap = DocumentArrayMemmap(tmpdir)
    for label in ('c', 'c', 'b', 'a', 'a'):
        memmap.append(Document(tags={'category': label}))
    return memmap
Esempio n. 8
0
def test_memmap_physical_size(tmpdir):
    """Check ``physical_size`` is 0 when new and positive after one append.

    :param tmpdir: location handed to ``DocumentArrayMemmap``
    """
    memmap = DocumentArrayMemmap(tmpdir)
    size_when_empty = memmap.physical_size
    assert size_when_empty == 0

    memmap.append(Document())
    size_after_append = memmap.physical_size
    assert size_after_append > 0