def da_and_dam():
    """Return a ``(DocumentArray, DocumentArrayMemmap)`` pair.

    Each container is extended with its own separate ``random_docs(100)``
    call, so the two are not filled from the same batch object.
    """
    in_memory_batch = random_docs(100)
    in_memory = DocumentArray()
    in_memory.extend(in_memory_batch)

    memmap_batch = random_docs(100)
    memmapped = DocumentArrayMemmap()
    memmapped.extend(memmap_batch)

    return in_memory, memmapped
def docarray_for_nest_split():
    """Return a ``(DocumentArray, DocumentArrayMemmap)`` pair with nested tags.

    Five documents are created whose ``tags['nest']['category']`` values are,
    in order, ``'c', 'c', 'b', 'a', 'a'``. The memmap is extended from the
    in-memory array.
    """
    categories = ('c', 'c', 'b', 'a', 'a')

    in_memory = DocumentArray()
    for category in categories:
        in_memory.append(Document(tags={'nest': {'category': category}}))

    memmapped = DocumentArrayMemmap()
    memmapped.extend(in_memory)
    return in_memory, memmapped
def docarray_for_split_at_zero():
    """Return a ``(DocumentArray, DocumentArrayMemmap)`` pair with float tags.

    Five documents are created whose ``tags['category']`` values are, in
    order, ``0.0, 0.0, 1.0, 2.0, 2.0``. The memmap is extended from the
    in-memory array.
    """
    categories = (0.0, 0.0, 1.0, 2.0, 2.0)

    in_memory = DocumentArray()
    for category in categories:
        in_memory.append(Document(tags={'category': category}))

    memmapped = DocumentArrayMemmap()
    memmapped.extend(in_memory)
    return in_memory, memmapped
def test_filter_fn_traverse_flat(filter_fn, docs_len, doc_req, use_dam, tmp_path):
    """Check ``traverse_flat`` with ``filter_fn`` over paths ``'r,c,m,cm'``.

    When ``use_dam`` is true the request's docs are first copied into a
    ``DocumentArrayMemmap`` created at ``tmp_path``; otherwise
    ``doc_req.docs`` is traversed directly. The traversal must yield exactly
    ``docs_len`` items, each a ``Document``.
    """
    if not use_dam:
        docs = doc_req.docs
    else:
        docs = DocumentArrayMemmap(tmp_path)
        docs.extend(doc_req.docs)

    traversed = list(docs.traverse_flat('r,c,m,cm', filter_fn=filter_fn))

    assert len(traversed) == docs_len
    for doc in traversed:
        assert isinstance(doc, Document)
def da_and_dam():
    """Return a ``(DocumentArray, DocumentArrayMemmap)`` pair of text documents.

    The three documents carry the texts ``'hello'``, ``'hello world'`` and
    ``'goodbye world!'``, in that order. The memmap is extended from the
    in-memory array.
    """
    texts = ('hello', 'hello world', 'goodbye world!')
    in_memory = DocumentArray([Document(text=text) for text in texts])

    memmapped = DocumentArrayMemmap()
    memmapped.extend(in_memory)
    return in_memory, memmapped
def da_and_dam():
    """Return a ``(DocumentArray, DocumentArrayMemmap)`` pair with embeddings.

    One document is built per row of a fixed 3x3 integer array; each also
    gets a ``'label'`` tag drawn with ``random.randint(0, 5)``. The memmap is
    extended from the in-memory array.
    """
    embeddings = np.array([[1, 0, 0], [2, 0, 0], [3, 0, 0]])

    labelled_docs = []
    for row in embeddings:
        # One randint call per row, in row order, as in a comprehension.
        labelled_docs.append(
            Document(embedding=row, tags={'label': random.randint(0, 5)})
        )
    in_memory = DocumentArray(labelled_docs)

    memmapped = DocumentArrayMemmap()
    memmapped.extend(in_memory)
    return in_memory, memmapped
def test_filter_fn_traverse_flat_per_path(filter_fn, doc_req, docs_len, use_dam, tmp_path):
    """Check ``traverse_flat_per_path`` with ``filter_fn`` over ``'r,c,m,cm'``.

    When ``use_dam`` is true the request's docs are first copied into a
    ``DocumentArrayMemmap`` created at ``tmp_path``; otherwise
    ``doc_req.docs`` is traversed directly. Four sequences are expected (one
    per path); each is checked against the matching entry of ``docs_len`` and
    against the expected container type for the chosen backend.
    """
    if not use_dam:
        docs = doc_req.docs
    else:
        docs = DocumentArrayMemmap(tmp_path)
        docs.extend(doc_req.docs)

    per_path = list(docs.traverse_flat_per_path('r,c,m,cm', filter_fn=filter_fn))
    assert len(per_path) == 4

    # The memmap branch is asserted to yield itertools.chain objects,
    # the in-memory branch DocumentArray instances.
    expected_type = itertools.chain if use_dam else DocumentArray
    for sequence, expected_len in zip(per_path, docs_len):
        assert isinstance(sequence, expected_type)
        assert len(list(sequence)) == expected_len
def test_sprite_image_generator(pytestconfig, tmpdir):
    """Check that ``plot_image_sprites`` writes a file for both containers.

    Documents are loaded via ``from_files`` from the PNG and JPG glob
    patterns under ``<rootdir>/.github``. A sprite is plotted from the
    ``DocumentArray`` and then from a ``DocumentArrayMemmap`` extended with
    the same documents; each output path must exist afterwards.
    """
    image_globs = [
        f'{pytestconfig.rootdir}/.github/**/*.png',
        f'{pytestconfig.rootdir}/.github/**/*.jpg',
    ]
    in_memory = DocumentArray(from_files(image_globs))

    da_sprite_path = tmpdir / 'sprint_da.png'
    in_memory.plot_image_sprites(da_sprite_path)
    assert os.path.exists(da_sprite_path)

    memmapped = DocumentArrayMemmap()
    memmapped.extend(in_memory)

    dam_sprite_path = tmpdir / 'sprint_dam.png'
    memmapped.plot_image_sprites(dam_sprite_path)
    assert os.path.exists(dam_sprite_path)
def da_for_batching():
    """Return ``(DocumentArray.empty(100), DocumentArrayMemmap.empty(100))``."""
    # Tuple elements are evaluated left to right: DocumentArray first.
    return DocumentArray.empty(100), DocumentArrayMemmap.empty(100)
def da_and_dam():
    """Return a ``(DocumentArray, DocumentArrayMemmap)`` pair of random docs.

    The ``DocumentArray`` wraps ``random_docs(100)``; the memmap is then
    extended from that array, so both are filled from the same batch.
    """
    in_memory = DocumentArray(random_docs(100))

    memmapped = DocumentArrayMemmap()
    memmapped.extend(in_memory)

    return in_memory, memmapped
def da_and_dam(N):
    """Return ``(DocumentArray.empty(N), DocumentArrayMemmap.empty(N))``.

    :param N: value passed to each container's ``empty`` constructor.
    """
    # Tuple elements are evaluated left to right: DocumentArray first.
    return DocumentArray.empty(N), DocumentArrayMemmap.empty(N)
def foo(self, docs, **kwargs):
    """Copy ``docs`` into a ``DocumentArrayMemmap`` and return it.

    The memmap is created at ``tmpdir + '/dam'``. ``tmpdir`` is not a
    parameter; it is resolved from an enclosing scope that is not visible in
    this block.

    :param docs: documents to extend the memmap with.
    :param kwargs: accepted but unused.
    :return: the populated ``DocumentArrayMemmap``.
    """
    storage_path = tmpdir + '/dam'
    memmapped = DocumentArrayMemmap(storage_path)
    memmapped.extend(docs)
    return memmapped
def test_empty_non_zero():
    """``empty(10)`` must give a container of length 10 for both classes."""
    for container_cls in (DocumentArray, DocumentArrayMemmap):
        container = container_cls.empty(10)
        assert len(container) == 10
def test_empty_zero():
    """``empty()`` with no argument must give length 0 for both classes."""
    for container_cls in (DocumentArray, DocumentArrayMemmap):
        container = container_cls.empty()
        assert len(container) == 0