Esempio n. 1
0
def test_compositional_dump():
    """Check that saving a compound executor produces its dump and config files.

    A ``CompoundExecutor`` is given two ``BaseExecutor`` components; after
    ``touch()``, ``save()`` and ``save_config()`` both ``save_abspath`` and
    ``config_abspath`` must exist on disk. The generated files are removed
    afterwards, even when one of the assertions fails.
    """
    a = CompoundExecutor()
    a.components = lambda: [BaseExecutor(), BaseExecutor()]
    try:
        assert a.name
        a.touch()
        a.save()
        a.save_config()
        assert Path(a.save_abspath).exists()
        assert Path(a.config_abspath).exists()
    finally:
        # Clean up in ``finally`` so that a failing assertion does not leave
        # dump/config files behind for subsequent tests.
        # NOTE(review): assumes rm_files tolerates paths that were never
        # created -- confirm against its implementation.
        rm_files([a.save_abspath, a.config_abspath])
Esempio n. 2
0
def test_compositional_dump(test_metas):
    """Check that saving a compound executor produces its dump and config files.

    :param test_metas: value forwarded as ``metas`` to ``CompoundExecutor``
        (presumably a fixture providing test-specific metadata such as the
        workspace -- confirm against the fixture definition).

    After ``touch()``, ``save()`` and ``save_config()`` both ``save_abspath``
    and ``config_abspath`` must exist. The generated files are removed
    afterwards, even when one of the assertions fails.
    """
    a = CompoundExecutor(metas=test_metas)
    a.components = lambda: [BaseExecutor(), BaseExecutor()]
    try:
        assert a.name
        a.touch()
        a.save()
        a.save_config()
        assert os.path.exists(a.save_abspath)
        assert os.path.exists(a.config_abspath)
    finally:
        # Clean up in ``finally`` so that a failing assertion does not leave
        # dump/config files behind for subsequent tests.
        # NOTE(review): assumes rm_files tolerates paths that were never
        # created -- confirm against its implementation.
        rm_files([a.save_abspath, a.config_abspath])
Esempio n. 3
0
 def test_compositional_dump(self):
     """Saving a compound executor must create both its dump and its config file."""
     output_attrs = ('save_abspath', 'config_abspath')

     executor = CompoundExecutor()
     executor.components = lambda: [BaseExecutor(), BaseExecutor()]
     self.assertIsNotNone(executor.name)

     # Record the output paths before anything is written; ``tmp_files`` is
     # presumably cleaned up by the test case's teardown -- confirm.
     for attr in output_attrs:
         self.tmp_files.append(getattr(executor, attr))

     executor.touch()
     executor.save()
     executor.save_config()

     # Both the binary dump and the config must now be present on disk.
     for attr in output_attrs:
         self.assertTrue(os.path.exists(getattr(executor, attr)))
Esempio n. 4
0
def validate_index_size(num_indexed_docs):
    """Assert that every ``*.bin`` index under ``JINA_REST_DIR`` has the given size.

    Index files are collected from the ``chunk_indexer`` component workspace
    (component id ``0``) and from the ``JINA_REST_DIR`` directory itself. At
    least one file must be found, and each one, once loaded through
    ``BaseIndexer.load``, must report ``size == num_indexed_docs``.

    :param num_indexed_docs: expected ``size`` of every loaded index.
    :raises KeyError: if the ``JINA_REST_DIR`` environment variable is unset.
    """
    from jina.executors.compound import CompoundExecutor

    workspace = os.environ['JINA_REST_DIR']
    component_dir = Path(
        CompoundExecutor.get_component_workspace_from_compound_workspace(
            workspace, 'chunk_indexer', 0))

    # Component-workspace files first, then those directly in the workspace.
    index_files = [*component_dir.glob('*.bin'), *Path(workspace).glob('*.bin')]
    assert index_files

    for bin_file in index_files:
        assert BaseIndexer.load(str(bin_file)).size == num_indexed_docs
Esempio n. 5
0
def check_indexers_size(chunks, nr_docs, field, tmp_path, same_content, shards,
                        post_op):
    """Assert that the cache and the sharded indexers hold the expected number of entries.

    The cache is loaded from ``tmp_path / 'cache.bin'``. For each of
    ``KV_IDX_FILENAME`` and ``VEC_IDX_FILENAME`` the sizes of all existing
    shard indexers are summed; both that sum and the cache size must equal
    the expected count.

    :param chunks: chunk count used in the expected total
        ``nr_docs + chunks * nr_docs`` (presumably chunks per document).
    :param nr_docs: number of documents used in the expected total.
    :param field: de-duplication field; ``'content_hash'`` combined with
        ``same_content`` collapses the expected count to 1 or 2.
    :param tmp_path: workspace directory (a path object supporting ``/``).
    :param same_content: whether all documents share the same content.
    :param shards: number of shards to inspect.
    :param post_op: ``'delete'`` expects empty indexers, ``'index2'`` expects
        twice the base count, any other value expects the base count.
    """
    from jina.executors.compound import CompoundExecutor

    bin_suffix = '.bin'

    with BaseIndexer.load(tmp_path / 'cache.bin') as cache:
        assert isinstance(cache, DocIDCache)
        cache_full_size = cache.size
        print(f'cache size {cache.size}')

    # The expected count depends only on the arguments, so compute it once.
    if post_op == 'delete':
        expected_size = 0
    elif field == 'content_hash' and same_content:
        # With chunks: one content from the Doc, one from the chunk.
        expected_size = 2 if chunks > 0 else 1
    else:
        base_size = nr_docs + chunks * nr_docs
        expected_size = base_size * 2 if post_op == 'index2' else base_size

    for indexer_fname in [KV_IDX_FILENAME, VEC_IDX_FILENAME]:
        compound_name = 'inc_docindexer' if KV_IDX_FILENAME in indexer_fname else 'inc_vecindexer'
        # Remove the literal ``.bin`` suffix. ``str.rstrip('.bin')`` would
        # instead strip any trailing run of the characters '.', 'b', 'i', 'n'
        # and mangle base names that end in one of them.
        if indexer_fname.endswith(bin_suffix):
            workspace_name = indexer_fname[:-len(bin_suffix)]
        else:
            workspace_name = indexer_fname

        indexers_full_size = 0
        for i in range(shards):
            # Shard ids are 1-based when sharding is enabled, 0 otherwise.
            pea_id = i + 1 if shards > 1 else 0
            workspace_folder = CompoundExecutor.get_component_workspace_from_compound_workspace(
                tmp_path, compound_name, pea_id)
            indexer_path = os.path.join(
                BaseIndexer.get_shard_workspace(
                    workspace_folder=workspace_folder,
                    workspace_name=workspace_name,
                    pea_id=pea_id), f'{indexer_fname}')

            # in the configuration of content-hash / same_content=True
            # there aren't enough docs to satisfy batch size, only 1 shard will have it
            if os.path.exists(indexer_path):
                with BaseIndexer.load(indexer_path) as indexer:
                    if indexer_fname == KV_IDX_FILENAME:
                        assert isinstance(indexer, BinaryPbIndexer)
                    else:
                        assert isinstance(indexer, NumpyIndexer)
                    indexers_full_size += indexer.size

        assert indexers_full_size == expected_size
        assert cache_full_size == expected_size
Esempio n. 6
0
    def test_compositional_route(self):
        """Check method routing of a compound executor, including persistence.

        Without routes, ``say_all()`` collects the answers of all components
        and ``say()`` raises ``AttributeError``. With a route, ``say()`` is
        answered by the routed component. A route added without
        ``is_stored=True`` is not reflected in the saved config, while one
        added with ``is_stored=True`` is, both in the config and in the
        binary dump.
        """
        da = dummyA()
        db = dummyB()
        a = CompoundExecutor()

        a.components = lambda: [da, db]
        assert a.say_all() == ['a', 'b']
        with self.assertRaises(AttributeError):
            a.say()

        b = CompoundExecutor({'say': {da.name: 'say'}})
        b.components = lambda: [da, db]
        # Register the output files before they are written, so they are still
        # tracked in ``tmp_files`` when an assertion below fails.
        self.tmp_files.append(b.config_abspath)
        self.tmp_files.append(b.save_abspath)

        assert b.say_all() == ['a', 'b']
        assert b.say() == 'a'
        b.add_route('say', db.name, 'say')
        assert b.say() == 'b'
        b.save_config()
        self.assertTrue(os.path.exists(b.config_abspath))

        # The route added above was not stored, so the reloaded executor
        # still answers with the original route.
        c = BaseExecutor.load_config(b.config_abspath)
        assert c.say_all() == ['a', 'b']
        assert c.say() == 'a'

        # A stored route is written to the config and survives a reload.
        b.add_route('say', db.name, 'say', is_stored=True)
        b.save_config()
        c = BaseExecutor.load_config(b.config_abspath)
        assert c.say_all() == ['a', 'b']
        assert c.say() == 'b'

        b.touch()
        b.save()
        self.assertTrue(os.path.exists(b.save_abspath))

        # The binary dump keeps the stored route as well.
        d = BaseExecutor.load(b.save_abspath)
        assert d.say_all() == ['a', 'b']
        assert d.say() == 'b'
Esempio n. 7
0
def test_compositional_route(monkeypatch):
    """Check method routing of a compound executor, including persistence.

    :param monkeypatch: pytest fixture, used to set
        ``BaseExecutor.exec_methods`` to ``['say']`` for this test.

    Without routes, ``say_all()`` collects the answers of all components and
    ``say()`` raises ``AttributeError``. With a route, ``say()`` is answered
    by the routed component. A route added without ``is_stored=True`` is not
    reflected in the saved config, while one added with ``is_stored=True``
    is, both in the config and in the binary dump. Generated files are
    removed afterwards, even when one of the assertions fails.
    """
    monkeypatch.setattr(BaseExecutor, 'exec_methods', ['say'])
    da = DummyA()
    db = DummyB()
    a = CompoundExecutor()

    a.components = lambda: [da, db]
    assert a.say_all() == ['a', 'b']
    with pytest.raises(AttributeError):
        a.say()

    b = CompoundExecutor({'say': {da.name: 'say'}})
    b.components = lambda: [da, db]
    try:
        assert b.say_all() == ['a', 'b']
        assert b.say() == 'a'
        b.add_route('say', db.name, 'say')
        assert b.say() == 'b'
        b.save_config()
        assert Path(b.config_abspath).exists()

        # The route added above was not stored, so the reloaded executor
        # still answers with the original route.
        c = BaseExecutor.load_config(b.config_abspath)
        assert c.say_all() == ['a', 'b']
        assert c.say() == 'a'

        # A stored route is written to the config and survives a reload.
        b.add_route('say', db.name, 'say', is_stored=True)
        b.save_config()
        c = BaseExecutor.load_config(b.config_abspath)
        assert c.say_all() == ['a', 'b']
        assert c.say() == 'b'

        b.touch()
        b.save()
        assert Path(b.save_abspath).exists()

        # The binary dump keeps the stored route as well.
        d = BaseExecutor.load(b.save_abspath)
        assert d.say_all() == ['a', 'b']
        assert d.say() == 'b'
    finally:
        # Clean up in ``finally`` so that a failing assertion does not leave
        # dump/config files behind for subsequent tests.
        # NOTE(review): assumes rm_files tolerates paths that were never
        # created -- confirm against its implementation.
        rm_files([b.save_abspath, b.config_abspath])