Esempio n. 1
0
def test_cache_content_driver_same_content(tmpdir):
    """Content-hash cache: dedupe on index, then update and delete entries.

    Two docs with different ids but identical text collide on
    ``content_hash``; MockBaseCacheDriver signals a cache hit by raising
    NotImplementedError, which is how the duplicate is detected here.
    """
    doc1 = Document(id=1)
    doc1.text = 'blabla'
    doc1.update_content_hash()
    docs1 = DocumentSet([doc1])

    doc2 = Document(id=2)
    doc2.text = 'blabla'
    doc2.update_content_hash()
    docs2 = DocumentSet([doc2])
    # identical text -> identical content hash despite different ids
    assert doc1.content_hash == doc2.content_hash

    driver = MockBaseCacheDriver()
    filename = None

    with DocIDCache(tmpdir, field=CONTENT_HASH_KEY) as executor:
        driver.attach(executor=executor, runtime=None)
        driver._traverse_apply(docs1)

        # second doc duplicates the first by content hash -> cache hit
        with pytest.raises(NotImplementedError):
            driver._traverse_apply(docs2)

        assert executor.size == 1
        filename = executor.save_abspath

    # update
    old_doc = Document(id=9999)
    old_doc.text = 'blabla'
    old_doc.update_content_hash()

    new_string = 'blabla-new'
    doc1.text = new_string
    doc1.update_content_hash()
    with BaseExecutor.load(filename) as executor:
        # re-point the cached entry for id 1 at the new content hash
        executor.update([UniqueId(1)], [doc1.content_hash])

    with BaseExecutor.load(filename) as executor:
        # new hash present; the superseded hash queries as None (a miss)
        assert executor.query(doc1.content_hash) is True
        assert executor.query(old_doc.content_hash) is None

    # delete
    with BaseExecutor.load(filename) as executor:
        executor.delete([UniqueId(doc1.id)])

    with BaseExecutor.load(filename) as executor:
        assert executor.query(doc1.content_hash) is None
Esempio n. 2
0
 def test_save_and_load_config(self):
     """Round-trip the encoder config via YAML; `channel_axis` must survive."""
     encoder = self.get_encoder()
     if encoder is None:
         # encoder not available in this environment; nothing to test
         return
     encoder.save_config()
     self.assertTrue(os.path.exists(encoder.config_abspath))
     encoder_loaded = BaseExecutor.load_config(encoder.config_abspath)
     self.assertEqual(encoder_loaded.channel_axis, encoder.channel_axis)
Esempio n. 3
0
def test_share_workspace(tmpdir, replica_id):
    """Saving a replica-specific executor creates its workspace binary."""
    with BaseExecutor.load_config('yaml/test-workspace.yml', True,
                                  replica_id) as exec_:
        exec_.touch()
        target = tmpdir.join(f'{exec_.name}-{replica_id}-{exec_.name}.bin')
        exec_.save(target)
        assert os.path.exists(target)
Esempio n. 4
0
def get_indexers(tmpdir):
    """Build a LevelDBIndexer two ways: via constructor and via YAML config."""
    from jina.executors import BaseExecutor

    by_constructor = LevelDBIndexer(level='doc',
                                    index_filename=Path(tmpdir) / 'leveldb.db')
    by_yaml = BaseExecutor.load_config(str(cur_dir / 'yaml/test-leveldb.yml'))
    return by_constructor, by_yaml
Esempio n. 5
0
def test_cache_content_driver_same_content(tmpdir, test_metas):
    """DocCache on content hash: dedupe on index, then update and delete.

    Same scenario as the DocIDCache variant but with string ids, a
    `fields` tuple, and `query` returning False (not None) for misses.
    """
    doc1 = Document(id='1')
    doc1.text = 'blabla'
    doc1.update_content_hash()
    docs1 = DocumentSet([doc1])

    doc2 = Document(id='2')
    doc2.text = 'blabla'
    doc2.update_content_hash()
    docs2 = DocumentSet([doc2])
    # identical text -> identical content hash despite different ids
    assert doc1.content_hash == doc2.content_hash

    driver = MockBaseCacheDriver()

    with DocCache(tmpdir, metas=test_metas,
                  fields=(CONTENT_HASH_KEY, )) as executor:
        driver.attach(executor=executor, runtime=None)
        driver._apply_all(docs1)

        # duplicate content -> cache hit, signalled by NotImplementedError
        with pytest.raises(NotImplementedError):
            driver._apply_all(docs2)

        assert executor.size == 1
        filename = executor.save_abspath

    # update
    old_doc = Document(id=9999)
    old_doc.text = 'blabla'
    old_doc.update_content_hash()

    new_string = 'blabla-new'
    doc1.text = new_string
    doc1.update_content_hash()
    with BaseExecutor.load(filename) as executor:
        # re-point the cached entry for id '1' at the new content hash
        executor.update(['1'], [doc1.content_hash])

    with BaseExecutor.load(filename) as executor:
        # new hash present; the superseded hash queries as False
        assert executor.query(doc1.content_hash) is True
        assert executor.query(old_doc.content_hash) is False

    # delete
    with BaseExecutor.load(filename) as executor:
        executor.delete([doc1.id])

    with BaseExecutor.load(filename) as executor:
        assert executor.query(doc1.content_hash) is False
def test_save_and_load_config():
    """Round-trip an encoder config through YAML; `dim` must survive."""
    original = OneHotTextEncoder(workspace=os.environ['TEST_WORKDIR'])
    original.save_config()
    assert os.path.exists(original.config_abspath)

    restored = BaseExecutor.load_config(original.config_abspath)
    assert restored.dim == original.dim
    # register generated files for cleanup
    add_tmpfile(restored.config_abspath, restored.save_abspath)
Esempio n. 7
0
    def test_pod_new_api_from_kwargs(self):
        """Executor YAML with a custom driver loads and runs inside a Pod."""
        executor = BaseExecutor.load_config('mwu-encoder/mwu_encoder_driver.yml')
        first_driver = executor._drivers['ControlRequest'][0]
        assert first_driver.__class__.__name__ == 'MyAwesomeDriver'

        yaml_path = os.path.join(cur_dir, 'mwu-encoder/mwu_encoder_driver.yml')
        with Pod(uses=yaml_path):
            # the custom driver prints a message when the pod terminates
            pass
Esempio n. 8
0
    def test_save_and_load_config(self):
        """Persisted YAML config restores the encoder's `dim` attribute."""
        original = OneHotTextEncoder(workspace=os.environ['TEST_WORKDIR'])
        original.save_config()
        self.assertTrue(os.path.exists(original.config_abspath))

        restored = BaseExecutor.load_config(original.config_abspath)
        self.assertEqual(restored.dim, original.dim)

        # schedule generated files for cleanup
        self.add_tmpfile(restored.config_abspath, restored.save_abspath)
Esempio n. 9
0
def test_pod_new_api_from_kwargs():
    """Custom-driver YAML loads standalone and inside a Pod."""
    yaml_path = str(cur_dir / 'mwu-encoder/mwu_encoder_driver.yml')
    executor = BaseExecutor.load_config(yaml_path)
    control_driver = executor._drivers['ControlRequest'][0]
    assert control_driver.__class__.__name__ == 'MyAwesomeDriver'

    with Pod(uses=yaml_path):
        # the driver prints a custom task_name when the pod terminates
        pass
def test_textpaddlehubencoder_save_and_load(mocker):
    """Binary save/load round-trip preserves `model_name`."""
    original = TextPaddlehubEncoder()
    original.touch()
    original.save()
    assert os.path.exists(original.save_abspath)

    restored = BaseExecutor.load(original.save_abspath)
    assert restored.model_name == original.model_name

    add_tmpfile(original.save_abspath, original.config_abspath)
    teardown()
Esempio n. 11
0
 def test_share_workspace(self):
     """Each replica id saves into its own '<name>-<id>' directory."""
     for j in range(3):
         a = BaseExecutor.load_config('yaml/test-workspace.yml', True, j)
         a.touch()
         a.save()
         # the saved binary lands in the replica-specific folder
         self.assertTrue(
             os.path.exists('%s-%s/%s.bin' % (a.name, j, a.name)))
         # register both the binary and its directory for cleanup
         self.add_tmpfile('%s-%s/%s.bin' % (a.name, j, a.name))
         self.add_tmpfile('%s-%s' % (a.name, j))
Esempio n. 12
0
def test_incremental_indexing_sequential_indexers(random_workspace):
    """Re-indexing overlapping batches stores only unique documents."""
    duplicate_docs, num_uniq_docs = get_duplicate_docs(num_docs=20)

    flow = (Flow()
            .add(uses=os.path.join(cur_dir, 'uniq_vectorindexer.yml'))
            .add(uses=os.path.join(cur_dir, 'uniq_docindexer.yml')))

    with flow:
        # second call re-sends documents already indexed by the first
        flow.index(duplicate_docs[:10])
        flow.index(duplicate_docs)

    with BaseExecutor.load(random_workspace / 'vec_idx.bin') as vec_idx:
        assert isinstance(vec_idx, NumpyIndexer)
        assert vec_idx._size == num_uniq_docs

    with BaseExecutor.load(random_workspace / 'doc_idx.bin') as doc_idx:
        assert isinstance(doc_idx, BinaryPbIndexer)
        assert doc_idx._size == num_uniq_docs
Esempio n. 13
0
 def test_joint_indexer(self):
     """Drivers of a joint (compound) executor bind to its components."""
     b = BaseExecutor.load_config('yaml/test-joint.yml')
     print(b[0].name)
     print(type(b[0]))
     print(b._drivers['SearchRequest'][0]._executor_name)
     print(b._drivers['SearchRequest'])
     b.attach(pea=None)
     # after attach, the first/last SearchRequest drivers point at
     # the first/second components respectively
     self.assertEqual(b._drivers['SearchRequest'][0]._exec, b[0])
     self.assertEqual(b._drivers['SearchRequest'][-1]._exec, b[1])
Esempio n. 14
0
 def test_share_workspace(self):
     """Each replica id saves into its own '<name>-<id>' directory."""
     for j in range(3):
         a = BaseExecutor.load_config(
             os.path.join(cur_dir, 'yaml/test-workspace.yml'), True, j)
         a.touch()
         a.save()
         # the saved binary lands in the replica-specific folder
         self.assertTrue(os.path.exists(f'{a.name}-{j}/{a.name}.bin'))
         # register both the binary and its directory for cleanup
         self.add_tmpfile(f'{a.name}-{j}/{a.name}.bin')
         self.add_tmpfile(f'{a.name}-{j}')
Esempio n. 15
0
 def test_compound_from_yaml(self):
     """YAML-built CompoundExecutor exposes its components' methods."""
     a = BaseExecutor.load_config(os.path.join(cur_dir, 'yaml/npvec.yml'))
     for c in a.components:
         # register each component's index file for cleanup
         self.add_tmpfile(c.index_abspath)
     self.assertTrue(isinstance(a, CompoundExecutor))
     # component methods are forwarded onto the compound executor
     self.assertTrue(callable(getattr(a, 'add')))
     self.assertTrue(callable(getattr(a, 'query')))
     self.assertTrue(callable(getattr(a, 'meta_add')))
     self.assertTrue(callable(getattr(a, 'meta_query')))
Esempio n. 16
0
def test_joint_indexer():
    """Drivers of a joint (compound) executor bind to its components."""
    compound = BaseExecutor.load_config(
        os.path.join(cur_dir, 'yaml/test-joint.yml'))
    print(compound[0].name)
    print(type(compound[0]))
    search_drivers = compound._drivers['SearchRequest']
    print(search_drivers[0]._executor_name)
    print(search_drivers)
    compound.attach(pea=None)
    # first/last SearchRequest drivers point at the first/second components
    assert search_drivers[0]._exec == compound[0]
    assert search_drivers[-1]._exec == compound[1]
Esempio n. 17
0
    def test_load_external(self):
        """External-executor YAML: loading fails without proper registration
        and succeeds with the working config."""
        from jina.executors import BaseExecutor

        with self.assertRaises(ruamel.yaml.constructor.ConstructorError):
            BaseExecutor.load_config(
                os.path.join(cur_dir, 'yaml/dummy_ext_exec.yml'))

        loaded = BaseExecutor.load_config(
            os.path.join(cur_dir, 'yaml/dummy_ext_exec_sucess.yml'))
        self.assertEqual(loaded.__class__.__name__, 'DummyExternalIndexer')
Esempio n. 18
0
def test_shard_workspace(test_workspace, pea_id):
    """Workspace layout depends on pea_id: per-pea subdir vs flat file.

    State set in the first context-managed session (`index_filename`)
    must be visible after reloading the executor from the same YAML.
    """
    tmpdir = os.environ['JINA_TEST_WORKSPACE']
    with BaseExecutor.load_config(os.path.join(cur_dir,
                                               'yaml/test-workspace.yml'),
                                  pea_id=pea_id) as executor:
        executor.index_filename = 'index_filename'
        executor.touch()
    if pea_id > 0:
        # sharded peas save under '<name>-<pea_id>/<name>.bin'
        assert os.path.exists(
            os.path.join(tmpdir, f'{executor.name}-{executor.pea_id}',
                         f'{executor.name}.bin'))
    else:
        # pea 0 saves directly in the workspace root
        assert os.path.exists(os.path.join(tmpdir, f'{executor.name}.bin'))

    with BaseExecutor.load_config(os.path.join(cur_dir,
                                               'yaml/test-workspace.yml'),
                                  pea_id=pea_id) as executor:
        # the attribute stored in the previous session survives the reload
        assert executor.index_filename == 'index_filename'
Esempio n. 19
0
def test_share_workspace(tmpdir, pea_id):
    """Each pea saves its executor under a pea-specific filename."""
    with BaseExecutor.load_config('yaml/test-workspace.yml',
                                  separated_workspace=True,
                                  pea_id=pea_id) as exec_:
        exec_.touch()
        target = Path(tmpdir) / f'{exec_.name}-{pea_id}-{exec_.name}.bin'
        exec_.save(str(target))
        assert target.exists()
Esempio n. 20
0
def test_compound_from_yaml():
    """YAML-defined CompoundExecutor exposes all component methods."""
    compound = BaseExecutor.load_config(str(cur_dir / 'yaml/npvec.yml'))
    assert isinstance(compound, CompoundExecutor)
    for method_name in ('add', 'query', 'meta_add', 'meta_query'):
        assert callable(getattr(compound, method_name))
    # clean up component index files and the shared workspace
    rm_files([c.index_abspath for c in compound.components])
    rm_files(['test-workspace'])
Esempio n. 21
0
def test_save_and_load(*args, **kwargs):
    """Binary save/load round-trip preserves the embeddings setting."""
    original = FlairTextEncoder(embeddings=('word:glove', ),
                                pooling_strategy='mean')
    original.touch()
    original.save()
    assert os.path.exists(original.save_abspath)

    restored = BaseExecutor.load(original.save_abspath)
    assert restored.embeddings == original.embeddings

    rm_files([original.config_abspath, original.save_abspath])
Esempio n. 22
0
def test_videopaddlehubencoder_save_and_load(*args, **kwargs):
    """Binary save/load round-trip preserves `model_name`."""
    original = get_encoder()
    original.touch()
    original.save()
    assert os.path.exists(original.save_abspath)

    restored = BaseExecutor.load(original.save_abspath)
    assert restored.model_name == original.model_name

    add_tmpfile(original.save_abspath, original.config_abspath)
    teardown()
Esempio n. 23
0
 def test_load_cust_with_driver(self):
     """Custom driver declared in YAML loads standalone and in a Pod."""
     a = BaseExecutor.load_config('mwu-encoder/mwu_encoder_driver.yml')
     self.assertEqual(a._drivers['ControlRequest'][0].__class__.__name__,
                      'MyAwesomeDriver')
     p = set_pod_parser().parse_args(
         ['--yaml-path', 'mwu-encoder/mwu_encoder_driver.yml'])
     with Pod(p):
         # the custom driver prints a message when the pod terminates
         pass
Esempio n. 24
0
def test_incremental_indexing_parallel_indexers_with_shards(
        random_workspace, restful):
    """Dedupe across two indexing runs with sharded parallel indexers.

    Each shard holds only part of the data, so uniqueness is asserted on
    the sum of per-shard sizes rather than per shard.
    """
    total_docs = 1000
    duplicate_docs, num_uniq_docs = get_duplicate_docs(num_docs=total_docs)

    num_shards = 4

    # can't use plain _unique in uses_before because workspace will conflict with other
    f = (Flow(restful=restful).add(
        uses=os.path.join(cur_dir, 'vectorindexer.yml'),
        uses_before=os.path.join(cur_dir, '_unique_vec.yml'),
        shards=num_shards,
        name='inc_vec',
        separated_workspace=True).add(
            uses=os.path.join(cur_dir, 'docindexer.yml'),
            uses_before=os.path.join(cur_dir, '_unique_doc.yml'),
            shards=num_shards,
            name='inc_doc',
            needs=['gateway'],
            separated_workspace=True).add(needs=['inc_vec', 'inc_doc']))

    # two separate flow sessions; the second run re-sends everything
    with f:
        f.index(duplicate_docs[:500])

    with f:
        f.index(duplicate_docs)

    # vector index: size summed over all shards must equal the unique count
    vect_idx_size = 0
    for shard_idx in range(num_shards):
        save_abspath = (random_workspace / f'vec_idx-{shard_idx + 1}' /
                        'vec_idx.bin')
        with BaseExecutor.load(save_abspath) as vector_indexer:
            assert isinstance(vector_indexer, NumpyIndexer)
            vect_idx_size += vector_indexer._size
    assert vect_idx_size == num_uniq_docs

    # doc index: same check
    doc_idx_size = 0
    for shard_idx in range(num_shards):
        save_abspath = (random_workspace / f'doc_idx-{shard_idx + 1}' /
                        'doc_idx.bin')
        with BaseExecutor.load(save_abspath) as doc_indexer:
            assert isinstance(doc_indexer, BinaryPbIndexer)
            doc_idx_size += doc_indexer._size
    assert doc_idx_size == num_uniq_docs
Esempio n. 25
0
def test_save_and_load(encoder):
    """A reloaded encoder produces encodings identical to the original."""
    test_data = np.random.rand(num_samples, 3, input_dim, input_dim)
    expected = encoder.encode(test_data)
    encoder.touch()
    encoder.save()
    assert os.path.exists(encoder.save_abspath)

    restored = BaseExecutor.load(encoder.save_abspath)
    actual = restored.encode(test_data)
    assert restored.raw_model_path == encoder.raw_model_path
    np.testing.assert_array_equal(expected, actual)
Esempio n. 26
0
def test_incremental_indexing_parallel_indexers_with_shards(tmpdir):
    """Sharded dedupe via the builtin '_unique' uses_before executor.

    Both index calls happen inside a single flow session; uniqueness is
    asserted on the sum of per-shard index sizes.
    """
    # the flow's YAML resolves its workspace from this env var
    os.environ['JINA_TEST_INCREMENTAL_INDEX_WORKSPACE'] = str(tmpdir)
    total_docs = 1000
    duplicate_docs, num_uniq_docs = get_duplicate_docs(num_docs=total_docs)

    num_shards = 4

    f = (Flow().add(uses=os.path.join(cur_dir, 'vectorindexer.yml'),
                    uses_before='_unique',
                    shards=num_shards,
                    name='inc_vec',
                    separated_workspace=True).add(
                        uses=os.path.join(cur_dir, 'docindexer.yml'),
                        uses_before='_unique',
                        shards=num_shards,
                        name='inc_doc',
                        needs=['gateway'],
                        separated_workspace=True).add(
                            uses='_merge', needs=['inc_vec', 'inc_doc']))

    # second call re-sends documents already indexed by the first
    with f:
        f.index(duplicate_docs[:500])
        f.index(duplicate_docs)

    # vector index: size summed over all shards must equal the unique count
    vect_idx_size = 0
    for shard_idx in range(num_shards):
        save_abspath = os.path.join(tmpdir, f'vec_idx-{shard_idx + 1}',
                                    'vec_idx.bin')
        with BaseExecutor.load(save_abspath) as vector_indexer:
            assert isinstance(vector_indexer, NumpyIndexer)
            vect_idx_size += vector_indexer._size
    assert vect_idx_size == num_uniq_docs

    # doc index: same check
    doc_idx_size = 0
    for shard_idx in range(num_shards):
        save_abspath = os.path.join(tmpdir, f'doc_idx-{shard_idx + 1}',
                                    'doc_idx.bin')
        with BaseExecutor.load(save_abspath) as doc_indexer:
            assert isinstance(doc_indexer, BinaryPbIndexer)
            doc_idx_size += doc_indexer._size
    assert doc_idx_size == num_uniq_docs

    # restore the environment for subsequent tests
    del os.environ['JINA_TEST_INCREMENTAL_INDEX_WORKSPACE']
0
def test_indexer_ref_indexer(test_workspace, pea_id):
    """A ref-indexer YAML inherits state saved by the base indexer.

    Path layout mirrors test_shard_workspace: pea_id > 0 saves into a
    per-pea subdirectory, pea 0 saves flat in the workspace root.
    """
    tmpdir = os.environ['JINA_TEST_WORKSPACE']
    with BaseExecutor.load_config(os.path.join(
            cur_dir, 'yaml/test-indexer-workspace.yml'),
                                  pea_id=pea_id) as ref_indexer:
        ref_indexer.num_dim = 512
        ref_indexer.touch()

    if pea_id > 0:
        # sharded pea: '<name>-<pea_id>/<name>.bin'
        assert os.path.exists(
            os.path.join(tmpdir, f'{ref_indexer.name}-{ref_indexer.pea_id}',
                         f'{ref_indexer.name}.bin'))
    else:
        assert os.path.exists(os.path.join(tmpdir, f'{ref_indexer.name}.bin'))

    with BaseExecutor.load_config(os.path.join(
            cur_dir, 'yaml/test-refindexer-workspace.yml'),
                                  pea_id=pea_id) as indexer:
        # the attribute written to the ref indexer is visible here
        assert indexer.num_dim == 512
Esempio n. 28
0
def test_save_and_load(metas, train_data, test_data, target_output_dim):
    """A trained encoder survives a binary save/load round-trip."""
    original = get_encoder(metas, train_data, target_output_dim)
    expected = original.encode(test_data)
    original.touch()
    original.save()
    assert os.path.exists(original.save_abspath)

    restored = BaseExecutor.load(original.save_abspath)
    np.testing.assert_array_equal(restored.encode(test_data), expected)
Esempio n. 29
0
def test_save_load_config(tmp_path):
    """Transforms spec survives a config round-trip, both from the packaged
    YAML and from a freshly saved one."""
    from jina.executors import BaseExecutor
    from jina.executors.metas import get_default_metas

    metas = get_default_metas()
    metas['workspace'] = str(tmp_path)

    crafter = ImageTorchTransformation([{'RandomVerticalFlip': dict(p=1.0)}],
                                       metas=metas)
    crafter.save_config()
    expected_spec = crafter.transforms_specification

    from_packaged = BaseExecutor.load_config(
        os.path.join(cur_dir, '../tests/config.yaml'))
    from_saved = BaseExecutor.load_config(crafter.config_abspath)

    assert expected_spec == from_packaged.transforms_specification
    assert expected_spec == from_saved.transforms_specification
Esempio n. 30
0
 def test_save_and_load(self):
     """A reloaded encoder must encode identically to the original."""
     encoder = self.get_encoder()
     data = np.random.rand(1, 784)
     encoded_data_control = encoder.encode(data)
     encoder.touch()
     encoder.save()
     self.assertTrue(os.path.exists(encoder.save_abspath))
     encoder_loaded = BaseExecutor.load(encoder.save_abspath)
     encoded_data_test = encoder_loaded.encode(data)
     # identical weights -> identical outputs
     np.testing.assert_array_equal(encoded_data_control, encoded_data_test)