Esempio n. 1
0
def test_shards_insufficient_data():
    """THIS IS SUPER IMPORTANT FOR TESTING SHARDS

    IF THIS FAILED, DONT IGNORE IT, DEBUG IT

    Indexes fewer documents (3) than there are parallel replicas (4),
    re-opens the indexing flow once, and then runs a search through a second
    flow configured with ``polling='all'`` and ``uses_after='_merge_all'``.
    """
    index_docs = 3
    parallel = 4

    # NOTE(review): this callback is never handed to ``search`` below, so its
    # assertions do not run -- confirm whether that is intentional.
    def validate(req):
        assert len(req.docs) == 1
        matches = req.docs[0].matches
        assert len(matches) == index_docs

        for match in matches:
            assert hasattr(match, 'weight')
            assert match.weight
            assert match.meta_info == b'hello world'

    # Settings shared by the indexing flow and the search flow.
    pod_kwargs = dict(name='doc_pb',
                      uses=os.path.join(cur_dir, '../yaml/test-docpb.yml'),
                      parallel=parallel,
                      separated_workspace=True)

    index_flow = Flow().add(**pod_kwargs)
    with index_flow:
        index_flow.index(input_fn=random_docs(index_docs), override_doc_id=False)

    time.sleep(2)
    # Re-enter the indexing flow without sending any request.
    with index_flow:
        pass
    time.sleep(2)

    search_flow = Flow().add(**pod_kwargs, polling='all', uses_after='_merge_all')
    with search_flow:
        search_flow.search(input_fn=random_queries(1, index_docs),
                           override_doc_id=False,
                           callback_on='body')
    time.sleep(2)
    rm_files(['test-docshard-tmp'])
Esempio n. 2
0
def test_load_flow_from_yaml():
    """Load a flow from ``test-flow.yml`` and write it out as a swarm YAML file."""
    flow_yaml = os.path.join(cur_dir, '../yaml/test-flow.yml')
    swarm_yaml = os.path.join(cur_dir, '../yaml/swarm-out.yml')

    with open(flow_yaml) as flow_fp:
        flow = Flow.load_config(flow_fp)
        # The output file is opened first, then the flow itself is entered.
        with open(swarm_yaml, 'w') as swarm_fp, flow:
            flow.to_swarm_yaml(swarm_fp)
        rm_files([swarm_yaml])
Esempio n. 3
0
def test_flow_yaml_dump():
    """Check that ``optimize_level`` survives a save/load round trip through YAML."""
    config_file = 'test1.yml'

    original = Flow(optimize_level=FlowOptimizeLevel.IGNORE_GATEWAY, no_gateway=True)
    original.save_config(config_file)

    restored = Flow.load_config(config_file)
    assert original.args.optimize_level == restored.args.optimize_level
    rm_files([config_file])
Esempio n. 4
0
def test_shards():
    """Index 1000 random documents through a pod with three parallel replicas."""
    docpb_yaml = os.path.join(cur_dir, '../yaml/test-docpb.yml')
    flow = Flow().add(name='doc_pb', uses=docpb_yaml, parallel=3)

    with flow:
        flow.index(input_fn=random_docs(1000), random_doc_id=False)
    # Re-enter the same flow without sending any request.
    with flow:
        pass
    rm_files(['test-docshard-tmp'])
Esempio n. 5
0
def validate(ids, expect):
    """Assert the presence (or absence) of each ``tmp{id}.txt`` file, then remove it.

    :param ids: iterable of identifiers substituted into ``tmp{id}.txt``
    :param expect: expected result of ``os.path.exists`` for every file; when
        truthy, each file must additionally have non-empty content
    """
    for file_id in ids:
        path = f'tmp{file_id}.txt'
        exists = os.path.exists(path)
        assert exists == expect
        if expect:
            with open(path) as handle:
                content = handle.read()
            assert content != ''
        rm_files([path])
Esempio n. 6
0
def test_load_flow_from_yaml():
    """Load a flow from ``test-flow.yml`` and, while it is running, dump it as swarm YAML."""
    yaml_dir = cur_dir.parent / 'yaml'
    swarm_yaml = str(yaml_dir / 'swarm-out.yml')

    with open(yaml_dir / 'test-flow.yml') as flow_fp:
        flow = Flow.load_config(flow_fp)
        # The flow is entered first, then the output file is opened.
        with flow, open(swarm_yaml, 'w') as swarm_fp:
            flow.to_swarm_yaml(swarm_fp)
        rm_files([swarm_yaml])
Esempio n. 7
0
def test_compound_from_yaml():
    """Load ``npvec.yml`` and check it yields a compound executor exposing the routed methods."""
    executor = BaseExecutor.load_config(str(cur_dir / 'yaml/npvec.yml'))
    assert isinstance(executor, CompoundExecutor)

    for method_name in ('add', 'query', 'meta_add', 'meta_query'):
        assert callable(getattr(executor, method_name))

    index_paths = [component.index_abspath for component in executor.components]
    rm_files(index_paths)
    rm_files(['test-workspace'])
Esempio n. 8
0
def test_flow_with_jump():
    """Build a branching flow and verify every pod's socket types.

    The same checks run on the flow built in code and on the flow reloaded
    from the ``tmp.yml`` file it was saved to.
    """
    # (pod name, expected head ``socket_in``, expected tail ``socket_out``),
    # checked in this order.
    expected_sockets = (
        ('gateway', SocketType.PULL_CONNECT, SocketType.PUSH_CONNECT),
        ('r1', SocketType.PULL_BIND, SocketType.PUB_BIND),
        ('r2', SocketType.SUB_CONNECT, SocketType.PUSH_CONNECT),
        ('r3', SocketType.SUB_CONNECT, SocketType.PUSH_CONNECT),
        ('r4', SocketType.PULL_BIND, SocketType.PUSH_CONNECT),
        ('r5', SocketType.PULL_BIND, SocketType.PUSH_CONNECT),
        ('r6', SocketType.PULL_BIND, SocketType.PUSH_CONNECT),
        ('r8', SocketType.PULL_BIND, SocketType.PUSH_CONNECT),
        ('r9', SocketType.PULL_BIND, SocketType.PUSH_CONNECT),
        ('r10', SocketType.PULL_BIND, SocketType.PUSH_BIND),
    )

    def _validate(flow):
        for pod_name, socket_in, socket_out in expected_sockets:
            pod = flow._pod_nodes[pod_name]
            assert pod.head_args.socket_in == socket_in
            assert pod.tail_args.socket_out == socket_out
        # For every pod, the first pea's args must equal both head and tail args.
        for pod in flow._pod_nodes.values():
            first_pea = pod.peas_args['peas'][0]
            assert first_pea == pod.head_args
            assert first_pea == pod.tail_args

    flow = Flow().add(name='r1').add(name='r2')
    for pod_name, needs in (('r3', 'r1'),
                            ('r4', 'r2'),
                            ('r5', 'r3'),
                            ('r6', 'r4'),
                            ('r8', 'r6'),
                            ('r9', 'r5'),
                            ('r10', ['r9', 'r8'])):
        flow = flow.add(name=pod_name, needs=needs)

    with flow:
        _validate(flow)

    flow.save_config('tmp.yml')
    Flow.load_config('tmp.yml')

    with Flow.load_config('tmp.yml') as reloaded:
        _validate(reloaded)

    rm_files(['tmp.yml'])
Esempio n. 9
0
def test_compositional_dump():
    """Save a compound executor and its config, then check both files exist."""
    executor = CompoundExecutor()
    executor.components = lambda: [BaseExecutor(), BaseExecutor()]
    assert executor.name

    executor.touch()
    executor.save()
    executor.save_config()

    for artifact in (executor.save_abspath, executor.config_abspath):
        assert Path(artifact).exists()
    rm_files([executor.save_abspath, executor.config_abspath])
Esempio n. 10
0
def test_compositional_dump(test_metas):
    """Save a compound executor built with ``test_metas`` and check both output files exist."""
    executor = CompoundExecutor(metas=test_metas)
    executor.components = lambda: [BaseExecutor(), BaseExecutor()]
    assert executor.name

    executor.touch()
    executor.save()
    executor.save_config()

    for artifact in (executor.save_abspath, executor.config_abspath):
        assert os.path.exists(artifact)
    rm_files([executor.save_abspath, executor.config_abspath])
Esempio n. 11
0
def test_flow_yaml_dump():
    """Check that ``logserver_config`` and ``optimize_level`` survive a YAML round trip."""
    config_file = 'test1.yml'
    server_config = os.path.join(cur_dir, '../yaml/test-server-config.yml')

    original = Flow(logserver_config=server_config,
                    optimize_level=FlowOptimizeLevel.IGNORE_GATEWAY,
                    no_gateway=True)
    original.save_config(config_file)

    restored = Flow.load_config(config_file)
    for field in ('logserver_config', 'optimize_level'):
        assert getattr(original.args, field) == getattr(restored.args, field)
    rm_files([config_file])
Esempio n. 12
0
def test_shards():
    """Index 1000 random documents through three parallel replicas with separated workspaces."""
    docpb_yaml = str(cur_dir.parent / 'yaml' / 'test-docpb.yml')
    flow = Flow().add(name='doc_pb', uses=docpb_yaml, parallel=3, separated_workspace=True)

    with flow:
        flow.index(input_fn=random_docs(1000), random_doc_id=False)
    # Re-enter the same flow without sending any request.
    with flow:
        pass
    rm_files(['test-docshard-tmp'])
Esempio n. 13
0
def test_index_text_files():
    """Index ``*.py`` files and check that every document in the response has text."""

    def validate(req):
        for doc in req.docs:
            assert doc.text

    flow = Flow(read_only=True).add(
        uses=os.path.join(cur_dir, '../yaml/datauriindex.yml'),
        timeout_ready=-1,
    )

    with flow:
        flow.index_files('*.py', output_fn=validate, callback_on='body')

    rm_files(['doc.gzip'])
Esempio n. 14
0
def test_shards(restful):
    """Index 1000 random documents through three parallel replicas.

    :param restful: forwarded unchanged to the ``Flow`` constructor
    """
    flow = Flow(restful=restful).add(
        name='doc_pb',
        uses=os.path.join(cur_dir, '../yaml/test-docpb.yml'),
        parallel=3,
        separated_workspace=True,
    )

    with flow:
        flow.index(input_fn=random_docs(1000), random_doc_id=False)
    # Re-enter the same flow without sending any request.
    with flow:
        pass
    rm_files(['test-docshard-tmp'])
Esempio n. 15
0
def test_transform_encoder_train_and_encode():
    """Train a ``TransformEncoder`` on random data and check the encoded output's shape and type."""
    training_set = np.random.rand(2000, input_dim)
    encoder = TransformEncoder(output_dim=target_output_dim)

    from sklearn.random_projection import GaussianRandomProjection
    encoder.model = GaussianRandomProjection(n_components=target_output_dim)
    encoder.train(training_set)

    samples = np.random.rand(10, input_dim)
    encoded = encoder.encode(samples)

    expected_shape = (samples.shape[0], target_output_dim)
    assert encoded.shape == expected_shape
    assert type(encoded) == np.ndarray

    rm_files([encoder.save_abspath, encoder.config_abspath])
Esempio n. 16
0
def test_transform_encoder_load_from_pickle():
    """Pickle a fitted projection model, load it through ``TransformEncoder`` and compare outputs.

    The encoder built from ``model_path`` must produce the same result as
    calling ``transform`` on the model that was pickled.
    """
    train_data = np.random.rand(2000, input_dim)
    filename = 'transformer_model.model'
    from sklearn.random_projection import GaussianRandomProjection
    model = GaussianRandomProjection(n_components=target_output_dim)
    fitted_model = model.fit(train_data)
    # Write through a context manager so the file is flushed and closed before
    # the encoder reads it back; the bare ``open`` used previously leaked the
    # handle and relied on garbage collection to flush the pickle.
    with open(filename, 'wb') as fp:
        pickle.dump(fitted_model, fp)
    encoder = TransformEncoder(model_path=filename)
    test_data = np.random.rand(10, input_dim)
    encoded_data = encoder.encode(test_data)
    transformed_data = model.transform(test_data)
    assert encoded_data.shape == (test_data.shape[0], target_output_dim)
    assert type(encoded_data) == np.ndarray
    np.testing.assert_almost_equal(encoded_data, transformed_data)
    rm_files([encoder.config_abspath, filename, encoder.save_abspath])
Esempio n. 17
0
def test_flow_identical():
    """Check that a YAML-defined flow equals its in-code and reloaded counterparts.

    Afterwards the YAML-defined flow is started and the socket types of the
    gateway and of the three parallel pods are verified.
    """
    with open(os.path.join(cur_dir, '../yaml/test-flow.yml')) as fp:
        from_yaml = Flow.load_config(fp)

    from_code = (Flow()
                 .add(name='chunk_seg', parallel=3)
                 .add(name='wqncode1', parallel=2)
                 .add(name='encode2', parallel=2, needs='chunk_seg')
                 .join(['wqncode1', 'encode2']))

    from_yaml.save_config('test2.yml')
    reloaded = Flow.load_config('test2.yml')

    assert from_yaml == from_code
    assert from_yaml == reloaded

    # (pod name, expected head ``socket_in``, expected tail ``socket_out``);
    # all other socket expectations are the same for these three pods.
    parallel_pods = (
        ('chunk_seg', SocketType.PULL_BIND, SocketType.PUB_BIND),
        ('wqncode1', SocketType.SUB_CONNECT, SocketType.PUSH_CONNECT),
        ('encode2', SocketType.SUB_CONNECT, SocketType.PUSH_CONNECT),
    )

    with from_yaml as flow:
        gateway = flow._pod_nodes['gateway']
        assert gateway.head_args.socket_in == SocketType.PULL_CONNECT
        assert gateway.tail_args.socket_out == SocketType.PUSH_CONNECT

        for pod_name, head_in, tail_out in parallel_pods:
            pod = flow._pod_nodes[pod_name]
            assert pod.head_args.socket_in == head_in
            assert pod.head_args.socket_out == SocketType.ROUTER_BIND
            for pea_args in pod.peas_args['peas']:
                assert pea_args.socket_in == SocketType.DEALER_CONNECT
                assert pea_args.socket_out == SocketType.PUSH_CONNECT
            assert pod.tail_args.socket_in == SocketType.PULL_BIND
            assert pod.tail_args.socket_out == tail_out

    rm_files(['test2.yml'])
Esempio n. 18
0
def test_index_text_files(mocker, restful):
    """Index ``*.py`` files and check that every returned document carries text.

    The response callback is a mock wrapping ``validate``, so the test both
    runs the assertions and verifies that the callback was invoked.

    :param mocker: fixture providing ``Mock``
    :param restful: forwarded unchanged to the ``Flow`` constructor
    """
    def validate(req):
        assert len(req.docs) > 0
        for d in req.docs:
            assert d.text

    # ``wraps`` is the Mock argument that forwards calls to the wrapped
    # callable. The previous ``wrap=validate`` merely set an attribute named
    # ``wrap`` on the mock, so ``validate`` was never executed.
    response_mock = mocker.Mock(wraps=validate)

    f = (Flow(restful=restful, read_only=True)
         .add(uses=os.path.join(cur_dir, '../yaml/datauriindex.yml'), timeout_ready=-1))

    with f:
        f.index_files('*.py', on_done=response_mock, callback_on='body')

    rm_files(['doc.gzip'])
    response_mock.assert_called()
Esempio n. 19
0
def test_shards_insufficient_data(mocker, restful):
    """THIS IS SUPER IMPORTANT FOR TESTING SHARDS

    IF THIS FAILED, DONT IGNORE IT, DEBUG IT

    Indexes fewer documents (3) than there are parallel replicas (4),
    re-opens the indexing flow once, then searches through a second flow
    using ``polling='all'`` and ``uses_after='_merge_chunks'``. The search
    callback must fire exactly once.

    :param mocker: fixture providing ``Mock``
    :param restful: forwarded unchanged to both ``Flow`` constructors
    """
    index_docs = 3
    parallel = 4

    call_tracker = mocker.Mock()

    def validate(req):
        # Record the invocation before asserting on the response.
        call_tracker()
        assert len(req.docs) == 1
        matches = req.docs[0].matches
        assert len(matches) == index_docs

        for match in matches:
            assert hasattr(match, 'weight')
            assert match.weight

    index_flow = Flow(restful=restful).add(
        name='doc_pb',
        uses=os.path.join(cur_dir, '../yaml/test-docpb.yml'),
        parallel=parallel,
        separated_workspace=True,
    )
    with index_flow:
        index_flow.index(input_fn=random_docs(index_docs))

    time.sleep(2)
    # Re-enter the indexing flow without sending any request.
    with index_flow:
        pass
    time.sleep(2)

    search_flow = Flow(restful=restful).add(
        name='doc_pb',
        uses=os.path.join(cur_dir, '../yaml/test-docpb.yml'),
        parallel=parallel,
        separated_workspace=True,
        polling='all',
        uses_after='_merge_chunks',
    )
    with search_flow:
        search_flow.search(input_fn=random_queries(1, index_docs),
                           callback_on='body',
                           on_done=validate)
    time.sleep(2)
    rm_files(['test-docshard-tmp'])
    call_tracker.assert_called_once()
Esempio n. 20
0
def test_cache_driver_twice():
    """Apply the cache driver to the same documents twice, then to a fresh batch.

    The second pass over the same documents is expected to raise
    ``NotImplementedError``; the fresh batch must go through without error.
    """
    first_batch = list(random_docs(10))
    driver = MockCacheDriver()

    with DocIDCache(filename) as executor:
        assert not executor.handler_mutex
        driver.attach(executor=executor, pea=None)

        # First pass over the batch.
        driver._traverse_apply(first_batch)

        # Same documents again: treated as duplicates.
        with pytest.raises(NotImplementedError):
            driver._traverse_apply(first_batch)

        # A newly generated batch.
        second_batch = list(random_docs(10))
        driver._traverse_apply(second_batch)

        # The cache file must exist on disk at this point.
        assert os.path.exists(filename)
        rm_files([filename])
Esempio n. 21
0
def test_compositional_route(monkeypatch):
    """Exercise method routing on ``CompoundExecutor`` across config and binary round trips.

    Covers four situations in order: no route, a route given at construction,
    a route added at runtime (with and without ``is_stored=True``), and a
    route restored from the saved binary.

    :param monkeypatch: fixture used to replace ``BaseExecutor.exec_methods``
    """
    # Restrict the executor methods under test to ``say`` for this test only.
    monkeypatch.setattr(BaseExecutor, 'exec_methods', ['say'])
    da = DummyA()
    db = DummyB()
    a = CompoundExecutor()

    # No route configured: ``say_all`` works, a plain ``say`` is expected to raise.
    a.components = lambda: [da, db]
    assert a.say_all() == ['a', 'b']
    with pytest.raises(AttributeError):
        a.say()

    # Route ``say`` to the first component at construction time.
    b = CompoundExecutor({'say': {da.name: 'say'}})
    b.components = lambda: [da, db]
    assert b.say_all() == ['a', 'b']
    assert b.say() == 'a'
    # Re-route ``say`` to the second component at runtime (``is_stored`` not given).
    b.add_route('say', db.name, 'say')
    assert b.say() == 'b'
    b.save_config()
    assert Path(b.config_abspath).exists()

    # The reloaded config is expected to still route ``say`` to the first component.
    c = BaseExecutor.load_config(b.config_abspath)
    assert c.say_all() == ['a', 'b']
    assert c.say() == 'a'

    # With ``is_stored=True`` the new route is expected to survive the config round trip.
    b.add_route('say', db.name, 'say', is_stored=True)
    b.save_config()
    c = BaseExecutor.load_config(b.config_abspath)
    assert c.say_all() == ['a', 'b']
    assert c.say() == 'b'

    # Save the executor itself and check the file was written.
    b.touch()
    b.save()
    assert Path(b.save_abspath).exists()

    # The executor loaded from that file is expected to keep the route to the second component.
    d = BaseExecutor.load(b.save_abspath)
    assert d.say_all() == ['a', 'b']
    assert d.say() == 'b'

    rm_files([b.save_abspath, b.config_abspath])
Esempio n. 22
0
def test_cache_driver_from_file():
    """Pre-populate the cache file with document hashes and check they count as duplicates.

    Documents whose hashes were written to the file are expected to raise
    ``NotImplementedError``; a fresh batch must go through without error.
    """
    cached_docs = list(random_docs(10))
    with open(filename, 'wb') as cache_file:
        id_hashes = np.array([uid.id2hash(doc.id) for doc in cached_docs],
                             dtype=np.int64)
        cache_file.write(id_hashes.tobytes())

    driver = MockCacheDriver()
    with DocIDCache(filename) as executor:
        assert not executor.handler_mutex
        driver.attach(executor=executor, pea=None)

        # Documents already present in the file: treated as duplicates.
        with pytest.raises(NotImplementedError):
            driver._traverse_apply(cached_docs)

        # A newly generated batch.
        fresh_docs = list(random_docs(10))
        driver._traverse_apply(fresh_docs)

        # The cache file must exist on disk at this point.
        assert os.path.exists(filename)
        rm_files([filename])
Esempio n. 23
0
def test_standard_query():
    """Run a top-10 query against the indexer loaded from ``a.bin`` and log memory/time usage.

    Memory is sampled before loading the indexer and again right after the
    query. The difference and the query duration are appended as one JSON
    line to ``summary_file``.
    """
    # Baseline memory reading, taken before the indexer is loaded.
    mem1 = used_memory(1)
    print(used_memory_readable())
    with NumpyIndexer.load('a.bin') as ni:
        ni.batch_size = 256
        print(used_memory_readable())
        print(ni.raw_ndarray.shape)
        print(used_memory_readable())
        with TimeContext('query topk') as ti:
            result = ni.query(queries, top_k=10)
            # Second memory reading, taken immediately after the query returns.
            mem2 = used_memory(1)
            print(used_memory_readable())
            print(result[0].shape)
        # Append mode: each run adds one JSON record followed by a newline.
        with open(summary_file, 'a') as fp:
            json.dump(
                {
                    'name': 'naive',
                    'memory': mem2 - mem1,
                    'readable': get_readable_size(mem2 - mem1),
                    'time': ti.duration
                }, fp)
            fp.write('\n')

    # ``ni`` is still bound after the ``with`` block; only its path attributes are read here.
    rm_files([ni.index_abspath, ni.save_abspath, 'a.bin', 'a.gz'])
Esempio n. 24
0
def run_around_tests():
    """Yield once, then remove the listed index and data files."""
    leftovers = [
        'vec1.gz', 'vec2.gz', 'chunk1.gz', 'chunk2.gz', 'vecidx1.bin',
        'vecidx2.bin', 'kvidx1.bin', 'kvidx2.bin'
    ]
    yield
    # Runs only when the generator is resumed after the yield.
    rm_files(leftovers)