예제 #1
0
 def test_check_input(self):
     """Exercise ``PyClient.check_input`` with bytes and protobuf input.

     Bytes are checked with the default arguments, ``Document`` objects with
     ``input_type=ClientInputType.PROTOBUF``.  Passing ``Document`` objects
     without that flag is expected to raise ``TypeError``.
     """
     byte_items = iter([b'1234', b'45467'])
     PyClient.check_input(byte_items)

     proto_items = iter([Document(), Document()])
     PyClient.check_input(proto_items, input_type=ClientInputType.PROTOBUF)

     # A bytes/str mix (e.g. [b'1234', '45467']) is deliberately not asserted
     # as a failure here: per the original author's note, str is converted to
     # binary, so that case is not a valid negative test.
     unflagged_docs = iter([Document()])
     with self.assertRaises(TypeError):
         PyClient.check_input(unflagged_docs)
예제 #2
0
 def test_check_input(self):
     """Exercise ``PyClient.check_input`` with accepted and rejected iterators.

     Iterators of bytes and of ``Document`` objects must pass; an iterator
     mixing bytes, str and a list, and one mixing a ``Document`` with
     ``None``, must each raise ``TypeError``.
     """
     PyClient.check_input(iter([b'1234', b'45467']))
     PyClient.check_input(iter([Document(), Document()]))

     mixed_types = iter([b'1234', '45467', [12, 2, 3]])
     with self.assertRaises(TypeError):
         PyClient.check_input(mixed_types)

     doc_then_none = iter([Document(), None])
     with self.assertRaises(TypeError):
         PyClient.check_input(doc_then_none)
예제 #3
0
 def test_check_input(self):
     """Exercise ``PyClient.check_input`` with and without ``input_type``.

     Bytes pass with the default arguments and ``Document`` objects pass with
     ``input_type=ClientInputType.PROTOBUF``.  A bytes/str mix, and
     ``Document`` objects given without the flag, must raise ``TypeError``.
     """
     raw_stream = iter([b'1234', b'45467'])
     PyClient.check_input(raw_stream)

     doc_stream = iter([Document(), Document()])
     PyClient.check_input(doc_stream, input_type=ClientInputType.PROTOBUF)

     bytes_and_str = iter([b'1234', '45467'])
     with self.assertRaises(TypeError):
         PyClient.check_input(bytes_and_str)

     doc_without_flag = iter([Document()])
     with self.assertRaises(TypeError):
         PyClient.check_input(doc_without_flag)
예제 #4
0
 def test_check_input(self):
     """Exercise ``PyClient.check_input`` with and without ``in_proto``.

     Bytes pass with the default arguments and ``Document`` objects pass with
     ``in_proto=True``.  A bytes/str mix, and a ``Document`` given without
     ``in_proto``, must raise ``TypeError``.
     """
     PyClient.check_input(iter([b'1234', b'45467']))
     PyClient.check_input(iter([Document(), Document()]), in_proto=True)

     bytes_and_str = iter([b'1234', '45467'])
     with self.assertRaises(TypeError):
         PyClient.check_input(bytes_and_str)

     doc_without_flag = iter([Document()])
     with self.assertRaises(TypeError):
         PyClient.check_input(doc_without_flag)
예제 #5
0
def test_input_files(patterns, recursive, size, sampling_rate, read_mode):
    """Pass the result of ``input_files`` to ``PyClient.check_input``.

    All five arguments are forwarded unchanged, by keyword, to
    ``input_files``.  Their values come from outside this function
    (presumably a pytest parametrization -- not visible here).
    """
    file_stream = input_files(
        patterns=patterns,
        recursive=recursive,
        size=size,
        sampling_rate=sampling_rate,
        read_mode=read_mode,
    )
    PyClient.check_input(file_stream)
예제 #6
0
    def test_io_files(self):
        """Check ``input_files`` under several option sets, then index ``*.py`` files.

        Each option set is applied to the ``'*.*'`` pattern and the result is
        handed to ``PyClient.check_input``.  Afterwards a flow with a
        ``URI2Buffer`` component indexes ``'*.py'`` files, and the callback
        asserts every returned document has mime type ``'text/x-python'``.
        """
        option_sets = (
            {},
            {'recursive': True},
            {'size': 2},
            {'size': 2, 'read_mode': 'rb'},
            {'sampling_rate': 0.5},
        )
        for options in option_sets:
            PyClient.check_input(input_files('*.*', **options))

        flow = Flow().add(uses='- !URI2Buffer {}')

        def validate_mime_type(req):
            # Callback given to ``flow.index``; inspects the docs of the index request.
            for doc in req.index.docs:
                self.assertEqual(doc.mime_type, 'text/x-python')

        with flow:
            flow.index(input_files('*.py'), validate_mime_type)
예제 #7
0
def test_check_input():
    """Exercise ``PyClient.check_input`` with accepted and rejected iterators.

    Iterators of bytes and of ``Document`` objects must pass; an iterator
    mixing bytes, str and a list, and one mixing a ``Document`` with ``None``,
    must each raise ``TypeError``.
    """
    PyClient.check_input(iter([b'1234', b'45467']))
    PyClient.check_input(iter([Document(), Document()]))

    mixed_types = iter([b'1234', '45467', [12, 2, 3]])
    with pytest.raises(TypeError):
        PyClient.check_input(mixed_types)

    doc_then_none = iter([Document(), None])
    with pytest.raises(TypeError):
        PyClient.check_input(doc_then_none)
예제 #8
0
def test_incremental_indexing_sequential_indexers(random_workspace):
    """Index the first ten documents, then the full list, and check both indexer sizes.

    The flow chains the vector indexer and the doc indexer defined by the two
    YAML files next to this test.  After both indexing runs, the executors
    loaded from ``vec_idx.bin`` and ``doc_idx.bin`` under ``random_workspace``
    must each report ``_size`` equal to the unique-document count returned by
    ``get_duplicate_docs``.
    """
    docs, unique_count = get_duplicate_docs(num_docs=20)

    vector_yaml = os.path.join(cur_dir, 'uniq_vectorindexer.yml')
    doc_yaml = os.path.join(cur_dir, 'uniq_docindexer.yml')
    flow = Flow().add(uses=vector_yaml).add(uses=doc_yaml)

    PyClient.check_input(docs[:10])
    PyClient.check_input(docs)

    # The flow is entered twice on purpose: once per indexing run.
    with flow:
        flow.index(docs[:10])

    with flow:
        flow.index(docs)

    expected_indexers = (
        ('vec_idx.bin', NumpyIndexer),
        ('doc_idx.bin', BinaryPbIndexer),
    )
    for file_name, indexer_cls in expected_indexers:
        with BaseExecutor.load(random_workspace / file_name) as indexer:
            assert isinstance(indexer, indexer_cls)
            assert indexer._size == unique_count
예제 #9
0
def test_check_input_fail(input_fn):
    """Expect ``PyClient.check_input`` to raise ``TypeError`` for ``input_fn``.

    ``input_fn`` is supplied from outside this function (presumably a pytest
    parametrization or fixture of invalid inputs -- not visible here).
    """
    with pytest.raises(TypeError):
        PyClient.check_input(input_fn)
예제 #10
0
def test_check_input_success(input_fn):
    """Expect ``PyClient.check_input`` to accept ``input_fn`` without raising.

    ``input_fn`` is supplied from outside this function (presumably a pytest
    parametrization or fixture of valid inputs -- not visible here).
    """
    PyClient.check_input(input_fn)
예제 #11
0
def test_random_docs():
    """Pass the output of ``random_docs(100)`` to ``PyClient.check_input``."""
    PyClient.check_input(random_docs(100))
예제 #12
0
        for m in range(10):
            dm = d.matches.add()
            dm.text = 'match to hello world'
            dm.uri = 'doc://match'
            dm.tags['id'] = m
            dm.score.ref_id = d.id
            for mm in range(10):
                dmm = dm.matches.add()
                dmm.text = 'nested match to match'
                dmm.uri = 'doc://match/match'
                dmm.tags['id'] = mm
                dmm.score.ref_id = dm.id
        yield d


# Module-level call: runs when this module is imported, outside any test function.
PyClient.check_input(random_docs(10))


class DummyDriver(QuerySetReader, BaseDriver):
    """Minimal driver that stores two constructor arguments on private attributes."""

    def __init__(self, arg1='hello', arg2=456, *args, **kwargs):
        """Initialise the base classes, then record ``arg1`` and ``arg2``.

        :param arg1: value stored as ``_arg1`` (default ``'hello'``)
        :param arg2: value stored as ``_arg2`` (default ``456``)
        :param args: extra positional arguments forwarded to the base initialiser
        :param kwargs: extra keyword arguments forwarded to the base initialiser
        """
        super().__init__(*args, **kwargs)
        self._arg1, self._arg2 = arg1, arg2


def test_read_from_req():
    def validate1(req):
        assert len(req.docs) == 5

    def validate2(req):
        assert len(req.docs) == 3
예제 #13
0
def test_input_files_with_invalid_read_mode():
    """Expect ``RuntimeError`` when ``input_files`` is given ``read_mode='invalid'``."""
    with pytest.raises(RuntimeError):
        # Build the input inside the ``raises`` block so the error is caught
        # whether it surfaces on creation or only once the input is consumed.
        bad_mode_files = input_files(patterns='*.*', read_mode='invalid')
        PyClient.check_input(bad_mode_files)
예제 #14
0
 def test_io_np(self):
     """Check ``PyClient.check_input`` on numpy-derived input and on plain strings."""
     random_array = np.random.random([100, 4, 2])
     PyClient.check_input(input_numpy(random_array))

     plain_strings = ['asda', 'dsadas asdasd']
     PyClient.check_input(plain_strings)
예제 #15
0
def test_querylang_request():
    """Pass a ``QueryLang`` wrapping a ``SliceQL`` as ``queryset`` to ``check_input``."""
    slice_ql = SliceQL(start=1, end=4, priority=1)
    query = QueryLang(slice_ql)
    docs = random_docs(10)
    PyClient.check_input(docs, queryset=query)
예제 #16
0
    def test_io_np(self):
        """Check ``PyClient.check_input`` on numpy-derived input and on plain strings.

        Also prints the type of a raw random array and of the ``array2pb``
        conversion of one, as debugging output.
        """
        raw_array = np.random.random([100, 4])
        print(type(raw_array))

        numpy_docs = input_numpy(np.random.random([100, 4, 2]))
        PyClient.check_input(numpy_docs)
        PyClient.check_input(['asda', 'dsadas asdasd'])

        converted = array2pb(np.random.random([100, 4, 2]))
        print(type(converted))
예제 #17
0
def test_input_numpy(array):
    """Pass ``input_numpy(array)`` to ``PyClient.check_input``.

    ``array`` is supplied from outside this function (presumably a pytest
    parametrization or fixture -- not visible here).
    """
    numpy_docs = input_numpy(array)
    PyClient.check_input(numpy_docs)
예제 #18
0
def test_io_files():
    """Check ``input_files('*.*', ...)`` under several option sets.

    Each option set is applied to the ``'*.*'`` pattern and the result is
    handed to ``PyClient.check_input``, in the order listed.
    """
    option_sets = (
        {},
        {'recursive': True},
        {'size': 2},
        {'size': 2, 'read_mode': 'rb'},
        {'sampling_rate': 0.5},
    )
    for options in option_sets:
        PyClient.check_input(input_files('*.*', **options))