def test_check_input(self):
    """Exercise ``PyClient.check_input`` with byte strings and protobuf documents."""
    # Raw byte strings are checked with the default input type.
    raw_inputs = iter([b'1234', b'45467'])
    PyClient.check_input(raw_inputs)

    # Documents are checked with the protobuf input type requested explicitly.
    proto_inputs = iter([Document(), Document()])
    PyClient.check_input(proto_inputs, input_type=ClientInputType.PROTOBUF)

    # NOTE: a mixed bytes/str iterator such as iter([b'1234', '45467']) is
    # intentionally not asserted to raise TypeError here; per the original
    # note, that failure case is invalid because str is converted to binary.

    # Documents passed without the protobuf input type must be rejected.
    untyped_docs = iter([Document()])
    with self.assertRaises(TypeError):
        PyClient.check_input(untyped_docs)
def test_check_input(self):
    """Check that ``PyClient.check_input`` accepts uniform inputs and rejects bad ones."""
    # Accepted: an iterator of byte strings.
    byte_stream = iter([b'1234', b'45467'])
    PyClient.check_input(byte_stream)

    # Accepted: an iterator of Document objects.
    doc_stream = iter([Document(), Document()])
    PyClient.check_input(doc_stream)

    # Rejected: bytes mixed with a str and a list.
    mixed_stream = iter([b'1234', '45467', [12, 2, 3]])
    with self.assertRaises(TypeError):
        PyClient.check_input(mixed_stream)

    # Rejected: a Document followed by None.
    docs_with_none = iter([Document(), None])
    with self.assertRaises(TypeError):
        PyClient.check_input(docs_with_none)
def test_check_input(self):
    """Check ``PyClient.check_input`` for bytes, protobuf documents and invalid inputs."""
    # Byte strings are checked with the default input type.
    byte_stream = iter([b'1234', b'45467'])
    PyClient.check_input(byte_stream)

    # Documents are checked with the protobuf input type.
    proto_stream = iter([Document(), Document()])
    PyClient.check_input(proto_stream, input_type=ClientInputType.PROTOBUF)

    # Mixing bytes with str is expected to raise TypeError.
    bytes_and_str = iter([b'1234', '45467'])
    with self.assertRaises(TypeError):
        PyClient.check_input(bytes_and_str)

    # A Document without the protobuf input type is expected to raise TypeError.
    untyped_docs = iter([Document()])
    with self.assertRaises(TypeError):
        PyClient.check_input(untyped_docs)
def test_check_input(self):
    """Check ``PyClient.check_input`` with and without the ``in_proto`` flag."""
    # Byte strings are checked with the default settings.
    byte_stream = iter([b'1234', b'45467'])
    PyClient.check_input(byte_stream)

    # Documents are checked with in_proto=True.
    proto_stream = iter([Document(), Document()])
    PyClient.check_input(proto_stream, in_proto=True)

    # Mixing bytes with str is expected to raise TypeError.
    bytes_and_str = iter([b'1234', '45467'])
    with self.assertRaises(TypeError):
        PyClient.check_input(bytes_and_str)

    # A Document passed without in_proto is expected to raise TypeError.
    docs_without_flag = iter([Document()])
    with self.assertRaises(TypeError):
        PyClient.check_input(docs_without_flag)
def test_input_files(patterns, recursive, size, sampling_rate, read_mode):
    """Run ``PyClient.check_input`` over ``input_files`` built from the given options."""
    # Collect the options once, then forward them as keyword arguments.
    options = {
        'patterns': patterns,
        'recursive': recursive,
        'size': size,
        'sampling_rate': sampling_rate,
        'read_mode': read_mode,
    }
    PyClient.check_input(input_files(**options))
def test_io_files(self):
    """Check ``input_files`` under several options, then index ``*.py`` files via a Flow."""
    # Each entry holds the extra keyword arguments for one input_files('*.*') call.
    option_sets = (
        {},
        {'recursive': True},
        {'size': 2},
        {'size': 2, 'read_mode': 'rb'},
        {'sampling_rate': .5},
    )
    for options in option_sets:
        PyClient.check_input(input_files('*.*', **options))

    flow = Flow().add(uses='- !URI2Buffer {}')

    def validate_mime_type(req):
        # Every indexed document is expected to carry the Python mime type.
        for doc in req.index.docs:
            self.assertEqual(doc.mime_type, 'text/x-python')

    with flow:
        flow.index(input_files('*.py'), validate_mime_type)
def test_check_input():
    """Check that ``PyClient.check_input`` accepts uniform inputs and rejects bad ones."""
    # Accepted: byte strings first, then Document objects.
    accepted_streams = (
        iter([b'1234', b'45467']),
        iter([Document(), Document()]),
    )
    for stream in accepted_streams:
        PyClient.check_input(stream)

    # Rejected: bytes mixed with str/list, then a Document followed by None.
    rejected_streams = (
        iter([b'1234', '45467', [12, 2, 3]]),
        iter([Document(), None]),
    )
    for stream in rejected_streams:
        with pytest.raises(TypeError):
            PyClient.check_input(stream)
def test_incremental_indexing_sequential_indexers(random_workspace):
    """Index duplicated docs twice through two chained indexers and check stored sizes.

    The first 10 docs are indexed, then the full set; afterwards both
    indexers loaded from ``random_workspace`` must report ``num_uniq_docs``.
    """
    total_docs = 20
    duplicate_docs, num_uniq_docs = get_duplicate_docs(num_docs=total_docs)

    vector_yaml = os.path.join(cur_dir, 'uniq_vectorindexer.yml')
    doc_yaml = os.path.join(cur_dir, 'uniq_docindexer.yml')
    flow = Flow().add(uses=vector_yaml).add(uses=doc_yaml)

    # Validate both batches before any indexing happens.
    PyClient.check_input(duplicate_docs[:10])
    PyClient.check_input(duplicate_docs)

    # Two separate flow sessions: partial batch first, then the full batch.
    with flow:
        flow.index(duplicate_docs[:10])
    with flow:
        flow.index(duplicate_docs)

    # Each stored indexer must have the expected type and size.
    stored_indexers = (
        ('vec_idx.bin', NumpyIndexer),
        ('doc_idx.bin', BinaryPbIndexer),
    )
    for file_name, indexer_cls in stored_indexers:
        with BaseExecutor.load(random_workspace / file_name) as indexer:
            assert isinstance(indexer, indexer_cls)
            assert indexer._size == num_uniq_docs
def test_check_input_fail(input_fn):
    """Expect ``PyClient.check_input`` to raise ``TypeError`` for ``input_fn``."""
    # Callable form of pytest.raises: invokes check_input(input_fn) and
    # fails the test unless TypeError is raised.
    pytest.raises(TypeError, PyClient.check_input, input_fn)
def test_check_input_success(input_fn):
    """Expect ``PyClient.check_input`` to process ``input_fn`` without raising."""
    validate = PyClient.check_input
    validate(input_fn)
def test_random_docs():
    """Run ``PyClient.check_input`` on the result of ``random_docs(100)``."""
    PyClient.check_input(random_docs(100))
for m in range(10): dm = d.matches.add() dm.text = 'match to hello world' dm.uri = 'doc://match' dm.tags['id'] = m dm.score.ref_id = d.id for mm in range(10): dmm = dm.matches.add() dmm.text = 'nested match to match' dmm.uri = 'doc://match/match' dmm.tags['id'] = mm dmm.score.ref_id = dm.id yield d PyClient.check_input(random_docs(10)) class DummyDriver(QuerySetReader, BaseDriver): def __init__(self, arg1='hello', arg2=456, *args, **kwargs): super().__init__(*args, **kwargs) self._arg1 = arg1 self._arg2 = arg2 def test_read_from_req(): def validate1(req): assert len(req.docs) == 5 def validate2(req): assert len(req.docs) == 3
def test_input_files_with_invalid_read_mode():
    """Expect a ``RuntimeError`` when ``input_files`` gets ``read_mode='invalid'``."""
    with pytest.raises(RuntimeError):
        # Both calls stay inside the block so the error is caught whether it
        # is raised when input_files is called or when its result is checked.
        file_inputs = input_files(patterns='*.*', read_mode='invalid')
        PyClient.check_input(file_inputs)
def test_io_np(self):
    """Run ``PyClient.check_input`` on numpy-derived input and on a list of strings."""
    random_array = np.random.random([100, 4, 2])
    PyClient.check_input(input_numpy(random_array))

    text_inputs = ['asda', 'dsadas asdasd']
    PyClient.check_input(text_inputs)
def test_querylang_request():
    """Run ``PyClient.check_input`` on random docs together with a slice queryset."""
    slice_query = SliceQL(start=1, end=4, priority=1)
    query_lang = QueryLang(slice_query)
    PyClient.check_input(random_docs(10), queryset=query_lang)
def test_io_np(self):
    """Check numpy and string inputs, printing the array and ``array2pb`` result types."""
    # The random arrays are drawn in the same order as the statements below.
    probe_array = np.random.random([100, 4])
    print(type(probe_array))

    input_array = np.random.random([100, 4, 2])
    PyClient.check_input(input_numpy(input_array))
    PyClient.check_input(['asda', 'dsadas asdasd'])

    converted = array2pb(np.random.random([100, 4, 2]))
    print(type(converted))
def test_input_numpy(array):
    """Run ``PyClient.check_input`` on ``input_numpy(array)``."""
    numpy_inputs = input_numpy(array)
    PyClient.check_input(numpy_inputs)
def test_io_files():
    """Run ``PyClient.check_input`` on ``input_files('*.*')`` under several options."""
    # Each entry holds the extra keyword arguments for one input_files call.
    option_sets = (
        {},
        {'recursive': True},
        {'size': 2},
        {'size': 2, 'read_mode': 'rb'},
        {'sampling_rate': 0.5},
    )
    for options in option_sets:
        PyClient.check_input(input_files('*.*', **options))