def index_rest(num_docs):
    """Index the two configured data files through the Flow's REST endpoint.

    Loads the Flow from ``flows/index.yml`` and, for each file named by the
    ``JINA_DATA_FILE_1`` and ``JINA_DATA_FILE_2`` environment variables
    (joined onto this module's directory), POSTs the documents read from it
    to ``/index`` on the Flow's exposed port.

    :param num_docs: forwarded as ``size`` to ``_input_lines`` for each file.
    :raises ValueError: if one of the two environment variables is not set.
    :raises Exception: if the endpoint answers with a status other than 200.
    """
    base_dir = os.path.dirname(__file__)

    # Resolve every path before the Flow is started, so a missing variable
    # fails fast with a clear message instead of an opaque ``TypeError``
    # coming out of ``os.path.join(base_dir, None)``.
    data_paths = []
    for env_name in ('JINA_DATA_FILE_1', 'JINA_DATA_FILE_2'):
        relative_path = os.environ.get(env_name)
        if relative_path is None:
            raise ValueError(f'environment variable {env_name} is not set')
        data_paths.append(os.path.join(base_dir, relative_path))

    f = Flow().load_config('flows/index.yml')
    with f:
        for data_path in data_paths:
            print(f'Indexing {data_path}')
            url = f'http://0.0.0.0:{f.port_expose}/index'

            input_docs = _input_lines(
                filepath=data_path,
                size=num_docs,
                read_mode='r',
            )
            data_json = {
                'data': [Document(text=text).dict() for text in input_docs]
            }
            r = requests.post(url, json=data_json)
            if r.status_code != 200:
                raise Exception(
                    f'api request failed, url: {url}, status: {r.status_code}, content: {r.content}'
                )
def _docs_from_file(file: str) -> list:
    """Build one serialized document per item read from ``file``.

    :param file: path handed to ``_input_lines`` as ``filepath``.
    :return: list holding the ``dict()`` form of a ``Document`` for every
        item produced by ``_input_lines``, in the order they were read.
    """
    docs = []
    # Iterate the reader directly: copying it into a list first only builds
    # a throwaway second copy of the whole input.
    for text in _input_lines(filepath=file):
        d = Document()
        d.text = text
        docs.append(d.dict())
    return docs
def test_input_lines_with_jsonlines_docs_groundtruth():
    """Check the ``text`` values of the two entries in each of the two records read."""
    jsonlines_path = 'tests/unit/clients/python/docs_groundtruth.jsonlines'
    records = list(_input_lines(filepath=jsonlines_path))

    assert len(records) == 2

    # Expected ``text`` per record, indexed as records[row][col].
    expected_texts = (("a", "b"), ("c", "d"))
    for row, row_texts in enumerate(expected_texts):
        for col, wanted in enumerate(row_texts):
            assert records[row][col]['text'] == wanted
def test_input_csv_from_lines_field_resolver():
    """Read ``docs.csv`` with a ``field_resolver`` and check the first Document is populated."""
    csv_path = os.path.join(cur_dir, 'docs.csv')
    column_to_field = {'url': 'uri', 'question': 'text'}

    with open(csv_path) as csv_file:
        docs = list(
            _input_lines(
                csv_file,
                line_format='csv',
                field_resolver=column_to_field,
            )
        )

    assert len(docs) == 2

    first_doc = docs[0]
    assert isinstance(first_doc, Document)
    assert first_doc.tags['source'] == 'testsrc'
    # Both fields named in the resolver must come out non-empty.
    assert first_doc.uri
    assert first_doc.text
def test_input_lines_with_empty_filepath_and_lines():
    """Passing neither ``lines`` nor ``filepath`` must raise ``ValueError``."""
    with pytest.raises(ValueError):
        # Exhaust the result so the error surfaces whether it is raised by
        # the call itself or only once iteration begins.
        list(_input_lines(lines=None, filepath=None))
def test_input_lines_with_lines():
    """With ``size=2`` only the first two of three in-memory lines come back."""
    source_lines = ["1", "2", "3"]
    taken = list(_input_lines(lines=source_lines, size=2))

    assert len(taken) == 2
    for got, wanted in zip(taken, ("1", "2")):
        assert got == wanted
def test_input_lines_with_filepath(filepath):
    """With ``size=2`` the first two raw lines of ``filepath`` come back, newlines included."""
    head = list(_input_lines(filepath=filepath, size=2))

    assert len(head) == 2
    first_line, second_line = head
    assert first_line == "1\n"
    assert second_line == "2\n"
def test_input_lines_with_jsonlines_docs():
    """The jsonlines fixture yields two items whose ``text`` is ``a`` then ``b``."""
    jsonlines_path = 'tests/unit/clients/python/docs.jsonlines'
    docs = list(_input_lines(filepath=jsonlines_path))

    assert len(docs) == 2
    for doc, wanted_text in zip(docs, ("a", "b")):
        assert doc.text == wanted_text
def test_input_csv_from_lines():
    """Read ``docs.csv`` from an open file handle and check the first Document's tag."""
    csv_path = os.path.join(cur_dir, 'docs.csv')

    with open(csv_path) as csv_file:
        docs = list(_input_lines(csv_file, line_format='csv'))

    assert len(docs) == 2

    first_doc = docs[0]
    assert isinstance(first_doc, Document)
    assert first_doc.tags['source'] == 'testsrc'
def test_input_lines_with_filepath(filepath):
    """With ``size=2`` two items are read from ``filepath`` and the first is a ``Document``."""
    head = list(_input_lines(filepath=filepath, size=2))

    assert len(head) == 2
    first_item = head[0]
    assert isinstance(first_item, Document)