def test_input_lines_with_jsonlines_docs_groundtruth():
    """Load the doc/groundtruth JSON-lines fixture and check both pairs.

    Each yielded item is indexed as a pair; the ``text`` of both members
    is compared against the expected fixture content.
    """
    # NOTE: this path is relative to the current working directory.
    pairs = list(
        from_lines(
            filepath='tests/unit/clients/python/docs_groundtruth.jsonlines',
        )
    )

    assert len(pairs) == 2
    texts = [(pair[0].text, pair[1].text) for pair in pairs]
    assert texts == [("a", "b"), ("c", "d")]
def test_input_csv_from_lines_field_resolver():
    """Read the CSV fixture with a ``field_resolver`` of ``question -> text``.

    Checks that two ``Document`` objects come back and that the first one
    has the expected ``source`` tag, an empty ``uri`` and a non-empty
    ``text``.
    """
    csv_path = os.path.join(cur_dir, 'docs.csv')
    with open(csv_path) as csv_file:
        # Materialize while the file handle is still open.
        docs = list(
            from_lines(
                csv_file,
                line_format='csv',
                field_resolver={'question': 'text'},
            )
        )

    assert len(docs) == 2
    first = docs[0]
    assert isinstance(first, Document)
    assert first.tags['source'] == 'testsrc'
    assert not first.uri
    assert first.text
def test_input_lines_with_jsonlines_file(size, sampling_rate):
    """Read ``docs.jsonlines`` by file path with optional size and sampling.

    :param size: value forwarded to ``from_lines`` as ``size``; may be None.
    :param sampling_rate: value forwarded to ``from_lines`` as
        ``sampling_rate``; may be None.
    """
    result = list(
        from_lines(
            filepath=os.path.join(cur_dir, 'docs.jsonlines'),
            size=size,
            sampling_rate=sampling_rate,
        )
    )

    # Written as explicit branches on purpose: the one-liner
    # ``assert len(result) == size if size is not None else 2`` parses as
    # ``assert ((len(result) == size) if size is not None else 2)``, which
    # asserts the constant 2 (always true) whenever size is None.
    if size is not None:
        assert len(result) == size
    elif sampling_rate is None:
        # No size limit and no sampling: the fixture yields both documents.
        assert len(result) == 2
    # With sampling and no size limit the count is not asserted, since it
    # presumably depends on which lines get sampled.

    if sampling_rate is None:
        assert result[0].text == "a"
        if size is None:
            assert result[1].text == "b"
def test_input_lines_with_jsonslines(size, sampling_rate):
    """Feed pre-read JSON lines to ``from_lines`` with optional size/sampling.

    :param size: value forwarded to ``from_lines`` as ``size``; may be None.
    :param sampling_rate: value forwarded to ``from_lines`` as
        ``sampling_rate``; may be None.
    """
    with open(os.path.join(cur_dir, 'docs.jsonlines')) as fp:
        lines = fp.readlines()

    result = list(
        from_lines(
            lines=lines,
            line_format='json',
            size=size,
            sampling_rate=sampling_rate,
        )
    )

    # Written as explicit branches on purpose: the one-liner
    # ``assert len(result) == size if size is not None else 2`` parses as
    # ``assert ((len(result) == size) if size is not None else 2)``, which
    # asserts the constant 2 (always true) whenever size is None.
    if size is not None:
        assert len(result) == size
    elif sampling_rate is None:
        # No size limit and no sampling: the fixture yields both documents.
        assert len(result) == 2
    # With sampling and no size limit the count is not asserted, since it
    # presumably depends on which lines get sampled.

    if sampling_rate is None:
        assert result[0].text == "a"
        if size is None:
            assert result[1].text == "b"
def test_from_lines_with_tilde():
    """Check that ``from_lines`` accepts a ``~``-prefixed file path.

    The doc/groundtruth fixture is copied into ``~/.jina`` and then read
    back through the unexpanded ``~/.jina/...`` path. The copied file is
    removed afterwards so the test does not leave data in the home
    directory; the ``~/.jina`` directory itself is left in place.
    """
    jina_home = os.path.expanduser('~/.jina')
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(jina_home, exist_ok=True)
    copied_path = shutil.copy(
        os.path.join(cur_dir, 'docs_groundtruth.jsonlines'),
        jina_home,
    )

    try:
        result = list(from_lines(filepath='~/.jina/docs_groundtruth.jsonlines'))
    finally:
        # shutil.copy returned the destination file path; clean it up even
        # when reading fails.
        os.remove(copied_path)

    assert len(result) == 2
    assert result[0][0].text == "a"
    assert result[0][1].text == "b"
    assert result[1][0].text == "c"
    assert result[1][1].text == "d"
def test_input_lines_with_jsonlines_docs():
    """Read ``docs.jsonlines`` by path and check the two document texts."""
    fixture = os.path.join(cur_dir, 'docs.jsonlines')
    docs = list(from_lines(filepath=fixture))

    assert len(docs) == 2
    first, second = docs[0], docs[1]
    assert first.text == "a"
    assert second.text == "b"
def test_input_lines_with_empty_filepath_and_lines():
    """Expect ``ValueError`` when neither ``lines`` nor ``filepath`` is given."""
    with pytest.raises(ValueError):
        # Exhaust the result in case the error is only raised on iteration.
        list(from_lines(lines=None, filepath=None))
def test_input_csv_from_lines():
    """Read the CSV fixture via an open file handle and check the first doc."""
    csv_path = os.path.join(cur_dir, 'docs.csv')
    with open(csv_path) as csv_file:
        # Materialize while the file handle is still open.
        docs = list(from_lines(csv_file, line_format='csv'))

    assert len(docs) == 2
    first = docs[0]
    assert isinstance(first, Document)
    assert first.tags['source'] == 'testsrc'
def test_input_lines_with_filepath(filepath):
    """Read at most two items from ``filepath`` and check the first is a Document.

    :param filepath: path forwarded to ``from_lines``; where it comes from
        (fixture or parametrization) is not visible in this block.
    """
    docs = list(from_lines(filepath=filepath, size=2))

    assert len(docs) == 2
    head = docs[0]
    assert isinstance(head, Document)