예제 #1
0
def test_input_lines_with_jsonlines_docs_groundtruth():
    """Read the docs/groundtruth jsonlines file and check the parsed pairs.

    Each of the two parsed entries is expected to be indexable, with the
    element at index 0 and the element at index 1 both exposing ``.text``.
    """
    pairs = list(
        from_lines(
            filepath='tests/unit/clients/python/docs_groundtruth.jsonlines'
        )
    )
    assert len(pairs) == 2

    first_pair, second_pair = pairs
    assert first_pair[0].text == "a"
    assert first_pair[1].text == "b"
    assert second_pair[0].text == "c"
    assert second_pair[1].text == "d"
예제 #2
0
def test_input_csv_from_lines_field_resolver():
    """Parse ``docs.csv`` with a ``question -> text`` field resolver.

    Checks that two ``Document`` objects come back, and that the first one
    carries the ``source`` tag, has no ``uri`` and has a non-empty ``text``.
    """
    csv_path = os.path.join(cur_dir, 'docs.csv')
    with open(csv_path) as csv_file:
        docs = list(
            from_lines(
                csv_file,
                line_format='csv',
                field_resolver={'question': 'text'},
            )
        )

    assert len(docs) == 2

    first_doc = docs[0]
    assert isinstance(first_doc, Document)
    assert first_doc.tags['source'] == 'testsrc'
    assert not first_doc.uri
    assert first_doc.text
예제 #3
0
def test_input_lines_with_jsonlines_file(size, sampling_rate):
    """Read ``docs.jsonlines`` by file path with optional size/sampling limits.

    :param size: maximum number of items requested from ``from_lines``, or
        ``None`` for no limit.
    :param sampling_rate: sampling rate forwarded to ``from_lines``, or
        ``None`` to disable sampling.
    """
    result = list(
        from_lines(
            filepath=os.path.join(cur_dir, 'docs.jsonlines'),
            size=size,
            sampling_rate=sampling_rate,
        ))

    # The previous one-liner, `assert len(result) == size if size is not None
    # else 2`, parsed as `assert ((len(result) == size) if ... else 2)`, so for
    # `size is None` it asserted the truthy constant 2 and verified nothing.
    if size is not None:
        assert len(result) == size
    elif sampling_rate is None:
        # No limit and no sampling: every line of the file is returned.
        assert len(result) == 2
    else:
        # NOTE(review): assumes sampling only drops items, so the count can
        # never exceed the two lines in the file -- confirm against from_lines.
        assert len(result) <= 2

    if sampling_rate is None:
        assert result[0].text == "a"
        if size is None:
            assert result[1].text == "b"
예제 #4
0
def test_input_lines_with_jsonslines(size, sampling_rate):
    """Feed pre-read JSON lines to ``from_lines`` with optional size/sampling.

    :param size: maximum number of items requested from ``from_lines``, or
        ``None`` for no limit.
    :param sampling_rate: sampling rate forwarded to ``from_lines``, or
        ``None`` to disable sampling.
    """
    with open(os.path.join(cur_dir, 'docs.jsonlines')) as fp:
        lines = fp.readlines()
    result = list(
        from_lines(lines=lines,
                   line_format='json',
                   size=size,
                   sampling_rate=sampling_rate))

    # The previous one-liner, `assert len(result) == size if size is not None
    # else 2`, parsed as `assert ((len(result) == size) if ... else 2)`, so for
    # `size is None` it asserted the truthy constant 2 and verified nothing.
    if size is not None:
        assert len(result) == size
    elif sampling_rate is None:
        # No limit and no sampling: every input line is returned.
        assert len(result) == 2
    else:
        # NOTE(review): assumes sampling only drops items, so the count can
        # never exceed the number of input lines -- confirm against from_lines.
        assert len(result) <= len(lines)

    if sampling_rate is None:
        assert result[0].text == "a"
        if size is None:
            assert result[1].text == "b"
예제 #5
0
파일: test_io.py 프로젝트: paddlelaw/jina
def test_from_lines_with_tilde():
    """Check that ``from_lines`` accepts a ``filepath`` starting with ``~``.

    The fixture file is copied into ``~/.jina`` first, then read back through
    the unexpanded ``~/.jina/...`` path.
    """
    jina_home = os.path.expanduser('~/.jina')
    # exist_ok avoids the check-then-create race of exists() + mkdir() when
    # several test processes start at the same time.
    os.makedirs(jina_home, exist_ok=True)
    shutil.copy(
        os.path.join(cur_dir, 'docs_groundtruth.jsonlines'),
        jina_home,
    )
    result = list(from_lines(filepath='~/.jina/docs_groundtruth.jsonlines'))
    assert len(result) == 2
    assert result[0][0].text == "a"
    assert result[0][1].text == "b"
    assert result[1][0].text == "c"
    assert result[1][1].text == "d"
예제 #6
0
def test_input_lines_with_jsonlines_docs():
    """Read ``docs.jsonlines`` by path and check both parsed texts."""
    jsonlines_path = os.path.join(cur_dir, 'docs.jsonlines')
    docs = list(from_lines(filepath=jsonlines_path))

    assert len(docs) == 2
    first_doc, second_doc = docs
    assert first_doc.text == "a"
    assert second_doc.text == "b"
예제 #7
0
def test_input_lines_with_empty_filepath_and_lines():
    """Expect ``ValueError`` when neither ``lines`` nor ``filepath`` is given."""
    with pytest.raises(ValueError):
        # Both the call and the full consumption stay inside the context so
        # the error is caught whether it is raised eagerly or on iteration.
        for _unused in from_lines(lines=None, filepath=None):
            continue
예제 #8
0
def test_input_csv_from_lines():
    """Parse ``docs.csv`` in CSV mode and check the first ``Document``."""
    csv_path = os.path.join(cur_dir, 'docs.csv')
    with open(csv_path) as csv_file:
        docs = list(from_lines(csv_file, line_format='csv'))

    assert len(docs) == 2

    first_doc = docs[0]
    assert isinstance(first_doc, Document)
    assert first_doc.tags['source'] == 'testsrc'
예제 #9
0
def test_input_lines_with_filepath(filepath):
    """Read at most two items from ``filepath`` and check the first is a ``Document``.

    :param filepath: path of the input file handed to ``from_lines``.
    """
    docs = list(from_lines(filepath=filepath, size=2))

    assert len(docs) == 2
    first_doc = docs[0]
    assert isinstance(first_doc, Document)