Esempio n. 1
0
def test_input_huggingface_datasets_with_no_split(dataset_configs):
    """Expect ``ValueError`` when no ``split`` argument is supplied.

    Both the loader call and the iteration over its result sit inside the
    ``pytest.raises`` context, so the test passes whether the error is
    raised by the call itself or only while the result is being consumed.
    """
    with pytest.raises(ValueError):
        adversarial = dataset_configs['adversarial']
        documents = from_huggingface_datasets(
            adversarial['dataset_path'], name=adversarial['name'])
        # Exhaust the result; the individual items are irrelevant here.
        for _document in documents:
            continue
Esempio n. 2
0
def test_input_huggingface_datasets_with_tweet_dataset(dataset_configs):
    """Load the ``tweet_eval`` configuration and inspect the first item.

    The first loaded item must be a ``Document`` whose ``text`` is truthy.
    """
    tweet_eval = dataset_configs['tweet_eval']
    documents = list(
        from_huggingface_datasets(
            tweet_eval['dataset_path'],
            name=tweet_eval['name'],
            split=tweet_eval['split'],
        ))

    first = documents[0]
    assert isinstance(first, Document)
    assert first.text
Esempio n. 3
0
def test_input_huggingface_datasets_with_filter_fields_and_no_resolver(
        dataset_configs):
    """Expect ``ValueError`` for ``filter_fields=True`` without a resolver.

    No ``field_resolver`` (and no ``split``) is passed. The loader call and
    the iteration both sit inside the ``pytest.raises`` context, so the
    error may surface at either point.
    """
    with pytest.raises(ValueError):
        adversarial = dataset_configs['adversarial']
        documents = from_huggingface_datasets(
            adversarial['dataset_path'],
            name=adversarial['name'],
            filter_fields=True,
        )
        # Exhaust the result; the individual items are irrelevant here.
        for _document in documents:
            continue
Esempio n. 4
0
def test_input_huggingface_datasets_with_field_resolver(dataset_configs):
    """Load the adversarial dataset with ``{'question': 'text'}`` resolver.

    The first loaded item must be a ``Document`` with truthy ``text`` and a
    ``'title'`` entry in its ``tags``.
    """
    adversarial = dataset_configs['adversarial']
    documents = list(
        from_huggingface_datasets(
            adversarial['dataset_path'],
            field_resolver={'question': 'text'},
            name=adversarial['name'],
            split=adversarial['split'],
        ))

    first = documents[0]
    assert isinstance(first, Document)
    assert first.text
    assert 'title' in first.tags
Esempio n. 5
0
def test_input_huggingface_datasets_from_csv_file(dataset_configs):
    """Load ``docs.csv`` through the ``'csv'`` dataset path and check it.

    Expects exactly two items; the first must be a ``Document`` with the
    text ``'What are the symptoms?'`` and a ``source`` tag of ``'testsrc'``.
    ``dataset_configs`` is requested but not referenced in the body.
    """
    load_options = {
        'field_resolver': {'question': 'text'},
        'data_files': 'docs.csv',
        'split': 'train',
    }
    documents = list(from_huggingface_datasets('csv', **load_options))

    # Check the count before indexing so an empty result fails on length.
    assert len(documents) == 2
    first = documents[0]
    assert isinstance(first, Document)
    assert first.text == 'What are the symptoms?'
    assert first.tags['source'] == 'testsrc'
Esempio n. 6
0
def test_client_huggingface_datasets(protocol, mocker, func_name):
    """Feed two ``adversarial_qa`` items to a Flow method named ``func_name``.

    Passes a mock as ``on_done`` and asserts it was called exactly once
    while the Flow context is still open.
    """
    with Flow(protocol=protocol).add() as flow:
        on_done_mock = mocker.Mock()
        # Resolve the Flow method before building the input generator.
        flow_method = getattr(flow, f'{func_name}')
        documents = from_huggingface_datasets(
            dataset_path='adversarial_qa',
            size=2,
            name='adversarialQA',
            split='test',
            field_resolver={'question': 'text'},
        )
        flow_method(documents, on_done=on_done_mock)
        on_done_mock.assert_called_once()
Esempio n. 7
0
def test_input_huggingface_datasets_from_path(dataset_configs, size,
                                              sampling_rate):
    """Load the adversarial dataset with ``size`` and ``sampling_rate``.

    When ``size`` is not ``None`` the number of loaded items must equal it;
    in every case the first item must be a ``Document``.
    """
    adversarial = dataset_configs['adversarial']
    loader = from_huggingface_datasets(
        adversarial['dataset_path'],
        size=size,
        name=adversarial['name'],
        sampling_rate=sampling_rate,
        split=adversarial['split'],
    )
    result = list(loader)

    # The length is only constrained when an explicit size was requested.
    assert size is None or len(result) == size

    assert isinstance(result[0], Document)