Example 1
def test_modifying_returned_df_does_not_affect_cached_df(
        source_file2: SourceFile):
    df = source_file2.get_csv_as_df(apply_dtypes=False, cache=True)
    df.drop(labels='column_a', axis=1, inplace=True)
    assert 'column_a' not in df.columns
    df = source_file2.get_csv_as_df(apply_dtypes=False, cache=True)
    assert 'column_a' in df.columns
Example 2
def test_kwargs_overrule_file_params(sas_source_file: SourceFile):
    """Adding encoding='iso8859' as a kwarg to get_sas_as_df, should
    overrule the original encoding value in the SourceFile's params."""
    assert sas_source_file._params['encoding'] == 'utf-8'
    with pytest.raises(UnicodeDecodeError):
        sas_source_file.get_sas_as_df(apply_dtypes=False)
    df = sas_source_file.get_sas_as_df(apply_dtypes=False, encoding='iso8859')
    assert df.shape == (30, 5)
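The example above relies on explicit keyword arguments taking precedence over the stored file params. A minimal sketch of that precedence rule, assuming the params live in a plain dict (the helper name below is hypothetical, not part of SourceFile):

def merge_read_kwargs(params: dict, **kwargs) -> dict:
    # Keys passed explicitly as kwargs overrule the values stored in params.
    return {**params, **kwargs}

merged = merge_read_kwargs({'encoding': 'utf-8'}, encoding='iso8859')
assert merged['encoding'] == 'iso8859'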
Example 3
def test_source_file2_apply_dtypes_manually(source_file2: SourceFile):
    df = source_file2.get_csv_as_df(apply_dtypes=False)
    assert list(df.dtypes) == [dtype('O')] * 4
    df = source_file2.apply_dtypes(df, errors='raise')
    expected_dtypes = {
        'column_a': dtype('O'),
        'column_b': pd.Int64Dtype(),
        'column_c': dtype('<M8[ns]'),
        'column_d': dtype('float64'),
    }
    assert df.dtypes.to_dict() == expected_dtypes
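The conversions implied by expected_dtypes can be reproduced with plain pandas. This is a sketch of the kind of work apply_dtypes presumably does, not the actual SourceFile implementation:

import pandas as pd

# All columns start as object, as in the apply_dtypes=False case above.
df = pd.DataFrame({
    'column_b': ['1', '2'],
    'column_c': ['2021-01-01', '2021-01-02'],
    'column_d': ['1.5', '2.5'],
}, dtype='object')

# Nullable integers and datetimes are converted explicitly;
# plain numpy dtypes can go through astype directly.
df['column_b'] = pd.to_numeric(df['column_b']).astype('Int64')
df['column_c'] = pd.to_datetime(df['column_c'])
df = df.astype({'column_d': 'float64'}, errors='raise')

assert str(df.dtypes['column_b']) == 'Int64'
assert str(df.dtypes['column_c']) == 'datetime64[ns]'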
Example 4
def test_absent_binary_flag(caplog, sas_source_file: SourceFile):
    """When a file has no information on whether it is binary, the
    returned line count should be None, and a warning logged."""
    del sas_source_file._params['binary']
    with caplog.at_level(logging.DEBUG):
        assert sas_source_file.get_line_count() is None
    assert 'No binary field available for file beer.sas7bdat' in caplog.text
Example 5
def test_source_file2_has_config_dtypes(source_file2: SourceFile):
    df = source_file2.get_csv_as_df(apply_dtypes=True)
    expected_dtypes = {
        'column_a': dtype('O'),
        'column_b': pd.Int64Dtype(),
        'column_c': dtype('<M8[ns]'),
        'column_d': dtype('float64'),
    }
    assert df.dtypes.to_dict() == expected_dtypes
Example 6
@pytest.fixture
def source_file2_partial_dtypes(source_data_test_dir: Path) -> SourceFile:
    """Get SourceFile instance of source_file2.tsv with dtypes set for
    only 2/4 columns."""
    file_path = source_data_test_dir / 'test_dir1' / 'source_file2.tsv'
    dtypes = {
        'column_b': 'Int64',
        'column_c': 'datetime64[ns]',
    }
    params = get_file_params(dtypes=dtypes)
    return SourceFile(file_path, params)
Example 7
@pytest.fixture
def source_file2(source_data_test_dir: Path) -> SourceFile:
    """Get SourceFile instance of source_file2.tsv with dtypes."""
    file_path = source_data_test_dir / 'test_dir1' / 'source_file2.tsv'
    dtypes = {
        'column_a': 'object',
        'column_b': 'Int64',
        'column_c': 'datetime64[ns]',
        'column_d': 'float64',
    }
    params = get_file_params(dtypes=dtypes)
    return SourceFile(file_path, params)
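get_file_params itself is not shown in these examples. A plausible sketch of such a helper, returning a params dict with defaults that each fixture can override; the default values below are assumptions, not taken from the real code:

def get_file_params(**overrides) -> dict:
    """Build a params dict for a SourceFile, overriding defaults as needed."""
    params = {
        'encoding': 'utf-8',
        'delimiter': '\t',
        'binary': False,
        'dtypes': None,
    }
    params.update(overrides)
    return params

params = get_file_params(dtypes={'column_b': 'Int64'})
assert params['encoding'] == 'utf-8'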
Example 8
def test_get_csv_with_a_cache_does_not_reload_file(source_file2: SourceFile):
    source_file2.get_csv_as_df(apply_dtypes=False, cache=True)
    source_file2._retrieve_cached_df = MagicMock()
    source_file2._read_csv_as_df = MagicMock()
    source_file2.get_csv_as_df(apply_dtypes=False, cache=True)
    source_file2._retrieve_cached_df.assert_called_once()
    source_file2._read_csv_as_df.assert_not_called()
Example 9
def test_source_file2_has_partial_dtypes(
        source_file2_partial_dtypes: SourceFile):
    """
    If for only a subset of the columns the dtypes were provided, those
    should be applied, and the other columns should have 'object' dtype.
    """
    df = source_file2_partial_dtypes.get_csv_as_df(apply_dtypes=True)
    expected_dtypes = {
        'column_a': dtype('O'),
        'column_b': pd.Int64Dtype(),
        'column_c': dtype('<M8[ns]'),
        'column_d': dtype('O'),
    }
    assert df.dtypes.to_dict() == expected_dtypes
Example 10
def test_force_reload_ignores_cache(source_file2: SourceFile):
    source_file2.get_csv_as_df(apply_dtypes=False, cache=True)
    source_file2._retrieve_cached_df = MagicMock()
    source_file2._read_csv_as_df = MagicMock()
    source_file2.get_csv_as_df(apply_dtypes=False,
                               cache=True,
                               force_reload=True)
    source_file2._retrieve_cached_df.assert_not_called()
    source_file2._read_csv_as_df.assert_called_once()
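Taken together, the caching examples describe a contract: cache a copy on first read, serve a copy on later reads, and skip the cache when force_reload is set. A rough sketch of that behaviour, reusing the method names mocked above but not the real SourceFile code:

import pandas as pd

class _CachingReaderSketch:
    def __init__(self):
        self._df_cache = None

    def _read_csv_as_df(self) -> pd.DataFrame:
        # Stand-in for the real file read.
        return pd.DataFrame({'column_a': ['x'], 'column_b': [1]})

    def _cache_df_copy(self, df: pd.DataFrame) -> None:
        # Store a copy so later mutations of the returned df cannot leak in.
        self._df_cache = df.copy()

    def _retrieve_cached_df(self) -> pd.DataFrame:
        # Hand out a copy for the same reason.
        return self._df_cache.copy()

    def get_csv_as_df(self, cache=False, force_reload=False) -> pd.DataFrame:
        if cache and not force_reload and self._df_cache is not None:
            return self._retrieve_cached_df()
        df = self._read_csv_as_df()
        if cache:
            self._cache_df_copy(df)
        return df

reader = _CachingReaderSketch()
first = reader.get_csv_as_df(cache=True)
first.drop(columns='column_a', inplace=True)  # does not touch the cache
assert 'column_a' in reader.get_csv_as_df(cache=True).columns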
Example 11
@pytest.fixture
def sas_source_file(source_data_test_dir: Path) -> SourceFile:
    """Get SourceFile instance of beer.sas7bdat."""
    file_path = source_data_test_dir / 'test_dir1' / 'beer.sas7bdat'
    params = get_file_params(binary=True)
    return SourceFile(file_path, params)
Example 12
def test_cache_method_is_called(source_file2: SourceFile):
    source_file2._cache_df_copy = MagicMock()
    source_file2.get_csv_as_df(apply_dtypes=False, cache=True)
    source_file2._cache_df_copy.assert_called_once()
Example 13
def test_setting_cached_df_manually(source_file2: SourceFile):
    df = source_file2.get_csv_as_df(apply_dtypes=False, cache=False)
    df.drop(labels='column_a', axis=1, inplace=True)
    source_file2.cache_df(df)
    df = source_file2.get_csv_as_df(apply_dtypes=False, cache=False)
    assert 'column_a' not in df.columns
Example 14
@pytest.fixture
def source_file1_df(source_data_test_dir: Path) -> pd.DataFrame:
    """Get DataFrame of source_file1.csv without setting dtypes."""
    file_path = source_data_test_dir / 'test_dir1' / 'source_file1.csv'
    params = get_file_params(delimiter=',')
    source_file = SourceFile(file_path, params)
    return source_file.get_csv_as_df(apply_dtypes=False)
Example 15
def test_read_csv_additional_kwargs(source_file2: SourceFile):
    assert source_file2.get_csv_as_list_of_dicts(strict=True)[0]
Example 16
def test_read_csv_values_accessible_via_column_names(source_file2: SourceFile):
    rows = source_file2.get_csv_as_generator_of_dicts()
    first_row = next(rows)
    assert first_row['column_a'] == 'Tungsten carbide'
Example 17
def test_get_csv_as_df_requires_encoding(source_file2: SourceFile):
    del source_file2._params['encoding']
    with pytest.raises(ValueError) as excinfo:
        source_file2.get_csv_as_df(apply_dtypes=False)
    assert "encoding" in str(excinfo.value)
Example 18
def test_read_csv_cache(source_file2: SourceFile):
    source_file2.get_csv_as_list_of_dicts(cache=True)
    assert len(source_file2._csv) == 4
Example 19
def test_read_csv_types(source_file2: SourceFile):
    rows = source_file2.get_csv_as_list_of_dicts()
    assert type(rows) is list
    # csv.DictReader returns OrderedDict on Python <= 3.7, a regular dict on >= 3.8
    assert all(issubclass(type(row), dict) for row in rows)
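The dict-per-row behaviour in the last few examples matches what csv.DictReader produces. A small standalone illustration with made-up file contents:

import csv
import io

buffer = io.StringIO("column_a\tcolumn_b\nTungsten carbide\t1\n")
rows = list(csv.DictReader(buffer, delimiter='\t'))

assert rows[0]['column_a'] == 'Tungsten carbide'
# OrderedDict and dict both pass the subclass check used above.
assert all(issubclass(type(row), dict) for row in rows)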
Example 20
def test_binary_file_wont_return_line_count(sas_source_file: SourceFile):
    assert sas_source_file.get_line_count() is None
Example 21
def test_get_line_count(source_file2: SourceFile):
    assert source_file2.get_line_count() == 4
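A minimal sketch of a line count that returns None for binary files, consistent with the two tests above; the actual get_line_count implementation is an assumption:

from pathlib import Path
from typing import Optional

def get_line_count(file_path: Path, binary: bool) -> Optional[int]:
    # Binary formats such as .sas7bdat have no meaningful line count.
    if binary:
        return None
    with open(file_path, encoding='utf-8') as f:
        return sum(1 for _ in f)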
Example 22
def test_invalid_kwarg_raises_error(source_file2: SourceFile):
    with pytest.raises(TypeError) as excinfo:
        source_file2.get_csv_as_df(apply_dtypes=False, bad_kwarg=42)
    assert "unexpected keyword argument 'bad_kwarg'" in str(excinfo.value)
Example 23
def test_source_file2_has_only_object_dtypes(source_file2: SourceFile):
    """When apply_dtypes=False, all columns should have object dtype."""
    df = source_file2.get_csv_as_df(apply_dtypes=False)
    assert list(df.dtypes) == [dtype('O')] * 4
Example 24
def test_read_csv_generator_not_subscriptable(source_file2: SourceFile):
    rows = source_file2.get_csv_as_generator_of_dicts()
    with pytest.raises(TypeError) as excinfo:
        rows[1]  # try to access 2nd row directly
    assert "'generator' object is not subscriptable" in str(excinfo.value)
Example 25
def test_read_csv_no_cache(source_file2: SourceFile):
    source_file2.get_csv_as_list_of_dicts()
    assert not source_file2._csv
Example 26
def test_sas_requires_encoding_param(sas_source_file: SourceFile):
    del sas_source_file._params['encoding']
    with pytest.raises(ValueError) as excinfo:
        sas_source_file.get_sas_as_df(apply_dtypes=False)
    assert "encoding" in str(excinfo.value)