Example #1
0
def test_get_csv_with_a_cache_does_not_reload_file(source_file2: SourceFile):
    """
    A second read with cache=True should call ``_retrieve_cached_df`` once
    and never call ``_read_csv_as_df``.
    """
    read_options = {'apply_dtypes': False, 'cache': True}
    # First read runs against the real helpers, before any mock is installed.
    source_file2.get_csv_as_df(**read_options)

    retrieve_spy = MagicMock()
    read_spy = MagicMock()
    source_file2._retrieve_cached_df = retrieve_spy
    source_file2._read_csv_as_df = read_spy

    source_file2.get_csv_as_df(**read_options)

    retrieve_spy.assert_called_once()
    read_spy.assert_not_called()
Example #2
0
def test_modifying_returned_df_does_not_affect_cached_df(
        source_file2: SourceFile):
    """
    Dropping a column in place on a returned DataFrame must not show up in
    the DataFrame returned by the next read with cache=True.
    """
    first_df = source_file2.get_csv_as_df(apply_dtypes=False, cache=True)
    first_df.drop(columns='column_a', inplace=True)
    assert 'column_a' not in first_df.columns

    second_df = source_file2.get_csv_as_df(apply_dtypes=False, cache=True)
    assert 'column_a' in second_df.columns
Example #3
0
def test_force_reload_ignores_cache(source_file2: SourceFile):
    """
    With force_reload=True, a read should call ``_read_csv_as_df`` once and
    never call ``_retrieve_cached_df``, even after an earlier cached read.
    """
    # Earlier read with cache=True, performed with the real helpers.
    source_file2.get_csv_as_df(apply_dtypes=False, cache=True)

    retrieve_spy = MagicMock()
    read_spy = MagicMock()
    source_file2._retrieve_cached_df = retrieve_spy
    source_file2._read_csv_as_df = read_spy

    reload_options = {
        'apply_dtypes': False,
        'cache': True,
        'force_reload': True,
    }
    source_file2.get_csv_as_df(**reload_options)

    retrieve_spy.assert_not_called()
    read_spy.assert_called_once()
Example #4
0
def test_source_file2_has_config_dtypes(source_file2: SourceFile):
    """Reading with apply_dtypes=True should give the dtypes listed below."""
    frame = source_file2.get_csv_as_df(apply_dtypes=True)
    actual_dtypes = frame.dtypes.to_dict()
    assert actual_dtypes == {
        'column_a': dtype('O'),
        'column_b': pd.Int64Dtype(),
        'column_c': dtype('<M8[ns]'),
        'column_d': dtype('float64'),
    }
Example #5
0
def test_source_file2_apply_dtypes_manually(source_file2: SourceFile):
    """
    Read with apply_dtypes=False, check all four columns are object dtype,
    then convert through ``apply_dtypes(..., errors='raise')`` and check the
    resulting dtypes.
    """
    raw_df = source_file2.get_csv_as_df(apply_dtypes=False)
    all_object = [dtype('O') for _ in range(4)]
    assert list(raw_df.dtypes) == all_object

    typed_df = source_file2.apply_dtypes(raw_df, errors='raise')
    actual_dtypes = typed_df.dtypes.to_dict()
    assert actual_dtypes == {
        'column_a': dtype('O'),
        'column_b': pd.Int64Dtype(),
        'column_c': dtype('<M8[ns]'),
        'column_d': dtype('float64'),
    }
Example #6
0
def test_source_file2_has_partial_dtypes(
        source_file2_partial_dtypes: SourceFile):
    """
    When dtypes are provided for only some of the columns, those dtypes
    should be applied and the remaining columns should keep 'object' dtype.
    """
    frame = source_file2_partial_dtypes.get_csv_as_df(apply_dtypes=True)
    actual_dtypes = frame.dtypes.to_dict()
    assert actual_dtypes == {
        'column_a': dtype('O'),
        'column_b': pd.Int64Dtype(),
        'column_c': dtype('<M8[ns]'),
        'column_d': dtype('O'),
    }
Example #7
0
def test_source_file2_has_only_object_dtypes(source_file2: SourceFile):
    """With apply_dtypes=False, all four columns should be object dtype."""
    frame = source_file2.get_csv_as_df(apply_dtypes=False)
    column_dtypes = list(frame.dtypes)
    assert column_dtypes == [dtype('O') for _ in range(4)]
Example #8
0
def source_file1_df(source_data_test_dir: Path) -> pd.DataFrame:
    """Return source_file1.csv as a DataFrame, read with apply_dtypes=False."""
    test_dir = source_data_test_dir / 'test_dir1'
    csv_path = test_dir / 'source_file1.csv'
    # The file parameters are built with a comma delimiter.
    return SourceFile(
        csv_path,
        get_file_params(delimiter=','),
    ).get_csv_as_df(apply_dtypes=False)
Example #9
0
def test_invalid_kwarg_raises_error(source_file2: SourceFile):
    """An unknown keyword argument to get_csv_as_df should raise TypeError."""
    with pytest.raises(TypeError) as excinfo:
        source_file2.get_csv_as_df(apply_dtypes=False, bad_kwarg=42)

    error_text = str(excinfo.value)
    assert "unexpected keyword argument 'bad_kwarg'" in error_text
Example #10
0
def test_setting_cached_df_manually(source_file2: SourceFile):
    """
    After a modified DataFrame is stored through ``cache_df``, the next
    ``get_csv_as_df`` call should reflect it: the column dropped before
    caching must be absent.
    """
    modified_df = source_file2.get_csv_as_df(apply_dtypes=False, cache=False)
    modified_df.drop(labels='column_a', axis=1, inplace=True)
    source_file2.cache_df(modified_df)

    reread_df = source_file2.get_csv_as_df(apply_dtypes=False, cache=False)
    assert 'column_a' not in reread_df.columns
Example #11
0
def test_cache_method_is_called(source_file2: SourceFile):
    """A read with cache=True should call ``_cache_df_copy`` exactly once."""
    cache_spy = MagicMock()
    source_file2._cache_df_copy = cache_spy

    source_file2.get_csv_as_df(apply_dtypes=False, cache=True)

    cache_spy.assert_called_once()
Example #12
0
def test_get_csv_as_df_requires_encoding(source_file2: SourceFile):
    """
    With the 'encoding' entry removed from ``_params``, reading should raise
    a ValueError whose message mentions "encoding".
    """
    del source_file2._params['encoding']

    with pytest.raises(ValueError) as excinfo:
        source_file2.get_csv_as_df(apply_dtypes=False)

    error_text = str(excinfo.value)
    assert "encoding" in error_text