Exemple #1
0
def test_report_is_displayed_properly(capsys):
    """Check that ``report()`` prints the new shape and removed-row count.

    The input column mixes three integers with two strings. The expected
    message is built from ``sdf.new_df.shape`` and the number of string
    entries, and is compared against what was written to stdout.
    """
    kept = [1, 2, 3]
    dropped = ['remove', 'this']
    frame = pd.DataFrame.from_dict({'column': kept + dropped})

    sdf = StrictDataFrame(frame)
    sdf.report()
    captured = capsys.readouterr()

    removed_count = len(dropped)
    expected = (f'DataFrame having shape {sdf.new_df.shape}'
                f' ({removed_count} rows removed from original)\n')
    assert captured.out == expected
Exemple #2
0
def test_can_retrieve_dtypes_dict_from_strict_data_frame():
    """Check that the ``dtypes`` attribute of a StrictDataFrame is a dict."""
    frame = pd.DataFrame.from_dict({'int_column': [1, 0, 0, 2, True]})

    dtypes = StrictDataFrame(frame).dtypes

    assert isinstance(dtypes, dict)
Exemple #3
0
def test_parse_bool_values_from_mixed_bool_column():
    """Check that every value left in a mixed bool/str column is boolean.

    The column is built from three boolean-like strings followed by two
    arbitrary strings; each element of the resulting ``new_df`` column must
    be a boolean.
    """
    bool_values = ['true', 'True', 'False']
    non_bool_values = ['string_1', 'string_2']
    df = pd.DataFrame.from_dict(
        {'mixed_column': bool_values + non_bool_values})

    sdf = StrictDataFrame(df)

    # Iterating a Series can yield plain Python ``bool`` objects, which have
    # no ``.dtype`` attribute, so reading ``value.dtype`` directly would raise
    # AttributeError. Accept native bools (as the int/bool column test in
    # this file does) as well as NumPy scalars whose dtype is bool.
    assert all(
        isinstance(value, bool) or getattr(value, 'dtype', None) == bool
        for value in sdf.new_df.mixed_column)
Exemple #4
0
def test_non_int_are_removed_from_mixed_int_column():
    """Check that non-integer strings are absent from the processed column.

    The column is built from three integer-like strings followed by two
    arbitrary strings; none of the arbitrary strings may remain among the
    values of the ``new_df`` column.
    """
    int_values = ['1', '2', '3']
    non_int_values = ['string_1', 'string_2']
    df = pd.DataFrame.from_dict({'mixed_column': int_values + non_int_values})

    sdf = StrictDataFrame(df)

    # ``value in series`` tests membership in the index labels, not in the
    # data, so it could never detect a leftover string. Compare against the
    # actual column values instead.
    remaining_values = sdf.new_df.mixed_column.tolist()
    assert not any(value in remaining_values for value in non_int_values)
Exemple #5
0
def test_parse_str_values_from_mixed_str_column():
    """Check that a mixed str/int column ends up holding only the strings.

    Every element of the ``new_df`` column must be a ``str``, and each of the
    original strings must still be present in it.
    """
    expected_strings = ['string_1', 'string_2', 'string_3']
    numbers = [5, 6]
    frame = pd.DataFrame.from_dict(
        {'mixed_column': expected_strings + numbers})

    sdf = StrictDataFrame(frame)

    assert all(isinstance(item, str) for item in sdf.new_df.mixed_column)
    assert all(
        any(text == sdf.new_df.mixed_column) for text in expected_strings)
Exemple #6
0
def test_non_bool_are_removed_from_mixed_bool_column():
    """Check that non-boolean strings are absent from the processed column.

    The column is built from four boolean-like strings followed by two
    arbitrary strings; none of the arbitrary strings may remain among the
    values of the ``new_df`` column.
    """
    bool_values = ['true', 'True', 'False', 'false']
    non_bool_values = ['string_1', 'string_2']
    df = pd.DataFrame.from_dict(
        {'mixed_column': bool_values + non_bool_values})

    sdf = StrictDataFrame(df)

    # ``value in series`` tests membership in the index labels, not in the
    # data, so it could never detect a leftover string. Compare against the
    # actual column values instead.
    remaining_values = sdf.new_df.mixed_column.tolist()
    assert not any(value in remaining_values for value in non_bool_values)
Exemple #7
0
def test_all_parsed_bool_values_from_mixed_bool_column_are_bool():
    """Check that each original boolean still appears in the new column.

    The input column holds three real booleans followed by two strings.
    """
    booleans = [True, True, False]
    strings = ['string_1', 'string_2']
    frame = pd.DataFrame.from_dict({'mixed_column': booleans + strings})

    sdf = StrictDataFrame(frame)

    assert all(any(flag == sdf.new_df.mixed_column) for flag in booleans)
Exemple #8
0
def test_parse_int_values_from_mixed_int_column():
    """Check that integer-like strings are parsed to ints in a mixed column.

    Every element of the ``new_df`` column must be an ``int``, and the
    integer form of each original numeric string must be present in it.
    """
    numeric_strings = ['1', '2', '3']
    other_strings = ['string_1', 'string_2']
    frame = pd.DataFrame.from_dict(
        {'mixed_column': numeric_strings + other_strings})

    sdf = StrictDataFrame(frame)

    assert all(isinstance(item, int) for item in sdf.new_df.mixed_column)
    assert all(
        any(int(text) == sdf.new_df.mixed_column)
        for text in numeric_strings)
Exemple #9
0
def test_bool_values_are_parsed_correctly_from_strings():
    """Check that boolean-like strings map to the matching bool per row.

    ``'true'``/``'True'`` are expected to become ``True`` and
    ``'False'``/``'false'`` to become ``False``, looked up by row label.
    """
    raw_strings = ['true', 'True', 'False', 'false']
    expected = [True, True, False, False]
    frame = pd.DataFrame.from_dict({'bool_column': raw_strings})

    sdf = StrictDataFrame(frame)

    assert all(
        wanted == sdf.new_df.bool_column[label]
        for label, wanted in enumerate(expected))
Exemple #10
0
def test_parse_float_values_from_mixed_float_column():
    """Check that float-like strings are parsed to floats in a mixed column.

    Every element of the ``new_df`` column must be a ``float``, and the
    float form of each original numeric string must be present in it.
    """
    numeric_strings = ['1.5', '2.5', '3.5']
    non_float_values = ['string_1', 'string_2']
    frame = pd.DataFrame.from_dict(
        {'mixed_column': numeric_strings + non_float_values})

    sdf = StrictDataFrame(frame)

    assert all(isinstance(item, float) for item in sdf.new_df.mixed_column)
    assert all(
        any(float(text) == sdf.new_df.mixed_column)
        for text in numeric_strings)
Exemple #11
0
def test_dtypes_are_generated_correctly_for_each_column():
    """Check the dtype name reported for each of four mixed columns.

    Each column contains one value of a different kind than the rest; the
    ``dtypes`` mapping is expected to name the majority kind per column.
    """
    columns = {
        'int_column': [5, 8, 2, 2, True],
        'str_column': ['string_1', 'string_2', 5, 'string_3', 'string_4'],
        'float_column': [1.5, 2.5, 3.5, False, 4.5],
        'bool_column': ['true', 'True', 'False', 'false', 'not_a_bool'],
    }
    expected_dtypes = {
        'int_column': 'int64',
        'str_column': 'str',
        'float_column': 'float64',
        'bool_column': 'bool',
    }

    sdf = StrictDataFrame(pd.DataFrame.from_dict(columns))
    reported = sdf.dtypes

    for column_name, expected_name in expected_dtypes.items():
        assert reported[column_name] == expected_name
Exemple #12
0
def test_old_df_is_original_df():
    """Check that ``old_df`` equals the DataFrame given to the constructor."""
    original = pd.DataFrame.from_dict({'column_1': [1, 2]})

    wrapped = StrictDataFrame(original)

    pd.testing.assert_frame_equal(original, wrapped.old_df)
Exemple #13
0
def test_not_using_dataframe_raises_exception():
    """Check that building a StrictDataFrame from a str raises TypeError."""
    not_a_frame = 'Not a pd.DataFrame'
    with pytest.raises(TypeError):
        StrictDataFrame(not_a_frame)
Exemple #14
0
def test_bool_is_removed_if_is_not_int_bool_column():
    """Check that no ``bool`` instance remains in a mostly-integer column.

    The input holds the integers 1, 0, 0, 2 plus a single ``True``.
    """
    frame = pd.DataFrame.from_dict({'int_column': [1, 0, 0, 2, True]})

    sdf = StrictDataFrame(frame)

    assert all(
        not isinstance(item, bool) for item in sdf.new_df.int_column)
Exemple #15
0
def test_bool_are_parsed_from_int_bool_column():
    """Check that a column of 0/1 integers plus ``True`` yields only bools."""
    frame = pd.DataFrame.from_dict({'bool_column': [1, 0, 0, True]})

    sdf = StrictDataFrame(frame)

    assert all(isinstance(item, bool) for item in sdf.new_df.bool_column)
Exemple #16
0
def test_new_df_is_created_with_valid_dict():
    """Check that ``new_df`` is a pandas DataFrame for an integer column."""
    frame = pd.DataFrame.from_dict({'column_1': [1, 2]})

    result = StrictDataFrame(frame).new_df

    assert isinstance(result, pd.DataFrame)