예제 #1
0
def test_eda_check_all_attributes_generated():
    """Verify that the result of ``eda.eda`` exposes both top-level entries.

    The returned container must include a 'histograms' entry and a
    'stats' entry.
    """
    results = eda.eda(get_test_data())

    # Checked in the same order as before: histograms first, then stats.
    for required_key in ('histograms', 'stats'):
        assert required_key in results
예제 #2
0
def test_eda_check_generated_histograms_has_expected_properties():
    """Verify the mark, title and data of every generated histogram.

    Each chart under the 'histograms' entry must use the 'bar' mark, be
    titled 'The histogram of <column name>', and carry a data frame equal
    to the frame that was passed to ``eda.eda``.
    """
    source_frame = get_test_data()
    histograms = eda.eda(source_frame)["histograms"]

    for column, chart in histograms.items():
        expected_title = f"The histogram of {column!s}"

        assert chart.mark == 'bar'
        assert chart.title == expected_title
        assert_frame_equal(chart.data, source_frame)
예제 #3
0
def test_eda_check_all_histograms_generated():
    """Verify that ``eda.eda`` produces three histograms, all Altair charts.

    The 'histograms' entry of the result must hold exactly three items
    (presumably one per column of the fixture from ``get_test_data`` --
    confirm against that helper), and every item must be an Altair
    ``Chart``.
    """
    input_data = get_test_data()
    eda_results = eda.eda(input_data)

    actual_histograms = eda_results["histograms"]

    assert len(actual_histograms) == 3, \
        "Exactly three histograms should be generated."

    # Check against the public ``altair.Chart`` alias rather than the
    # version-specific ``altair.vegalite.v3.api.Chart`` path, so the test
    # does not break on Altair releases that no longer ship the ``v3``
    # subpackage. Under Altair 3 both names refer to the same class.
    for histogram in actual_histograms.values():
        assert isinstance(histogram, altair.Chart), \
            "All histograms should be instance of Altair Charts."
예제 #4
0
def test_eda_check_stats_generated_as_expected_when_column_is_categorical():
    """Verify the summary statistics reported for 'my_attribute_two'.

    The stats table for that column is compared, row label by row label,
    against the expected count, unique, top and freq values.
    """
    column = "my_attribute_two"
    expected = {
        'count': 10,
        'unique': 2,
        'top': 'category1',
        'freq': 6,
    }

    results = eda.eda(get_test_data())
    column_stats = results["stats"][column]

    for stat_name in expected:
        # Select the row for this statistic, then read the column's value.
        stat_row = column_stats.loc[stat_name]
        observed = stat_row.get("my_attribute_two")
        assert observed == expected[stat_name], \
            "actual stat should be equal to expected stat."
예제 #5
0
def test_eda_check_stats_generated_as_expected_when_column_is_numerical():
    """Verify the summary statistics reported for 'my_attribute_one'.

    Each reported statistic is rounded to six decimal places and compared
    with the expected count, mean, std, min, quartiles and max.
    """
    column = "my_attribute_one"
    expected = {
        'count': 10.000000,
        'mean': 0.125267,
        'std': 0.863782,
        'min': -1.316094,
        '25%': -0.469274,
        '50%': 0.161249,
        '75%': 0.815977,
        'max': 1.465459,
    }

    results = eda.eda(get_test_data())
    column_stats = results["stats"][column]

    for stat_name in expected:
        stat_row = column_stats.loc[stat_name]
        observed = stat_row.get("my_attribute_one")
        # Expected values are given to six decimals, so round before comparing.
        assert round(observed, 6) == expected[stat_name], \
            "actual stat should be equal to expected stat."
예제 #6
0
def test_eda_raise_error_when_provided_data_frame_has_non_string_columns():
    """Verify ``eda.eda`` raises ValueError when a column label is not a string."""
    frame_with_int_label = get_test_data()
    # Add a column keyed by the integer 0 so that not every label is a string.
    frame_with_int_label[0] = np.random.randn(10, 1)

    expected_message = "All column names should be string."
    with pytest.raises(ValueError, match=expected_message):
        eda.eda(frame_with_int_label)
예제 #7
0
def test_eda_raise_error_when_provided_data_frame_is_empty():
    """Verify ``eda.eda`` raises ValueError for a DataFrame built with no data."""
    empty_frame = pandas.DataFrame()
    expected_message = "input_data should contain at least one axis."

    with pytest.raises(ValueError, match=expected_message):
        eda.eda(empty_frame)
예제 #8
0
def test_eda_raise_error_when_provided_data_is_not_data_frame():
    """Verify ``eda.eda`` raises ValueError when given a plain list."""
    not_a_frame = [1, 2, 3, 4, 5]
    expected_message = "input_data must be instance of pandas.core.frame.DataFrame."

    with pytest.raises(ValueError, match=expected_message):
        eda.eda(not_a_frame)