Esempio n. 1
0
def test_features_raw_data_no_target(data_classification_balanced, feature_descriptor):
    """Check that raw_data(drop_target=True) matches the input X without the "Date" column.

    The result is indexed by the expected columns before the comparison.
    """
    frame, labels = data_classification_balanced
    features = Features(frame, labels, feature_descriptor)

    wanted = frame.drop(columns=["Date"])

    result = features.raw_data(drop_target=True)

    assert result[wanted.columns].equals(wanted)
Esempio n. 2
0
def test_features_numerical_features_no_target(
        feature_list, target, expected, data_classification_balanced, feature_descriptor
):
    """Check numerical_features(drop_target=True) against ``expected``.

    ``_numerical_features`` and ``target`` are overwritten on the object with
    the supplied ``feature_list`` and ``target`` before the call is made.
    """
    frame, labels = data_classification_balanced
    features = Features(frame, labels, feature_descriptor)

    # Override the object's state with the supplied test inputs.
    features._numerical_features = feature_list
    features.target = target

    assert features.numerical_features(drop_target=True) == expected
Esempio n. 3
0
def test_features_create_mapped_dataframe(data_classification_balanced, feature_descriptor, expected_raw_mapping):
    """Check that _create_mapped_dataframe() returns the data with values replaced by the mapping.

    The expected frame is X and y joined column-wise, without "Date", with
    ``expected_raw_mapping`` applied through ``DataFrame.replace``.
    """
    frame, labels = data_classification_balanced
    features = Features(frame, labels, feature_descriptor)

    joined = pd.concat([frame, labels], axis=1)
    wanted = joined.drop(columns=["Date"]).replace(expected_raw_mapping)

    mapped = features._create_mapped_dataframe()

    assert mapped[wanted.columns].equals(wanted)
Esempio n. 4
0
def test_features_data(data_classification_balanced, feature_descriptor, expected_raw_mapping):
    """Check that data(drop_target=True) returns the mapped X columns only.

    The expected frame is X without "Date", with ``expected_raw_mapping``
    applied through ``DataFrame.replace``; y is not part of it.
    """
    frame, labels = data_classification_balanced
    features = Features(frame, labels, feature_descriptor)

    wanted = frame.drop(columns=["Date"]).replace(expected_raw_mapping)

    result = features.data(drop_target=True)

    assert result[wanted.columns].equals(wanted)
Esempio n. 5
0
def test_features_create_mapping(data_classification_balanced, feature_descriptor, expected_mapping):
    """Check that mapping() returns the expected mapping dictionary.

    The expected value is ``expected_mapping`` with the "Height" and "Price"
    entries set to None.
    """
    X, y = data_classification_balanced
    f = Features(X, y, feature_descriptor)

    # Build a fresh dict rather than mutating the object handed in by the
    # fixture, so the change cannot leak to other users of that object.
    expected = {**expected_mapping, "Height": None, "Price": None}

    actual = f.mapping()

    assert actual == expected
Esempio n. 6
0
def test_features_create_raw_dataframe(data_classification_balanced, feature_descriptor):
    """Check that _create_raw_dataframe() returns the input data unchanged.

    The expected frame is X and y joined column-wise, without "Date".
    """
    frame, labels = data_classification_balanced
    features = Features(frame, labels, feature_descriptor)

    joined = pd.concat([frame, labels], axis=1)
    wanted = joined.drop(columns=["Date"])

    raw = features._create_raw_dataframe()

    assert raw[wanted.columns].equals(wanted)
Esempio n. 7
0
def test_analyzer_numeric_describe_no_numerical_features(
        data_classification_balanced):
    """Check that numerical_describe_df() returns None once "Price" and "Height" are dropped from X."""
    frame, labels = data_classification_balanced
    frame = frame.drop(columns=["Price", "Height"])

    analyzer = Analyzer(Features(frame, labels, None))

    assert analyzer.numerical_describe_df() is None
Esempio n. 8
0
def test_features_numerical_features_exclude_transformed(
        data_classification_balanced, feature_descriptor, transformed_features
):
    """Check numerical_features(exclude_transformed=True) against "Height"/"Price" minus transformed ones.

    A falsy ``transformed_features`` means both names are expected.
    """
    frame, labels = data_classification_balanced
    features = Features(frame, labels, feature_descriptor, transformed_features)

    result = features.numerical_features(exclude_transformed=True)

    # An empty exclusion set leaves the full list in place.
    excluded = transformed_features or ()
    wanted = [name for name in ("Height", "Price") if name not in excluded]

    assert result == wanted
Esempio n. 9
0
def test_features_categorical_features_exclude_transformed(
        data_classification_balanced, feature_descriptor, transformed_features
):
    """Check categorical_features(exclude_transformed=True) against the categorical names minus transformed ones.

    A falsy ``transformed_features`` means the full list of names is expected.
    """
    frame, labels = data_classification_balanced
    features = Features(frame, labels, feature_descriptor, transformed_features)

    result = features.categorical_features(exclude_transformed=True)

    # An empty exclusion set leaves the full list in place.
    excluded = transformed_features or ()
    all_categorical = ("AgeGroup", "bool", "Product", "Sex", "Target")
    wanted = [name for name in all_categorical if name not in excluded]

    assert result == wanted
Esempio n. 10
0
def test_features_raw_data_excluded_transformed(data_classification_balanced, feature_descriptor, transformed_columns):
    """Check that raw_data(exclude_transformed=True) leaves out the transformed columns.

    The expected frame is X without "Date" and, when ``transformed_columns``
    is truthy, without those columns as well.
    """
    frame, labels = data_classification_balanced
    features = Features(frame, labels, feature_descriptor, transformed_columns)

    wanted = frame.drop(columns=["Date"])
    if transformed_columns:
        wanted = wanted.drop(columns=transformed_columns)

    result = features.raw_data(exclude_transformed=True)

    assert result[wanted.columns].equals(wanted)
Esempio n. 11
0
def test_analyzer_categorical_describe_no_categorical_features(
        data_classification_balanced):
    """Check that categorical_describe_df() returns None when only "Price" and "Height" are used.

    "Price" is passed as a one-column X and "Height" as y; selecting the two
    numerical columns is simpler than dropping every categorical one.
    """
    frame = data_classification_balanced[0]
    only_price = frame[["Price"]]
    height = frame["Height"]

    analyzer = Analyzer(Features(only_price, height, None))

    assert analyzer.categorical_describe_df() is None
Esempio n. 12
0
def test_features_analyze_features_transformed_features(
        data_classification_balanced, feature_descriptor, transformed_features
):
    """Check that _analyze_features() sets the ``transformed`` flag according to ``transformed_features``.

    Features named in ``transformed_features`` must have a truthy
    ``transformed`` attribute; every other feature must have a falsy one.
    A falsy ``transformed_features`` (None or empty) means no feature is
    expected to be flagged.
    """
    X, y = data_classification_balanced
    f = Features(X, y, feature_descriptor, transformed_features)

    f.original_dataframe = pd.concat([X, y], axis=1)  # original_dataframe needs to be set up
    actual = f._analyze_features(feature_descriptor)

    # Guard against None: a membership test on None would raise TypeError.
    transformed = transformed_features or ()
    for name, feature in actual.items():
        if name in transformed:
            assert feature.transformed
        else:
            assert not feature.transformed
Esempio n. 13
0
def test_features_create_descriptions(data_classification_balanced, feature_descriptions, feature_descriptor):
    """Check that descriptions() returns the expected descriptions dictionary.

    Six features take their description from ``feature_descriptions``;
    "AgeGroup" is expected to carry ``Features._description_not_available``.
    """
    placeholder = Features._description_not_available
    d = FeatureDescriptor._description

    described = ["Sex", "Height", "Product", "Price", "bool", "Target"]
    expected_descriptions = {feat: feature_descriptions[feat][d] for feat in described}
    # "AgeGroup" is the one feature expected to get the placeholder text.
    expected_descriptions["AgeGroup"] = placeholder

    X, y = data_classification_balanced
    f = Features(X, y, feature_descriptor)

    actual_descriptions = f.descriptions()

    assert actual_descriptions == expected_descriptions
Esempio n. 14
0
def test_features_create_raw_dataframe_preserving_index(data_classification_balanced, feature_descriptor):
    """Check that _create_raw_dataframe() preserves the index of the input data.

    X and y are re-indexed to start at 100 before the Features object is
    built. The result must equal the re-indexed data and must differ from the
    same data under its original index.
    """
    X, y = data_classification_balanced
    not_expected_df = pd.concat([X, y], axis=1).drop(["Date"], axis=1)

    # Re-index copies so the objects handed in by the fixture are not
    # modified in place.
    X, y = X.copy(), y.copy()
    new_ind = list(range(100, X.shape[0] + 100))
    X.index = new_ind
    y.index = new_ind

    f = Features(X, y, feature_descriptor)
    # concat keeps the shared index of X and y, so no further re-indexing is needed.
    expected_df = pd.concat([X, y], axis=1).drop(["Date"], axis=1)

    cols = expected_df.columns
    actual_df = f._create_raw_dataframe()[cols]

    assert not actual_df.equals(not_expected_df)
    assert actual_df.equals(expected_df)
Esempio n. 15
0
def test_features_features_list_no_target(
        data_classification_balanced, feature_descriptor_type, feature_descriptor, feature_descriptor_forced_categories
):
    """Check that features(drop_target=True) returns the feature names without "Target".

    ``feature_descriptor_type`` selects the descriptor to use: "normal" for
    ``feature_descriptor``, "forced" for ``feature_descriptor_forced_categories``.

    Raises:
        ValueError: if ``feature_descriptor_type`` is neither "normal" nor "forced".
    """
    expected = ["AgeGroup", "bool", "Height", "Price", "Product", "Sex"]
    X, y = data_classification_balanced

    # couldn't find a way to incorporate fixtures into @pytest.mark.parametrize
    if feature_descriptor_type == "normal":
        fd = feature_descriptor
    elif feature_descriptor_type == "forced":
        fd = feature_descriptor_forced_categories
    else:
        # A bare `raise` has no active exception here and would only surface
        # as an uninformative RuntimeError.
        raise ValueError(f"Unknown feature_descriptor_type: {feature_descriptor_type!r}")

    f = Features(X, y, fd)
    actual = f.features(drop_target=True)

    assert actual == expected
Esempio n. 16
0
def test_features_create_categorical_features(
        data_classification_balanced, feature_descriptor_type, expected,
        feature_descriptor, feature_descriptor_forced_categories
):
    """Check that _create_categorical_features() returns ``expected`` for the selected descriptor.

    ``feature_descriptor_type`` selects the descriptor to use: "normal" for
    ``feature_descriptor``, "forced" for ``feature_descriptor_forced_categories``.

    Raises:
        ValueError: if ``feature_descriptor_type`` is neither "normal" nor "forced".
    """
    X, y = data_classification_balanced

    # couldn't find a way to incorporate fixtures into @pytest.mark.parametrize
    if feature_descriptor_type == "normal":
        fd = feature_descriptor
    elif feature_descriptor_type == "forced":
        fd = feature_descriptor_forced_categories
    else:
        # A bare `raise` has no active exception here and would only surface
        # as an uninformative RuntimeError.
        raise ValueError(f"Unknown feature_descriptor_type: {feature_descriptor_type!r}")

    f = Features(X, y, fd)

    actual = f._create_categorical_features()

    assert actual == expected
Esempio n. 17
0
def test_features_impute_column_type(data_classification_balanced, column_name, expected_type):
    """Check that _impute_column_type() returns the type marker matching ``expected_type``.

    ``expected_type`` is one of "categorical", "numerical" or "date" and is
    translated to the object's ``_categorical``, ``_numerical`` or ``_date``
    attribute respectively.

    Raises:
        ValueError: if ``expected_type`` is not one of the three known names.
    """
    X = data_classification_balanced[0]
    y = data_classification_balanced[1]
    df = pd.concat([X, y], axis=1)
    f = Features(X, y)

    type_markers = {
        "categorical": f._categorical,
        "numerical": f._numerical,
        "date": f._date,
    }
    if expected_type not in type_markers:
        # A bare `raise` has no active exception here and would only surface
        # as an uninformative RuntimeError.
        raise ValueError(f"Unknown expected_type: {expected_type!r}")
    expected = type_markers[expected_type]

    actual = f._impute_column_type(df[column_name])

    assert actual == expected
Esempio n. 18
0
def test_features_analyze_features(data_classification_balanced, feature_descriptor):
    """Check that _analyze_features() returns a dict holding the expected feature class per column."""
    expected_types = {
        "Sex": CategoricalFeature,
        "AgeGroup": CategoricalFeature,
        "Height": NumericalFeature,
        "Product": CategoricalFeature,
        "Price": NumericalFeature,
        "bool": CategoricalFeature,
        "Target": CategoricalFeature,
    }

    frame, labels = data_classification_balanced
    features = Features(frame, labels, feature_descriptor)

    # original_dataframe has to be assigned before _analyze_features is called.
    features.original_dataframe = pd.concat([frame, labels], axis=1)
    analyzed = features._analyze_features(feature_descriptor)

    assert isinstance(analyzed, dict)
    for name, feature_cls in expected_types.items():
        assert isinstance(analyzed[name], feature_cls)
Esempio n. 19
0
def fixture_features(data_classification_balanced, feature_descriptor):
    """Build a Features object from the data_classification_balanced (X, y) pair and feature_descriptor."""
    frame, labels = data_classification_balanced
    return Features(frame, labels, feature_descriptor)
Esempio n. 20
0
def fixture_features_multiclass(data_multiclass, feature_descriptor):
    """Build a Features object from the data_multiclass (X, y) pair and feature_descriptor."""
    frame, labels = data_multiclass
    return Features(frame, labels, feature_descriptor)
Esempio n. 21
0
def test_features_unused_features(data_classification_balanced, feature_descriptor):
    """Check that unused_features() returns ["Date"] for the balanced classification data."""
    frame, labels = data_classification_balanced
    unused = Features(frame, labels, feature_descriptor).unused_features()

    assert unused == ["Date"]