Example #1
0
def test_datetime_featurizer_numpy_array_input():
    """Fit and transform a 2-D ``datetime64`` numpy array with default settings.

    The input carries no column labels; the expected output names and
    feature-name keys below are all prefixed with ``0``.
    """
    featurizer = DateTimeFeaturizer()
    dates = np.array([['2007-02-03'], ['2016-06-07'], ['2020-05-19']], dtype='datetime64')
    featurizer.fit(dates)

    expected_columns = ["0_year", "0_month", "0_day_of_week", "0_hour"]
    assert list(featurizer.transform(dates).columns) == expected_columns

    # Only month and day-of-week are expected to expose a name -> code mapping.
    expected_feature_names = {
        '0_month': {'February': 1, 'June': 5, 'May': 4},
        '0_day_of_week': {'Saturday': 6, 'Tuesday': 2},
    }
    assert featurizer.get_feature_names() == expected_feature_names
Example #2
0
def test_datetime_featurizer_custom_features_to_extract():
    """Request only ``month`` and ``year`` from a frame holding one date column."""
    featurizer = DateTimeFeaturizer(features_to_extract=["month", "year"])
    dates = pd.date_range('2020-02-24', periods=20, freq='D')
    frame = pd.DataFrame({"date col": dates, "numerical": [0] * len(dates)})
    featurizer.fit(frame)

    output_columns = list(featurizer.transform(frame).columns)
    assert output_columns == ["numerical", "date col_month", "date col_year"]

    # The expected mapping has an entry for the month feature only.
    expected_feature_names = {"date col_month": {"February": 1, "March": 2}}
    assert featurizer.get_feature_names() == expected_feature_names
Example #3
0
def test_datetime_featurizer_no_features_to_extract():
    """With an empty ``features_to_extract`` the transform output must equal the input."""
    featurizer = DateTimeFeaturizer(features_to_extract=[])
    dates = pd.date_range('2020-02-24', periods=20, freq='D')
    frame = pd.DataFrame({"date col": dates, "numerical": [0] * len(dates)})
    featurizer.fit(frame)

    result = featurizer.transform(frame)
    assert result.equals(frame)
    # Nothing was extracted, so no feature-name mapping is expected.
    assert featurizer.get_feature_names() == {}
def test_datetime_featurizer_woodwork_custom_overrides_returned_by_components(with_datetime_col, encode_as_categories, X_df):
    """Check the logical types of the transformed DataTable for each override of column ``0``.

    For every candidate logical type, column ``0`` of ``X_df`` is overridden,
    the featurizer is fitted and applied, and the resulting logical types are
    compared with the expectations below.

    Args:
        with_datetime_col: When truthy, an extra ``'datetime col'`` column is
            added to ``X_df`` before the loop.
        encode_as_categories: Forwarded to ``DateTimeFeaturizer``; selects
            whether month / day-of-week are expected as ``Categorical`` or
            ``Integer``.
        X_df: Input frame (presumably a pytest fixture defined elsewhere;
            it is mutated in place when ``with_datetime_col`` is truthy).
    """
    if with_datetime_col:
        X_df['datetime col'] = pd.to_datetime(['20200101', '20200519', '20190607'], format='%Y%m%d')

    # Year and hour are always expected as Integer; month and day-of-week
    # follow the encode_as_categories switch.
    calendar_type = Categorical if encode_as_categories else Integer

    for logical_type in [Integer, Double, Categorical, NaturalLanguage, Datetime]:
        try:
            X = ww.DataTable(X_df.copy(), logical_types={0: logical_type})
        except TypeError:
            # The DataTable could not be built with this override; skip it.
            continue

        featurizer = DateTimeFeaturizer(encode_as_categories=encode_as_categories)
        featurizer.fit(X)
        transformed = featurizer.transform(X)
        assert isinstance(transformed, ww.DataTable)

        observed_types = transformed.logical_types

        if with_datetime_col:
            datetime_col_transformed = {
                'datetime col_year': Integer,
                'datetime col_month': calendar_type,
                'datetime col_day_of_week': calendar_type,
                'datetime col_hour': Integer,
            }
            assert all(pair in observed_types.items() for pair in datetime_col_transformed.items())

        if logical_type == Datetime:
            # Column 0 itself was treated as a datetime, so its derived
            # features are expected in the output.
            col_transformed = {
                '0_year': Integer,
                '0_month': calendar_type,
                '0_day_of_week': calendar_type,
                '0_hour': Integer,
            }
            assert all(pair in observed_types.items() for pair in col_transformed.items())
        else:
            # Any other override must be carried through unchanged.
            assert observed_types[0] == logical_type
Example #5
0
def test_datetime_featurizer_transform():
    """Fit on dates from 2000, transform dates from 2020, and check the extracted years.

    The year columns of the output must reflect the transformed frame
    (2020), not the frame the featurizer was fitted on.
    """
    featurizer = DateTimeFeaturizer(features_to_extract=["year"])

    train_frame = pd.DataFrame({
        'Numerical 1': range(20),
        'Date Col 1': pd.date_range('2000-05-19', periods=20, freq='D'),
        'Date Col 2': pd.date_range('2000-02-03', periods=20, freq='W'),
        'Numerical 2': [0] * 20,
    })
    test_frame = pd.DataFrame({
        'Numerical 1': range(20),
        'Date Col 1': pd.date_range('2020-05-19', periods=20, freq='D'),
        'Date Col 2': pd.date_range('2020-02-03', periods=20, freq='W'),
        'Numerical 2': [0] * 20,
    })

    featurizer.fit(train_frame)
    result = featurizer.transform(test_frame).to_dataframe()

    expected_columns = ['Numerical 1', 'Numerical 2', 'Date Col 1_year', 'Date Col 2_year']
    assert list(result.columns) == expected_columns

    expected_years = pd.Series([2020] * 20, dtype="Int64")
    assert result["Date Col 1_year"].equals(expected_years)
    assert result["Date Col 2_year"].equals(pd.Series([2020] * 20, dtype="Int64"))

    # Extracting only the year is expected to yield no feature-name mapping.
    assert featurizer.get_feature_names() == {}
def test_datetime_featurizer_no_datetime_cols():
    """A frame with no datetime columns should come back as the input cast to ``Int64``."""
    featurizer = DateTimeFeaturizer(features_to_extract=["year", "month"])
    frame = pd.DataFrame([[1, 3, 4], [2, 5, 2]])
    expected_frame = frame.astype("Int64")

    featurizer.fit(frame)
    result = featurizer.transform(frame).to_dataframe()

    assert_frame_equal(expected_frame, result)
    # No datetime column means no feature-name mapping is expected.
    assert featurizer.get_feature_names() == {}
Example #7
0
def test_datetime_featurizer_no_col_names():
    """Featurize a single unnamed date column built from a Series.

    The frame's only column has the default label ``0``, which is the prefix
    of every expected output name below.
    """
    featurizer = DateTimeFeaturizer()
    date_series = pd.Series(pd.date_range('2020-02-24', periods=10, freq='D'))
    frame = pd.DataFrame(date_series)
    featurizer.fit(frame)

    expected_columns = ['0_year', '0_month', '0_day_of_week', '0_hour']
    assert list(featurizer.transform(frame).columns) == expected_columns

    expected_feature_names = {
        '0_month': {'February': 1, 'March': 2},
        '0_day_of_week': {
            'Monday': 1,
            'Tuesday': 2,
            'Wednesday': 3,
            'Thursday': 4,
            'Friday': 5,
            'Saturday': 6,
            'Sunday': 0,
        },
    }
    assert featurizer.get_feature_names() == expected_feature_names
def test_datetime_featurizer_no_features_to_extract():
    """With an empty ``features_to_extract`` only the integer column's dtype should change.

    The expected frame is the input with ``"numerical"`` cast to ``Int64``;
    the date column is expected to be left as it is.
    """
    featurizer = DateTimeFeaturizer(features_to_extract=[])
    dates = pd.date_range('2020-02-24', periods=20, freq='D')
    frame = pd.DataFrame({"date col": dates, "numerical": [0] * len(dates)})

    # astype with a mapping returns a new frame and casts only the named column.
    expected_frame = frame.astype({"numerical": "Int64"})

    featurizer.fit(frame)
    result = featurizer.transform(frame).to_dataframe()

    assert_frame_equal(expected_frame, result)
    assert featurizer.get_feature_names() == {}
Example #9
0
def test_datetime_featurizer_no_datetime_cols():
    """A frame with no datetime columns must pass through the transform unchanged."""
    featurizer = DateTimeFeaturizer(features_to_extract=["year", "month"])
    frame = pd.DataFrame([[1, 3, 4], [2, 5, 2]])
    featurizer.fit(frame)

    result = featurizer.transform(frame)
    assert result.equals(frame)
    # No datetime column means no feature-name mapping is expected.
    assert featurizer.get_feature_names() == {}