def test_extract_datetime_features_with_default_options(
    df_datetime, df_datetime_transformed
):
    """A transformer built with no arguments yields the default feature columns.

    The output of ``fit_transform`` is compared against the reference frame
    restricted to the ``vars_non_dt`` columns followed by, for every variable
    in ``vars_dt``, that variable's name combined with each default suffix.
    """
    result = DatetimeFeatures().fit_transform(df_datetime)

    # Derived columns are grouped per variable, in the order of `vars_dt`.
    derived_columns = []
    for var in vars_dt:
        for suffix in feat_names_default:
            derived_columns.append(var + suffix)
    expected_columns = vars_non_dt + derived_columns

    pd.testing.assert_frame_equal(
        result,
        df_datetime_transformed[expected_columns],
    )
def test_extract_features_from_different_timezones(
    df_datetime, df_datetime_transformed
):
    """Check hour extraction when rows carry different UTC offsets.

    With ``utc=True`` the extracted hour must equal the reference
    ``time_obj_hour`` column minus each row's offset. With ``utc=False`` the
    same input must raise a ``ValueError`` with the exact message checked
    at the end of the test.
    """
    time_zones = [4, -1, 9, -7]
    # Element-wise `Series.add` appends one offset suffix per row; the
    # suffixes match `time_zones` position by position.
    tz_df = pd.DataFrame(
        {"time_obj": df_datetime["time_obj"].add(["+4", "-1", "+9", "-7"])}
    )

    transformer = DatetimeFeatures(
        variables="time_obj", features_to_extract=["hour"], utc=True
    )
    X = transformer.fit_transform(tz_df)
    pd.testing.assert_frame_equal(
        X,
        df_datetime_transformed[["time_obj_hour"]].apply(
            lambda x: x.subtract(time_zones)
        ),
    )

    exp_err_msg = (
        "ValueError: variable(s) time_obj "
        "could not be converted to datetime. Try setting utc=True"
    )
    # The call is deliberately NOT wrapped in `assert`: evaluating the truth
    # value of a returned DataFrame raises its own ValueError, which would
    # satisfy `pytest.raises` even if the transformer failed to raise.
    with pytest.raises(ValueError) as errinfo:
        DatetimeFeatures(
            variables="time_obj", features_to_extract=["hour"], utc=False
        ).fit_transform(tz_df)
    # Checked after the `with` block so the comparison actually executes.
    assert str(errinfo.value) == exp_err_msg
def test_get_feature_names_out(df_datetime, df_datetime_transformed):
    """Exercise ``get_feature_names_out`` under several transformer setups.

    For each configuration the test checks that the names returned without
    arguments match the columns produced by ``fit_transform``, and that the
    names returned for explicit ``input_features`` are the variable names
    combined with the expected feature suffixes. Invalid ``input_features``
    values must raise ``ValueError``.
    """

    def expected_names(variables, suffixes):
        # Names are grouped per variable, each followed by all its suffixes.
        return [var + suffix for var in variables for suffix in suffixes]

    single_var = ["date_obj1"]
    picked = ["semester", "week"]
    picked_suffixes = ["_" + feat for feat in picked]

    # --- default feature set, all variables ---
    extractor = DatetimeFeatures()
    out = extractor.fit_transform(df_datetime)
    assert list(out.columns) == extractor.get_feature_names_out()
    assert extractor.get_feature_names_out(
        input_features=vars_dt
    ) == expected_names(vars_dt, feat_names_default)
    assert extractor.get_feature_names_out(
        input_features=single_var
    ) == expected_names(single_var, feat_names_default)

    # --- default feature set, a single variable ---
    extractor = DatetimeFeatures(variables="date_obj1")
    out = extractor.fit_transform(df_datetime)
    assert list(out.columns) == extractor.get_feature_names_out()
    assert extractor.get_feature_names_out(
        input_features=single_var
    ) == expected_names(single_var, feat_names_default)

    # --- every supported feature ---
    extractor = DatetimeFeatures(features_to_extract="all")
    out = extractor.fit_transform(df_datetime)
    assert list(out.columns) == extractor.get_feature_names_out()
    feat_names_all = [FEATURES_SUFFIXES[feat] for feat in FEATURES_SUPPORTED]
    assert extractor.get_feature_names_out(
        input_features=vars_dt
    ) == expected_names(vars_dt, feat_names_all)
    assert extractor.get_feature_names_out(
        input_features=single_var
    ) == expected_names(single_var, feat_names_all)

    # --- an explicit subset of features ---
    extractor = DatetimeFeatures(features_to_extract=["semester", "week"])
    out = extractor.fit_transform(df_datetime)
    assert list(out.columns) == extractor.get_feature_names_out()
    assert extractor.get_feature_names_out(
        input_features=vars_dt
    ) == expected_names(vars_dt, picked_suffixes)
    assert extractor.get_feature_names_out(
        input_features=single_var
    ) == expected_names(single_var, picked_suffixes)

    # --- features extracted from the index ---
    extractor = DatetimeFeatures(
        variables="index", features_to_extract=["semester", "week"]
    )
    out = extractor.fit_transform(dates_idx_dt)
    # Anything other than "index" passed as input_features must be rejected,
    # whether it is a string or a list.
    with pytest.raises(ValueError):
        extractor.get_feature_names_out(input_features="not_index")
    with pytest.raises(ValueError):
        extractor.get_feature_names_out(input_features=["still", "not", "index"])
    # No input_features: names match the transformed columns.
    assert list(out.columns) == extractor.get_feature_names_out()
    # input_features="index": names are the bare feature names.
    assert picked == extractor.get_feature_names_out(input_features="index")

    # --- drop_original=False ---
    extractor = DatetimeFeatures(drop_original=False)
    out = extractor.fit_transform(df_datetime)
    assert list(out.columns) == extractor.get_feature_names_out()
    assert extractor.get_feature_names_out(
        input_features=vars_dt
    ) == expected_names(vars_dt, feat_names_default)
    assert extractor.get_feature_names_out(
        input_features=single_var
    ) == expected_names(single_var, feat_names_default)

    # A bare string instead of a list must raise.
    with pytest.raises(ValueError):
        extractor.get_feature_names_out(input_features="date_obj1")

    # A name the transformer is expected to reject ("color") must raise.
    with pytest.raises(ValueError):
        extractor.get_feature_names_out(input_features=["color"])