def build(X_df=None, y_df=None):
    """Build features and target.

    Any input left as ``None`` is filled in from ``load_data()``. The
    loader is invoked at most once per call, even when both inputs are
    missing.

    Args:
        X_df (DataFrame): raw variables; taken from ``load_data()`` if None
        y_df (DataFrame): raw target; taken from ``load_data()`` if None

    Returns:
        dict with keys X_df, features, mapper_X, X, y_df, encoder_y, y
    """
    if X_df is None or y_df is None:
        # Load once and fill in only what the caller did not supply, rather
        # than calling load_data() separately for each missing input.
        loaded_X_df, loaded_y_df = load_data()
        if X_df is None:
            X_df = loaded_X_df
        if y_df is None:
            y_df = loaded_y_df

    # Fit the feature pipeline on the raw variables.
    features = collect_contrib_features()
    mapper_X = FeatureEngineeringPipeline(features)
    X = mapper_X.fit_transform(X_df)

    # Fit the target encoder on the raw target.
    encoder_y = get_target_encoder()
    y = encoder_y.fit_transform(y_df)

    return {
        'X_df': X_df,
        'features': features,
        'mapper_X': mapper_X,
        'X': X,
        'y_df': y_df,
        'encoder_y': encoder_y,
        'y': y,
    }
# Example #2
# 0
def test_df_colnames(input, transformer, output):
    """Check that the pipeline's transformed column names are all strings.

    Builds a single ``Feature`` from the given arguments, fits a
    ``FeatureEngineeringPipeline`` on a small two-column frame, wraps the
    result in a DataFrame labelled with ``mapper.transformed_names_``, and
    asserts every resulting column label is a ``str``.

    Args:
        input: passed through as the first argument of ``Feature``
            (presumably supplied by a parametrize decorator -- the name
            shadows the builtin but is kept for compatibility)
        transformer: passed through as the second argument of ``Feature``
        output: passed through as ``Feature(..., output=output)``
    """
    feature = Feature(input, transformer, output=output)
    mapper = FeatureEngineeringPipeline(feature)

    # Build the 5x2 string fixture explicitly: pandas.util.testing (and its
    # makeCustomDataframe helper) was deprecated in pandas 1.0 and removed
    # in pandas 2.0. Cell and index labels follow that helper's defaults.
    n_rows, n_cols = 5, 2
    entities_df = pd.DataFrame(
        [[f'R{r}C{c}' for c in range(n_cols)] for r in range(n_rows)],
        index=pd.Index([f'R_l0_g{r}' for r in range(n_rows)], name='R0'),
        columns=['foo', 'bar'],
    )

    feature_matrix = mapper.fit_transform(entities_df)
    feature_frame = pd.DataFrame(
        feature_matrix,
        columns=mapper.transformed_names_,
        index=entities_df.index,
    )
    assert fy.all(fy.isa(str), feature_frame.columns)