def build(X_df=None, y_df=None):
    """Build features and target

    Any argument left as ``None`` is filled in from ``load_data()``. The
    loader is invoked at most once, so when both arguments are omitted the
    variables and the target come from the same load.

    Args:
        X_df (DataFrame): raw variables
        y_df (DataFrame): raw target

    Returns:
        dict with keys X_df, features, mapper_X, X, y_df, encoder_y, y
    """
    if X_df is None or y_df is None:
        # Load a single time and keep only the pieces the caller did not
        # supply; explicitly passed frames are never overwritten.
        loaded_X_df, loaded_y_df = load_data()
        if X_df is None:
            X_df = loaded_X_df
        if y_df is None:
            y_df = loaded_y_df

    # Fit the feature pipeline on the raw variables and transform them.
    features = collect_contrib_features()
    mapper_X = FeatureEngineeringPipeline(features)
    X = mapper_X.fit_transform(X_df)

    # Fit the target encoder on the raw target and transform it.
    encoder_y = get_target_encoder()
    y = encoder_y.fit_transform(y_df)

    return {
        'X_df': X_df,
        'features': features,
        'mapper_X': mapper_X,
        'X': X,
        'y_df': y_df,
        'encoder_y': encoder_y,
        'y': y,
    }
예제 #2
0
def test_transform(input, transformer):
    """Fit, then transform, a 5x2 frame and expect an output of shape (5, 2)."""
    pipeline = FeatureEngineeringPipeline(Feature(input, transformer))

    frame = pd.util.testing.makeCustomDataframe(5, 2)
    frame.columns = ['foo', 'bar']

    # fit and transform are exercised as two separate calls on the same frame
    pipeline.fit(frame)
    transformed = pipeline.transform(frame)

    expected_shape = (5, 2)
    assert np.shape(transformed) == expected_shape
예제 #3
0
 def test_transform(self):
     """Fit, then transform, a 5x2 frame and expect an output of shape (5, 1)."""
     pipeline = FeatureEngineeringPipeline(
         Feature(self.input, self.transformer)
     )

     frame = pd.util.testing.makeCustomDataframe(5, 2)
     frame.columns = ['foo', 'bar']

     # fit and transform are exercised as two separate calls on the same frame
     pipeline.fit(frame)
     transformed = pipeline.transform(frame)

     self.assertEqual(np.shape(transformed), (5, 1))
예제 #4
0
def test_df_colnames(input, transformer, output):
    """Check that every transformed column name reported by the pipeline is a str."""
    pipeline = FeatureEngineeringPipeline(
        Feature(input, transformer, output=output)
    )

    source_frame = pd.util.testing.makeCustomDataframe(5, 2)
    source_frame.columns = ['foo', 'bar']

    # transformed_names_ is read only after the pipeline has been fitted
    matrix = pipeline.fit_transform(source_frame)
    column_names = pipeline.transformed_names_
    result_frame = pd.DataFrame(
        matrix, columns=column_names, index=source_frame.index)

    assert all(isinstance(name, str) for name in result_frame.columns)
예제 #5
0
def test_can_deepcopy():
    """Check that `_ballet_features` is present before and after a deepcopy."""
    # see GH 90
    attr_name = '_ballet_features'
    original = FeatureEngineeringPipeline(
        Feature('size', IdentityTransformer())
    )
    assert hasattr(original, attr_name)

    clone = deepcopy(original)
    assert hasattr(clone, attr_name)
예제 #6
0
 def pipeline(self) -> FeatureEngineeringPipeline:
     """Construct a new FeatureEngineeringPipeline from ``self.features``."""
     current_features = self.features
     return FeatureEngineeringPipeline(current_features)
예제 #7
0
def test_init(input, transformer):
    """Check that a pipeline can be constructed from a single Feature."""
    pipeline = FeatureEngineeringPipeline(Feature(input, transformer))
    assert isinstance(pipeline, FeatureEngineeringPipeline)
예제 #8
0
 def test_fit(self):
     """Check that fitting the pipeline on a 5x2 frame completes without raising."""
     pipeline = FeatureEngineeringPipeline(
         Feature(self.input, self.transformer)
     )

     frame = pd.util.testing.makeCustomDataframe(5, 2)
     frame.columns = ['foo', 'bar']

     pipeline.fit(frame)
예제 #9
0
 def test_init_scalar(self):
     """Check that a pipeline can be constructed from one Feature (not a list)."""
     single_feature = Feature(self.input, self.transformer)
     pipeline = FeatureEngineeringPipeline(single_feature)
     self.assertIsInstance(pipeline, FeatureEngineeringPipeline)
예제 #10
0
 def test_init_seqcont(self):
     """Check that a pipeline can be constructed from a list holding one Feature."""
     feature_list = [Feature(self.input, self.transformer)]
     pipeline = FeatureEngineeringPipeline(feature_list)
     self.assertIsInstance(pipeline, FeatureEngineeringPipeline)