def engineer_features(input_dir, output_dir):
    """Transform the data found in ``input_dir`` and save it to ``output_dir``.

    The raw frames come from ``api.load_data``; the feature pipeline and
    target encoder come from ``api.engineer_features()`` called with no
    arguments. The transformed features and targets are handed to
    ``save_features`` and ``save_targets`` respectively.

    Args:
        input_dir: forwarded to ``api.load_data`` as ``input_dir``.
        output_dir: forwarded to ``save_features`` and ``save_targets``.
    """
    raw_entities, raw_targets = api.load_data(input_dir=input_dir)

    engineered = api.engineer_features()
    feature_pipeline = engineered.pipeline
    target_encoder = engineered.encoder

    # Both transforms run before anything is written out.
    features = feature_pipeline.transform(raw_entities)
    targets = target_encoder.transform(raw_targets)

    save_features(features, output_dir)
    save_targets(targets, output_dir)
Exemple #2
0
def main(input_dir, output_dir):
    """Enable logging, then transform the input data and save the results.

    Logging is switched on at ``INFO`` level (with ``echo=False``) for both
    the module-level ``logger`` and ballet's own logger. The data returned by
    ``load_data`` is then pushed through the ``mapper_X`` / ``encoder_y``
    objects exposed by ``build()`` and passed to ``save_features`` and
    ``save_targets``.

    Args:
        input_dir: forwarded to ``load_data`` as ``input_dir``.
        output_dir: forwarded to ``save_features`` and ``save_targets``.
    """
    import ballet.util.log

    ballet_log = ballet.util.log
    ballet_log.enable(logger=logger, level='INFO', echo=False)
    ballet_log.enable(logger=ballet_log.logger, level='INFO', echo=False)

    raw_entities, raw_targets = load_data(input_dir=input_dir)

    built = build()
    feature_mapper = built.mapper_X
    target_encoder = built.encoder_y

    # Both transforms run before anything is written out.
    features = feature_mapper.transform(raw_entities)
    targets = target_encoder.transform(raw_targets)

    save_features(features, output_dir)
    save_targets(targets, output_dir)
Exemple #3
0
def engineer_features(input_dir, output_dir, train_dir):
    """Transform the data found in ``input_dir`` and save it to ``output_dir``.

    When ``train_dir`` is given, the data loaded from it is passed to
    ``api.engineer_features`` to obtain the pipeline and encoder (presumably
    fitting them on that data -- confirm against ``api.engineer_features``).
    When ``train_dir`` is ``None``, ``api.engineer_features()`` is called with
    no arguments.

    Args:
        input_dir: directory of the data to transform, forwarded to
            ``api.load_data``.
        output_dir: forwarded to ``save_features`` and ``save_targets``.
        train_dir: directory of the data handed to ``api.engineer_features``,
            or ``None`` to call it without arguments.
    """
    # Obtain the pipeline/encoder, optionally from a separate training set.
    if train_dir is None:
        engineered = api.engineer_features()
    else:
        train_entities, train_targets = api.load_data(input_dir=train_dir)
        engineered = api.engineer_features(train_entities, train_targets)
    feature_pipeline = engineered.pipeline
    target_encoder = engineered.encoder

    # Read the data that is actually going to be transformed.
    raw_entities, raw_targets = api.load_data(input_dir=input_dir)

    # Apply the pipeline to the entities and the encoder to the targets.
    features = feature_pipeline.transform(raw_entities)
    targets = target_encoder.transform(raw_targets)

    # Persist both results under the output directory.
    save_features(features, output_dir)
    save_targets(targets, output_dir)