def train_pipeline(training_pipeline_params: Params):
    # train, val data
    logger.info(f"start train pipeline with params {training_pipeline_params}")
    data = read_data(training_pipeline_params.train_data_path)
    logger.info(f"data.shape is {data.shape}")
    train_df, val_df = split_train_val_data(
        data, training_pipeline_params.splitting_params)
    logger.info(f"train_df.shape is {train_df.shape}")
    logger.info(f"val_df.shape is {val_df.shape}")

    # features extraction
    train_target = extract_target(train_df,
                                  training_pipeline_params.feature_params)
    transformer = Features_transformer(training_pipeline_params.feature_params)
    transformer.fit(
        train_df.drop(
            columns=training_pipeline_params.feature_params.target_col))
    train_features = make_features(
        transformer,
        train_df.drop(
            columns=training_pipeline_params.feature_params.target_col))
    logger.info(f"train_features.shape is {train_features.shape}")
    val_target = extract_target(val_df,
                                training_pipeline_params.feature_params)
    val_features = make_features(
        transformer,
        val_df.drop(
            columns=training_pipeline_params.feature_params.target_col))
    logger.info(f"val_features.shape is {val_features.shape}")

    # train and score
    model = train_model(train_features, train_target,
                        training_pipeline_params.train_params)
    predicts = predict_model(model, val_features)
    metrics = evaluate_model(predicts, val_target)
    logger.info(f"metrics is {metrics}")

    # save
    path_to_feature_transformer = serialize_features_transformer(
        transformer, training_pipeline_params.features_transformer_path)
    path_to_model = serialize_model(model, training_pipeline_params.model_path)
    path_to_metrics = serialize_metrics(metrics,
                                        training_pipeline_params.metric_path)
    logger.info(f"transformer, model and metrics were saved")

    return path_to_feature_transformer, path_to_model, path_to_metrics, metrics
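
Params is not defined in this snippet; a minimal dataclass sketch covering only the fields train_pipeline reads, with the nesting and types inferred from usage (an assumption, not the project's actual definition):

from dataclasses import dataclass
from typing import Any

@dataclass
class FeatureParams:
    target_col: str          # name of the label column

@dataclass
class Params:                # hypothetical shape, inferred from usage above
    train_data_path: str
    splitting_params: Any    # consumed by split_train_val_data
    feature_params: FeatureParams
    train_params: Any        # consumed by train_model
    features_transformer_path: str
    model_path: str
    metric_path: str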
Example #2

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import Normalizer
import pandas as pd


def test_make_features():
    transformer = ColumnTransformer([("norm1", Normalizer(norm='l1'), [0, 1]),
                                     ("norm2", Normalizer(norm='l1'),
                                      slice(2, 4))])

    data_df = pd.DataFrame([[0., 1., 2., 2.], [1., 1., 0., 1.]])
    transformer.fit(data_df)

    result_df = pd.DataFrame([[0., 1., 0.5, 0.5], [0.5, 0.5, 0., 1.]])
    df_features = make_features(transformer, data_df)

    assert df_features.values.tolist() == result_df.values.tolist()
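
make_features itself is not shown on this page; a minimal sketch consistent with the assertion above, assuming it simply wraps the fitted transformer's output back into a DataFrame:

import pandas as pd

def make_features(transformer, df: pd.DataFrame) -> pd.DataFrame:
    # Assumed behavior: apply the fitted transformer and re-wrap the
    # resulting array as a DataFrame, which is what the test compares.
    return pd.DataFrame(transformer.transform(df))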
Example #3
import pandas as pd
from hydra.utils import to_absolute_path


def predict(predict_config):
    test_df = read_data(to_absolute_path(predict_config.test_data_path))
    test_df = test_df.drop(predict_config.feature_params.target_col, axis=1)

    model_path = to_absolute_path(predict_config.output_model_path)
    model = load_model(model_path)

    transformer = load_transformer(
        to_absolute_path(predict_config.feature_transformer_path))
    test_features = make_features(transformer, test_df)
    y_pred = pd.DataFrame(model.predict_proba(test_features)[:, 1],
                          columns=["target"])

    y_pred.to_csv(to_absolute_path(predict_config.predict_path), index=False)
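
to_absolute_path comes from hydra.utils, so predict presumably runs under a Hydra entry point; a plausible wrapper (the config path and name here are hypothetical):

import hydra

@hydra.main(config_path="configs", config_name="predict_config")
def main(predict_config):
    predict(predict_config)

if __name__ == "__main__":
    main()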
Example #4
def inference_pipeline(inference_pipeline_params: Params):
    # load data
    logger.info(f"start inference pipeline with params {inference_pipeline_params.inference_params}")
    data = read_data(inference_pipeline_params.inference_params.source_data_path)
    logger.info(f"data.shape is {data.shape}")

    # features extraction
    transformer = load_transformer(inference_pipeline_params.features_transformer_path)
    data_features = make_features(transformer, data)
    logger.info(f"data_features.shape is {data.shape}")

    # predict
    model = load_model(inference_pipeline_params.model_path)
    predicts = predict_model(model, data_features)
    logger.info(f"predicts shape is {predicts.shape}")

    # save
    path_to_predicts = save_predicts(data, predicts, inference_pipeline_params.inference_params.result_data_path)
    logger.info("predicted data was saved")

    return path_to_predicts, predicts
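
save_predicts is another helper not shown here; a plausible sketch, assuming it attaches the predictions to the source rows and writes them to CSV (the column name and format are assumptions):

import pandas as pd

def save_predicts(data: pd.DataFrame, predicts, result_data_path: str) -> str:
    # Hypothetical implementation: keep the source rows, append the
    # predictions as a new column, and persist the result as CSV.
    out = data.copy()
    out["predict"] = predicts
    out.to_csv(result_data_path, index=False)
    return result_data_path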
Example #5
def inference_pipeline(data: pd.DataFrame, transformer: Features_transformer,
                       model: SklearnClassificationModel):
    # Transform the raw rows, predict a class per row (note: this mutates
    # `data` in place), and return the predictions as index-keyed JSON.
    data_features = make_features(transformer, data)
    data['predicted_class'] = model.predict(data_features)
    return data[['predicted_class']].to_json(orient='index')
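
For illustration, a hypothetical call; with orient='index' the returned JSON maps each row index to its predicted class:

# e.g. returns '{"0":{"predicted_class":1},"1":{"predicted_class":0}}'
result_json = inference_pipeline(input_df, transformer, model)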