Beispiel #1
0
def main(dir_path, version):
    """Train a scaler + random-forest pipeline and save it via ``Serializer``.

    The CSV at ``data_path`` is read and passed through ``clean_data``, then
    one-hot encoded with ``pd.get_dummies``. A ``StandardScaler`` ->
    ``RandomForestClassifier(n_estimators=5)`` pipeline is fitted on every
    column except ``TARGET``, and the fitted pipeline is handed to
    ``Serializer.save_model`` together with the parameters generated from the
    cleaned (pre-encoding) frame.

    Args:
        dir_path: Passed to the ``Serializer`` constructor.
        version: Passed through to ``Serializer.save_model``.
    """
    # NOTE(review): ``data_path`` is neither a parameter nor a local of this
    # function -- presumably a module-level name; confirm it is defined.
    cleaned_df = clean_data(pd.read_csv(data_path), COLUMNS)

    df = pd.get_dummies(cleaned_df, prefix=CATEGORICAL_FEATURES)

    # The pipeline owns its own estimator instances; no standalone scaler or
    # classifier objects are needed.
    pipe = Pipeline([
        ('scaler', StandardScaler()),
        ('random_forest', RandomForestClassifier(n_estimators=5)),
    ])

    # ``axis`` must be passed by keyword: the positional form was deprecated
    # in pandas 1.3 and removed in pandas 2.0.
    X = df.drop(TARGET, axis=1)
    y = df[TARGET]

    pipe.fit(X, y)

    serializer = Serializer(dir_path)

    # Parameters are generated from the cleaned frame, not the one-hot
    # encoded one used for fitting.
    parameters = serializer.generate_parameters(
        cleaned_df, CATEGORICAL_FEATURES, NUMERICAL_FEATURES)
    serializer.save_model(pipe, parameters, version)