}
        }
    }

    scores = []
    models = {}
    for model_name, mp in model_params.items():
        clf = GridSearchCV(mp["model"],
                           mp["params"],
                           cv=6,
                           return_train_score=False)
        clf.fit(X_train, y_train)
        scores.append({
            "model_name": model_name,
            "best_score": clf.best_score_,
            "best_params": clf.best_params_
        })
        models[model_name] = clf

    scores_df = pd.DataFrame(
        scores, columns=['model_name', 'best_score', 'best_params'])
    scores_df.sort_values(by='best_score', ascending=False, inplace=True)
    print(scores_df)

    best_model_name = scores_df.iloc[0]['model_name']
    best_clf = models[best_model_name]

    model_filepath = args['model']
    print(f'≫ Storing "{best_model_name}" in "{model_filepath}"')
    store_model(best_clf, model_filepath)
예제 #2
0
    parameters = {
        'fit_intercept': [True, False],
        'normalize': [True, False],
    }
    model = GridSearchCV(linreg, param_grid=parameters)
    model.fit(X_train, y_train)
    print('> Best parameters:', model.best_params_)
    y_pred = model.predict(X_test)

    r2 = evaluate_regression(y_test, y_pred)
    models[r2] = model.best_estimator_

    print('Random Forest')
    forest = RandomForestRegressor()
    parameters = {
        'n_estimators': [75, 100, 300],
        'max_depth': [2, 5, None],
    }
    model = GridSearchCV(forest, param_grid=parameters)
    model.fit(X_train, y_train)
    print('> Best parameters:', model.best_params_)
    y_pred = model.predict(X_test)

    r2 = evaluate_regression(y_test, y_pred)
    models[r2] = model.best_estimator_

    best = models[max(models)]
    model_filepath = args['model']
    print(f'≫ Storing Model {type(best)} in "{model_filepath}"')
    store_model(best, model_filepath)
예제 #3
0
def test_filepath_format():
    """Expect ``store_model`` to raise ``ValueError`` for the path ``'filepath'``.

    The path used here has no file extension, and the raised error's message
    must match the extension complaint below.
    """
    expected_message = "filepath should end with specific extension"
    extensionless_path = 'filepath'

    with pytest.raises(ValueError, match=expected_message):
        store_model(model, extensionless_path)
예제 #4
0
def test_new_number_of_columns(dump_mock):
    """Expect ``store_model`` to call the dump mock exactly once with (model, path).

    ``dump_mock`` is injected from outside this function -- presumably by a
    patch decorator or fixture that is not visible here (TODO confirm).

    NOTE(review): the test name mentions columns, but the body only verifies
    the arguments forwarded to the dump call.
    """
    target_path = 'filepath.pkl'

    store_model(model, target_path)

    dump_mock.assert_called_once_with(model, target_path)
예제 #5
0
def test_model_datatype_error():
    """Expect ``store_model`` to raise ``TypeError`` when the model is ``None``.

    The raised error's message must match the "sklearn base estimator"
    complaint below.
    """
    # Use a path with a '.pkl' extension so the model argument is the only
    # invalid input. With an extension-less path the test would depend on
    # store_model checking the model type before the file path.
    with pytest.raises(TypeError, match="model should be a sklearn base estimator"):
        store_model(None, 'filepath.pkl')