def fit_evaluate(regr, X_train, X_val, y_train, y_val, log_y=False, scale=False, exclude_features=None):
    """Fit a model on the training split and evaluate it on the validation split.

    Args:
        regr: Estimator exposing ``fit``/``predict`` (its ``verbose`` attribute
            is set to ``False``). Pass ``None`` (or ``False``) to solve with
            ``normal_equation.normal_equation`` and predict with
            ``customSGD.predict`` instead.
        X_train: Training features. When ``y_val`` is ``None`` this is handed
            to ``separate_X_y`` to obtain both features and targets.
        X_val: Validation features; handled like ``X_train`` when ``y_val``
            is ``None``.
        y_train: Training targets (replaced when ``y_val`` is ``None``).
        y_val: Validation targets, or ``None`` to derive features and targets
            from ``X_train``/``X_val`` via ``separate_X_y``.
        log_y: Fit on ``np.log(y_train)`` and map predictions back with
            ``np.exp``.
        scale: Scale features with a ``RobustScaler`` fitted on the training
            split only.
        exclude_features: Forwarded to ``separate_X_y``; only used when
            ``y_val`` is ``None``.

    Returns:
        None. The predictions are passed to ``evaluate(y_val, y_pred)``.
    """
    print("Evaluating ...")
    if y_val is None:
        X_train, y_train = separate_X_y(X_train, exclude_features)
        X_val, y_val = separate_X_y(X_val, exclude_features)

    if scale:
        scaler = RobustScaler()
        scaler.fit(X_train)
        X_train = scaler.transform(X_train)
        # Fit on train only and just transform validation to avoid data leakage.
        X_val = scaler.transform(X_val)

    target = np.log(y_train) if log_y else y_train

    # Identity checks instead of truthiness: scikit-learn ensembles implement
    # __len__ over `estimators_`, so bool() on an unfitted one raises.
    use_estimator = regr is not None and regr is not False

    if use_estimator:
        regr.verbose = False
        regr.fit(X_train, target)
        y_pred = regr.predict(X_val)
        if log_y:
            # Extended precision keeps np.exp from overflowing as early;
            # np.longdouble is the portable spelling of np.float128.
            y_pred = np.exp(np.array(y_pred, dtype=np.longdouble))
    else:
        theta = normal_equation.normal_equation(X_train, target)
        y_pred = customSGD.predict(theta, X_val)
        if log_y:
            y_pred = np.exp(y_pred)

    evaluate(y_val, y_pred)
def kfold_evaluate(regr, folds, scoring, log_y=False, k=5):
    """Fit and score a model on every fold and print mean +/- std of the metrics.

    Args:
        regr: One of
            * the string ``"customSGD"`` to train with ``customSGD.SGD``;
            * an estimator exposing ``fit``/``predict`` (its ``verbose``
              attribute is set to ``False``);
            * ``None`` (or ``False``) to solve with
              ``normal_equation.normal_equation``.
        folds: Iterable of ``(X_train, X_val, y_train, y_val)`` tuples.
        scoring: Currently unused; kept so existing callers keep working.
        log_y: Fit on ``np.log(y_train)`` and map predictions back with
            ``np.exp``.
        k: Currently unused; the number of folds is taken from ``folds``.

    Returns:
        None. RMSE, MSE, MAE and R2 (mean +/- std over folds) are printed.
    """
    rmse = []
    mse = []
    mae = []
    r2 = []

    # Identity checks instead of truthiness: scikit-learn ensembles implement
    # __len__ over `estimators_`, so bool() on an unfitted one raises.
    use_estimator = regr is not None and regr is not False

    for i, fold in enumerate(folds):
        print("Evaluating %s" % (i))
        (X_train, X_val, y_train, y_val) = fold
        target = np.log(y_train) if log_y else y_train

        if isinstance(regr, str) and regr == "customSGD":
            # Train with SGD for both raw and log targets (the raw-target
            # case previously fell back to the normal equation by mistake).
            theta = customSGD.SGD(lr=0.1, max_iter=20000,
                                  X=X_train, y=target,
                                  lr_optimizer='invscaling',
                                  print_interval=2000)
            y_pred = customSGD.predict(theta, X_val)
            if log_y:
                y_pred = np.exp(y_pred)
        elif use_estimator:  # Any other regressor, e.g. from scikit-learn
            regr.verbose = False
            regr.fit(X_train, target)
            y_pred = regr.predict(X_val)
            if log_y:
                # Extended precision keeps np.exp from overflowing as early;
                # np.longdouble is the portable spelling of np.float128.
                y_pred = np.exp(np.array(y_pred, dtype=np.longdouble))
        else:
            theta = normal_equation.normal_equation(X_train, target)
            y_pred = customSGD.predict(theta, X_val)
            if log_y:
                y_pred = np.exp(y_pred)

        rmse.append(math.sqrt(((y_pred - y_val) ** 2).mean()))
        mse.append(metrics.mean_squared_error(y_val, y_pred))
        mae.append(metrics.mean_absolute_error(y_val, y_pred))
        r2.append(metrics.r2_score(y_val, y_pred))

    print("RMSE: \t %.4f +/- %.4f" % (np.mean(rmse), np.std(rmse)))
    print("MSE:  \t %.4f +/- %.4f" % (np.mean(mse), np.std(mse)))
    print("MAE:  \t %.4f +/- %.4f" % (np.mean(mae), np.std(mae)))
    print('R2:   \t %.4f +/- %.4f' % (np.mean(r2), np.std(r2)))
def fit_eval_loss_customSGD(X_train, X_val, y_train, y_val, params=None, log_y=False, scale=False, exclude_features=None):
    """Train with ``customSGD.SGD`` and evaluate on the validation split.

    Args:
        X_train: Training features.
        X_val: Validation features.
        y_train: Training targets.
        y_val: Validation targets.
        params: Extra keyword arguments forwarded to ``customSGD.SGD``
            (``X`` and ``y`` are supplied here). ``None`` means no extras.
        log_y: Fit on ``np.log(y_train)`` and map predictions back with
            ``np.exp``.
        scale: Scale features with a ``RobustScaler`` fitted on the training
            split only.
        exclude_features: Currently unused; kept so existing callers keep
            working.

    Returns:
        None. The predictions are passed to ``evaluate(y_val, y_pred)``.
    """
    print("Evaluating ...")

    sgd_params = {} if params is None else params

    if scale:
        scaler = RobustScaler()
        scaler.fit(X_train)
        X_train = scaler.transform(X_train)
        # Fit on train only and just transform validation to avoid data leakage.
        X_val = scaler.transform(X_val)

    # Predict on X_val in every case; the earlier code read a variable that
    # only existed when scale=True and crashed otherwise.
    target = np.log(y_train) if log_y else y_train
    theta = customSGD.SGD(**sgd_params, X=X_train, y=target)
    y_pred = customSGD.predict(theta, X_val)
    if log_y:
        y_pred = np.exp(y_pred)

    evaluate(y_val, y_pred)
# Example no. 4
# 0
def normal_equation_test():
    """Fit the normal equation on the big toy data set and print hold-out metrics.

    The data from ``customSGD.get_toy_data_big()`` is split 80/20 with a fixed
    seed; RMSE, MSE, MAE and R2 on the held-out part are printed.
    """
    features, targets = customSGD.get_toy_data_big()
    X_train, X_val, y_train, y_val = model_selection.train_test_split(
        features, targets, test_size=0.2, random_state=42)

    weights = normal_equation(X_train, y_train)
    y_pred = customSGD.predict(weights, X_val)

    squared_residuals = (y_pred - y_val) ** 2
    rmse = math.sqrt(squared_residuals.mean())
    print("RMSE error: %.4f" % rmse)

    # Remaining metrics share the same call shape: metric(y_true, y_pred).
    reports = (
        ("MSE: %.3f", metrics.mean_squared_error),
        ("MAE: %.3f", metrics.mean_absolute_error),
        ('R2: %.3f', metrics.r2_score),
    )
    for template, metric in reports:
        print(template % metric(y_val, y_pred))