Example #1
0
def test_factory():
    """End-to-end test of RegressorsFactory: fitting, prediction,
    staged prediction, pickling round-trip and report generation.
    """
    factory = RegressorsFactory()
    # TMVA is an optional dependency; skip its regressor when unavailable
    try:
        from rep.estimators.tmva import TMVARegressor
        factory.add_regressor('tmva', TMVARegressor())
    except ImportError:
        pass
    factory.add_regressor('rf', RandomForestRegressor(n_estimators=10))
    factory.add_regressor('ada', AdaBoostRegressor(n_estimators=20))

    X, y, sample_weight = generate_classification_data()
    # fit() returns an object equal to the factory itself (fluent interface,
    # as checked by this assert)
    assert factory == factory.fit(X,
                                  y,
                                  sample_weight=sample_weight,
                                  features=list(X.columns))
    values = factory.predict(X)

    # every trained regressor must have picked up the requested feature list
    for cl in factory.values():
        assert list(cl.features) == list(X.columns)

    for key, val in values.items():
        score = mean_squared_error(y, val)
        print(score)
        assert score < 0.2

    for key, iterator in factory.staged_predict(X).items():
        assert key != 'tmva', 'tmva does not support staged pp'
        for p in iterator:
            assert p.shape == (len(X), )

        # checking that last iteration coincides with previous
        assert numpy.all(p == values[key])

    # testing picklability
    dump_string = cPickle.dumps(factory)
    clf_loaded = cPickle.loads(dump_string)

    assert type(factory) == type(clf_loaded)

    # a reloaded factory must reproduce the original predictions exactly
    probs1 = factory.predict(X)
    probs2 = clf_loaded.predict(X)
    for key, val in probs1.items():
        assert numpy.all(val == probs2[key]), 'something strange was loaded'

    # reports: shuffling-based feature importance plot plus the report API
    report = RegressionReport({'rf': factory['rf']},
                              LabeledDataStorage(X, y, sample_weight))
    report.feature_importance_shuffling(mean_squared_mod).plot(new_plot=True,
                                                               figsize=(18, 3))
    report = factory.test_on_lds(LabeledDataStorage(X, y, sample_weight))
    report = factory.test_on(X, y, sample_weight=sample_weight)
    report.feature_importance()
    report.features_correlation_matrix()
    report.predictions_scatter()

    # report_mask accepts a string expression, a callable, or None
    val = numpy.mean(X['column0'])
    report_mask(report, "column0 > %f" % val, X)
    report_mask(report, lambda x: numpy.array(x['column0']) < val, X)
    report_mask(report, None, X)
Example #2
0
 def reportPerformance(self, X, y):
     """Print regression metrics for ``self.reg`` evaluated on ``(X, y)``:
     explained variance, mean absolute error, mean squared error and R2.

     Fixed: Python 2 print statements converted to the print() function.
     Output is unchanged — print() space-separates its arguments and the
     trailing "\n" argument is kept, exactly like ``print x, "\n"`` in py2.
     """
     y_pred = self.reg.predict(X)
     print("Explained variance: {0:.5f}".format(
         metrics.explained_variance_score(y, y_pred)), "\n")
     print("Mean abs error: {0:.5f}".format(
         metrics.mean_absolute_error(y, y_pred)), "\n")
     print("Mean sqrt error: {0:.5f}".format(
         metrics.mean_squared_error(y, y_pred)), "\n")
     print("R2 score: {0:.5f}".format(metrics.r2_score(y, y_pred)), "\n")
Example #3
0
def measure_performance(est, X, y):
    """Print regression metrics for estimator ``est`` evaluated on ``(X, y)``:
    explained variance, mean absolute error, mean squared error and R2.

    Fixed: Python 2 print statements converted to the print() function.
    Output is unchanged — print() space-separates its arguments and the
    trailing "\n" argument is kept, exactly like ``print x, "\n"`` in py2.
    """
    y_pred = est.predict(X)
    print("Explained variance: {0:.5f}".format(
        metrics.explained_variance_score(y, y_pred)), "\n")
    print("Mean abs error: {0:.5f}".format(
        metrics.mean_absolute_error(y, y_pred)), "\n")
    print("Mean sqrt error: {0:.5f}".format(
        metrics.mean_squared_error(y, y_pred)), "\n")
    print("R2 score: {0:.5f}".format(metrics.r2_score(y, y_pred)), "\n")
Example #4
0
def test_factory():
    """Exercise RegressorsFactory end to end: training, prediction,
    staged prediction, pickle round-trip and report generation.
    """
    factory = RegressorsFactory()
    # TMVA may not be installed; add it only when the import succeeds
    try:
        from rep.estimators.tmva import TMVARegressor
        factory.add_regressor('tmva', TMVARegressor())
    except ImportError:
        pass
    factory.add_regressor('rf', RandomForestRegressor(n_estimators=10))
    factory.add_regressor('ada', AdaBoostRegressor(n_estimators=20))

    X, y, sample_weight = generate_classification_data()
    fitted = factory.fit(X, y, sample_weight=sample_weight,
                         features=list(X.columns))
    assert factory == fitted
    predictions = factory.predict(X)

    # each regressor must have adopted the requested feature list
    for regressor in factory.values():
        assert list(regressor.features) == list(X.columns)

    for name, predicted in predictions.items():
        score = mean_squared_error(y, predicted)
        print(score)
        assert score < 0.2

    for name, stage_iter in factory.staged_predict(X).items():
        assert name != 'tmva', 'tmva does not support staged pp'
        for stage_pred in stage_iter:
            assert stage_pred.shape == (len(X), )

        # the final stage must coincide with the plain prediction
        assert numpy.all(stage_pred == predictions[name])

    # pickle round-trip must preserve type and predictions
    restored = cPickle.loads(cPickle.dumps(factory))

    assert type(factory) == type(restored)

    before = factory.predict(X)
    after = restored.predict(X)
    for name, predicted in before.items():
        assert numpy.all(predicted == after[name]), 'something strange was loaded'

    report = RegressionReport({'rf': factory['rf']}, LabeledDataStorage(X, y, sample_weight))
    report.feature_importance_shuffling(mean_squared_mod).plot(new_plot=True, figsize=(18, 3))
    report = factory.test_on_lds(LabeledDataStorage(X, y, sample_weight))
    report = factory.test_on(X, y, sample_weight=sample_weight)
    report.feature_importance()
    report.features_correlation_matrix()
    report.predictions_scatter()

    # masks can be given as an expression string, a callable, or None
    val = numpy.mean(X['column0'])
    report_mask(report, "column0 > %f" % val, X)
    report_mask(report, lambda x: numpy.array(x['column0']) < val, X)
    report_mask(report, None, X)
def root_mean_squared_error(x, y):
    """Return the root-mean-squared error: sqrt of mean_squared_error(x, y)."""
    return np.sqrt(mean_squared_error(x, y))
Example #6
0
def rootMeanSquaredError(gtruth, pred):
    """Return the RMSE between ground truth and predictions."""
    return np.sqrt(mean_squared_error(gtruth, pred))
Example #7
0
 def getTestMSE( self ):
     """Return the mean squared error of ``self.reg`` on the test split
     (``self.X_test`` vs ``self.y_test``)."""
     return metrics.mean_squared_error(self.y_test, self.reg.predict(self.X_test))
Example #8
0
 def getTrainMSE( self ):
     """Return the mean squared error of ``self.reg`` on the training split
     (``self.X_train`` vs ``self.y_train``)."""
     return metrics.mean_squared_error(self.y_train, self.reg.predict(self.X_train))
Example #9
0
def root_mean_squared_error(x, y):
    """Root-mean-squared error of ``y`` against ``x`` (sqrt of the MSE)."""
    return np.sqrt(mean_squared_error(x, y))
Example #10
0
 def reportPerformance( self, X, y ):
     """Print regression metrics for ``self.reg`` on ``(X, y)``: explained
     variance, mean absolute error, mean squared error and R2.

     Fixed: Python 2 print statements converted to the print() function;
     output is unchanged (print() space-separates its arguments and the
     trailing "\n" argument is preserved).
     """
     y_pred = self.reg.predict(X)
     print("Explained variance: {0:.5f}".format(metrics.explained_variance_score(y, y_pred)), "\n")
     print("Mean abs error: {0:.5f}".format(metrics.mean_absolute_error(y, y_pred)), "\n")
     print("Mean sqrt error: {0:.5f}".format(metrics.mean_squared_error(y, y_pred)), "\n")
     print("R2 score: {0:.5f}".format(metrics.r2_score(y, y_pred)), "\n")
Example #11
0
 def getTestMSE(self):
     """MSE of the stored regressor on the held-out test data."""
     test_predictions = self.reg.predict(self.X_test)
     return metrics.mean_squared_error(self.y_test, test_predictions)
Example #12
0
def mean_squared_mod(y_true, values, sample_weight=None):
    """Weighted-MSE wrapper (used as the metric for
    ``feature_importance_shuffling`` in the factory test)."""
    result = mean_squared_error(y_true, values, sample_weight=sample_weight)
    return result
Example #13
0
def measure_performance(est, X, y):
    """Print regression metrics for estimator ``est`` on ``(X, y)``:
    explained variance, mean absolute error, mean squared error and R2.

    Fixed: Python 2 print statements converted to the print() function;
    output is unchanged (print() space-separates its arguments and the
    trailing "\n" argument is preserved).
    """
    y_pred = est.predict(X)
    print("Explained variance: {0:.5f}".format(metrics.explained_variance_score(y, y_pred)), "\n")
    print("Mean abs error: {0:.5f}".format(metrics.mean_absolute_error(y, y_pred)), "\n")
    print("Mean sqrt error: {0:.5f}".format(metrics.mean_squared_error(y, y_pred)), "\n")
    print("R2 score: {0:.5f}".format(metrics.r2_score(y, y_pred)), "\n")
Example #14
0
        return sum((tree.predict(X) for tree in self.trees))

    def fit(self, X, y):
        """Grow ``n_boosting_steps`` regression trees, each one fitted to
        the residuals of the ensemble built so far, and append them to
        ``self.trees``."""
        for _ in range(self.n_boosting_steps):
            residual = y - self.predict(X)
            tree = Node(X, residual)
            tree.fit(max_tree_size=self.max_tree_size)
            self.trees.append(tree)

if __name__ == '__main__':

    # Fixed: Python 2 print statements -> print() function (same output),
    # and removed/private sklearn import paths modernized:
    # sklearn.cross_validation -> sklearn.model_selection,
    # sklearn.metrics.metrics -> sklearn.metrics,
    # sklearn.ensemble.gradient_boosting -> sklearn.ensemble.
    from sklearn.model_selection import train_test_split
    from sklearn.metrics import mean_squared_error
    from sklearn.datasets import load_boston

    boston = load_boston()
    X_train, X_test, y_train, y_test = train_test_split(boston.data,
                                                        boston.target,
                                                        test_size=0.33)

    # Baseline: sklearn's reference gradient boosting implementation.
    from sklearn.ensemble import GradientBoostingRegressor
    sk_gbrt = GradientBoostingRegressor(n_estimators=20)
    sk_gbrt.fit(X_train, y_train)
    print("sklearn test MSE", mean_squared_error(y_test, sk_gbrt.predict(X_test)))

    # Our implementation, fitted on the same split for comparison.
    mart = MART(10, 15)
    mart.fit(X_train, y_train)
    print("mart test MSE", mean_squared_error(y_test, mart.predict(X_test)))

Example #15
0
def mean_squared_mod(y_true, values, sample_weight=None):
    """Thin wrapper around mean_squared_error that forwards the optional
    sample_weight argument."""
    return mean_squared_error(y_true, values,
                              sample_weight=sample_weight)
Example #16
0
 def getTrainMSE(self):
     """MSE of the stored regressor on the training data."""
     train_predictions = self.reg.predict(self.X_train)
     return metrics.mean_squared_error(self.y_train, train_predictions)
Example #17
0
# NOTE(review): script fragment — exponential, sigmoid, X, y, x_, mse_ and
# error_conn are defined elsewhere in the original script (not visible here).
func = [exponential, sigmoid]
shift = 200  # presumably the first feature column to fit — confirm upstream
for i in range(100):
    mse__ = []  # per-candidate-function MSEs for this feature
    pl.figure()
    pl.scatter(X[:,shift+i], y)
    for f in func:
        try:
            popt, pcov = curve_fit(f, X[:,shift + i], y)
        except RuntimeError:
            # curve_fit failed to converge for this feature; record and skip
            error_conn.append(shift + i)
            continue
        y_ = f(x_, *popt)  # fitted curve sampled on x_ for plotting
        y_pred = f(X[:,shift + i], *popt)

        mse = mean_squared_error(y, y_pred)

        mse__.append(mse)

        # crude extraction of the function's name from str(f) for the legend
        lb = str(f)[str(f).find(' ')+1:str(f).rfind(' ')-3]

        pl.plot(x_, y_, label=lb)

    mse_.append(mse__)
    pl.legend()

#########################################################

label_list = []
# upper-triangle index pairs of samatha's last axis;
# NOTE(review): under Python 3, zip() must be wrapped in list() before
# np.array — as written this only works on Python 2. `samatha` is defined
# elsewhere; verify.
indexes = np.array(zip(*np.triu_indices(samatha.shape[-1], 1)))
for i in range(X.shape[1]):
Example #18
0
def get_errors(forecast_actual_data, forecast_data):
    """Return a (MAE, MSE) pair between actual and forecast data,
    each rounded to two decimal places."""
    mae = mean_absolute_error(forecast_actual_data, forecast_data)
    mse = mean_squared_error(forecast_actual_data, forecast_data)
    return round(mae, 2), round(mse, 2)
Example #19
0
        y_train = y_[train_index]

        X_test = X_[test_index]

        # alternative SVR models, kept commented out in the original:
        #y_reg = svr_rbf.fit(X_train, y_train).predict(X_test)
        #y_reg = svr_lin.fit(X_train, y_train).predict(X_test)
        #y_reg = svr_poly.fit(X_train, y_train).predict(X_test)
        y_reg = lasso.fit(X_train, y_train).predict(X_test)

        #pl.scatter(y[test_index], y_rbf)
        #print np.count_nonzero(lasso.coef_)

        # accumulate fold predictions; count_ tracks how many times each
        # sample landed in a test fold so the mean is taken below
        y_pred[test_index] += y_reg
        count_[test_index] += 1

    # average prediction per sample -> overall MSE / R2 for this setting
    dist_mse.append(mean_squared_error(y_, y_pred / count_))
    dist_r2.append(r2_score(y_, y_pred / count_))

pl.plot(np.arange(0, 0.5, 0.01), np.array(dist_mse))

# scatter each of the first 10 features against y, colored by group
for i in range(10):
    pl.figure()
    pl.scatter(X_[:, i], y, c=groups, cmap=pl.cm.rainbow)

################################################
n_rows = 3
# NOTE(review): under Python 3, zip() needs list() before np.array —
# as written this only works on Python 2
indexes = np.array(zip(*np.triu_indices(13, 1)))
color = 'bgr'
labels_group = ['elderly', 'mci', 'young']
j = 0
for _, x in enumerate(X.T):
Example #20
0
            prediction_.extend(prediction)

    verbose('----------\n')
    verbose("Evaluation")

    # classification metrics for age/gender modes, regression metrics otherwise
    if opts.mode in ['age', 'gender']:
        # NOTE(review): sklearn.metrics.metrics is a removed private path in
        # modern sklearn; these names now live in sklearn.metrics
        from sklearn.metrics.metrics import precision_score, recall_score, confusion_matrix, classification_report, accuracy_score, f1_score
        # Computing performance metrics
        print('Accuracy              :', accuracy_score(y_, prediction_))
        print('Precision             :', precision_score(y_, prediction_))
        print('Recall                :', recall_score(y_, prediction_))
        print('F-score               :', f1_score(y_, prediction_))
        print('\nClasification report:\n',
              classification_report(y_, prediction_))
        print('\nConfussion matrix   :\n', confusion_matrix(y_, prediction_))
    else:
        from sklearn.metrics.metrics import mean_absolute_error, mean_squared_error, r2_score
        print('Mean Abs Error        :', mean_absolute_error(y_, prediction_))
        print('Mean Sqr Error        :', mean_squared_error(y_, prediction_))
        print('R2 Error              :', r2_score(y_, prediction_))

    # plotting scaffold, kept commented out in the original:
    #plots:
    #import matplotlib.pyplot as plt
    #confusion_matrix_plot = confusion_matrix(y_test, prediction)
    #plt.title('matriz de confusion')
    #plt.colorbar()
    #plt.xlabel()
    #plt.xlabel('categoria de verdad')
    #plt.ylabel('categoria predecida')
    #plt.show()
Example #21
0
        y_train = y_[train_index]
        
        X_test = X_[test_index]
        
        # alternative SVR models, kept commented out in the original:
        #y_reg = svr_rbf.fit(X_train, y_train).predict(X_test)
        #y_reg = svr_lin.fit(X_train, y_train).predict(X_test)
        #y_reg = svr_poly.fit(X_train, y_train).predict(X_test)
        y_reg = lasso.fit(X_train, y_train).predict(X_test)
        
        #pl.scatter(y[test_index], y_rbf)
        #print np.count_nonzero(lasso.coef_)
        
        # accumulate fold predictions; count_ counts test-fold occurrences
        # per sample so the mean is taken below
        y_pred[test_index] += y_reg
        count_[test_index] += 1
    
    # average prediction per sample -> overall MSE / R2 for this setting
    dist_mse.append(mean_squared_error(y_, y_pred/count_))
    dist_r2.append(r2_score(y_, y_pred/count_))

pl.plot(np.arange(0, 0.5, 0.01), np.array(dist_mse))

# scatter each of the first 10 features against y, colored by group
for i in range(10):
    pl.figure()
    pl.scatter(X_[:,i], y,c=groups, cmap=pl.cm.rainbow)

################################################
n_rows = 3   
# NOTE(review): under Python 3, zip() needs list() before np.array —
# as written this only works on Python 2
indexes = np.array(zip(*np.triu_indices(13, 1)))
color = 'bgr'
labels_group = ['elderly', 'mci', 'young']
j = 0
for _, x in enumerate(X.T):
Example #22
0
def rootMeanSquaredError(gtruth, pred):
    """Root-mean-squared error between ground truth and predictions."""
    return np.sqrt(mean_squared_error(gtruth, pred))
    # NOTE(review): this block sits directly after the preceding function's
    # `return`, so as pasted it is unreachable dead code — it looks like a
    # merge/paste artifact; confirm intent before relying on it.
    verbose('----------\n')
    verbose("Evaluation")

    # classification metrics for age/gender modes, regression metrics otherwise
    if opts.mode in ['age','gender']:
        # NOTE(review): sklearn.metrics.metrics is a removed private path in
        # modern sklearn; these names now live in sklearn.metrics
        from sklearn.metrics.metrics import precision_score, recall_score, confusion_matrix, classification_report, accuracy_score, f1_score
        # Computing performance metrics
        print( 'Accuracy              :', accuracy_score(y_, prediction_))
        print( 'Precision             :', precision_score(y_, prediction_))
        print( 'Recall                :', recall_score(y_, prediction_))
        print( 'F-score               :', f1_score(y_, prediction_))
        print( '\nClasification report:\n', classification_report(y_,
                prediction_))
        print( '\nConfussion matrix   :\n',confusion_matrix(y_, prediction_))
    else:
        from sklearn.metrics.metrics import mean_absolute_error, mean_squared_error,r2_score
        print( 'Mean Abs Error        :', mean_absolute_error(y_, prediction_))
        print( 'Mean Sqr Error        :', mean_squared_error(y_, prediction_))
        print( 'R2 Error              :', r2_score(y_, prediction_))


    # plotting scaffold, kept commented out in the original:
    #plots:
    #import matplotlib.pyplot as plt
    #confusion_matrix_plot = confusion_matrix(y_test, prediction)
    #plt.title('matriz de confusion')
    #plt.colorbar()
    #plt.xlabel()
    #plt.xlabel('categoria de verdad')
    #plt.ylabel('categoria predecida')
    #plt.show()