def test_factory():
    # Integration test for RegressorsFactory: trains several regressors,
    # then checks predictions, staged predictions, picklability and the
    # report-generation API.
    # NOTE(review): formatting reconstructed — the original source line was
    # collapsed onto a single physical line; token order is unchanged.
    factory = RegressorsFactory()
    try:
        # TMVA is an optional dependency; skip it silently when absent.
        from rep.estimators.tmva import TMVARegressor
        factory.add_regressor('tmva', TMVARegressor())
    except ImportError:
        pass
    factory.add_regressor('rf', RandomForestRegressor(n_estimators=10))
    factory.add_regressor('ada', AdaBoostRegressor(n_estimators=20))
    X, y, sample_weight = generate_classification_data()
    # fit() is expected to return the factory itself (fluent interface).
    assert factory == factory.fit(X, y, sample_weight=sample_weight, features=list(X.columns))
    values = factory.predict(X)
    # every trained regressor must have recorded the training feature list
    for cl in factory.values():
        assert list(cl.features) == list(X.columns)
    for key, val in values.items():
        score = mean_squared_error(y, val)
        print(score)
        assert score < 0.2
    for key, iterator in factory.staged_predict(X).items():
        assert key != 'tmva', 'tmva does not support staged pp'
        for p in iterator:
            assert p.shape == (len(X), )
        # checking that last iteration coincides with previous
        assert numpy.all(p == values[key])
    # testing picklability
    dump_string = cPickle.dumps(factory)
    clf_loaded = cPickle.loads(dump_string)
    assert type(factory) == type(clf_loaded)
    probs1 = factory.predict(X)
    probs2 = clf_loaded.predict(X)
    for key, val in probs1.items():
        assert numpy.all(val == probs2[key]), 'something strange was loaded'
    # exercise the reporting API: importances, correlations, scatter plots
    report = RegressionReport({'rf': factory['rf']}, LabeledDataStorage(X, y, sample_weight))
    report.feature_importance_shuffling(mean_squared_mod).plot(new_plot=True, figsize=(18, 3))
    report = factory.test_on_lds(LabeledDataStorage(X, y, sample_weight))
    report = factory.test_on(X, y, sample_weight=sample_weight)
    report.feature_importance()
    report.features_correlation_matrix()
    report.predictions_scatter()
    val = numpy.mean(X['column0'])
    # report_mask is exercised with a string cut, a callable mask, and None
    report_mask(report, "column0 > %f" % val, X)
    report_mask(report, lambda x: numpy.array(x['column0']) < val, X)
    report_mask(report, None, X)
def reportPerformance(self, X, y):
    """Print standard regression metrics for self.reg evaluated on (X, y).

    Fix: the original used Python-2-only ``print`` statements, which are a
    syntax error under Python 3. Converted to ``print()`` calls with the
    same arguments, so the emitted text is unchanged (value, space, "\\n").
    """
    y_pred = self.reg.predict(X)
    print("Explained variance: {0:.5f}".format(metrics.explained_variance_score(y, y_pred)), "\n")
    print("Mean abs error: {0:.5f}".format(metrics.mean_absolute_error(y, y_pred)), "\n")
    print("Mean sqrt error: {0:.5f}".format(metrics.mean_squared_error(y, y_pred)), "\n")
    print("R2 score: {0:.5f}".format(metrics.r2_score(y, y_pred)), "\n")
def measure_performance(est, X, y):
    """Print standard regression metrics for estimator ``est`` on (X, y).

    Fix: the original used Python-2-only ``print`` statements, which are a
    syntax error under Python 3. Converted to ``print()`` calls with the
    same arguments, so the emitted text is unchanged.
    """
    y_pred = est.predict(X)
    print("Explained variance: {0:.5f}".format(metrics.explained_variance_score(y, y_pred)), "\n")
    print("Mean abs error: {0:.5f}".format(metrics.mean_absolute_error(y, y_pred)), "\n")
    print("Mean sqrt error: {0:.5f}".format(metrics.mean_squared_error(y, y_pred)), "\n")
    print("R2 score: {0:.5f}".format(metrics.r2_score(y, y_pred)), "\n")
def root_mean_squared_error(x, y):
    """Return the root-mean-squared error between ``x`` and ``y``."""
    # RMSE is simply the square root of the (sklearn-computed) MSE.
    return np.sqrt(mean_squared_error(x, y))
def rootMeanSquaredError(gtruth, pred):
    """Root-mean-squared error between ground truth and predictions."""
    mse_value = mean_squared_error(gtruth, pred)
    return np.sqrt(mse_value)
def getTestMSE(self):
    """Return the mean squared error of ``self.reg`` on the test split."""
    predicted = self.reg.predict(self.X_test)
    return metrics.mean_squared_error(self.y_test, predicted)
def getTrainMSE(self):
    """Return the mean squared error of ``self.reg`` on the training split."""
    return metrics.mean_squared_error(self.y_train, self.reg.predict(self.X_train))
def reportPerformance(self, X, y):
    """Print standard regression metrics for self.reg evaluated on (X, y).

    Fix: the original used Python-2-only ``print`` statements, which are a
    syntax error under Python 3. Converted to ``print()`` calls with the
    same arguments, so the emitted text is unchanged.
    """
    y_pred = self.reg.predict(X)
    print("Explained variance: {0:.5f}".format(metrics.explained_variance_score(y, y_pred)), "\n")
    print("Mean abs error: {0:.5f}".format(metrics.mean_absolute_error(y, y_pred)), "\n")
    print("Mean sqrt error: {0:.5f}".format(metrics.mean_squared_error(y, y_pred)), "\n")
    print("R2 score: {0:.5f}".format(metrics.r2_score(y, y_pred)), "\n")
def getTestMSE(self):
    """MSE of the fitted regressor on the held-out test data."""
    return metrics.mean_squared_error(self.y_test, self.reg.predict(self.X_test))
def mean_squared_mod(y_true, values, sample_weight=None):
    """Mean squared error wrapper that forwards the optional sample_weight."""
    mse = mean_squared_error(y_true, values, sample_weight=sample_weight)
    return mse
def measure_performance(est, X, y):
    """Print standard regression metrics for estimator ``est`` on (X, y).

    Fix: the original used Python-2-only ``print`` statements, which are a
    syntax error under Python 3. Converted to ``print()`` calls with the
    same arguments, so the emitted text is unchanged.
    """
    y_pred = est.predict(X)
    print("Explained variance: {0:.5f}".format(metrics.explained_variance_score(y, y_pred)), "\n")
    print("Mean abs error: {0:.5f}".format(metrics.mean_absolute_error(y, y_pred)), "\n")
    print("Mean sqrt error: {0:.5f}".format(metrics.mean_squared_error(y, y_pred)), "\n")
    print("R2 score: {0:.5f}".format(metrics.r2_score(y, y_pred)), "\n")
return sum((tree.predict(X) for tree in self.trees)) def fit(self, X, y): for m in range(self.n_boosting_steps): residuals = y - self.predict(X) new_tree = Node(X, residuals) new_tree.fit(max_tree_size=self.max_tree_size) self.trees.append(new_tree) if __name__ == '__main__': from sklearn.cross_validation import train_test_split from sklearn.metrics.metrics import mean_squared_error from sklearn.datasets import load_boston boston = load_boston() X_train, X_test, y_train, y_test = train_test_split(boston.data, boston.target, test_size=0.33) from sklearn.ensemble.gradient_boosting import GradientBoostingRegressor sk_gbrt = GradientBoostingRegressor(n_estimators=20) sk_gbrt.fit(X_train, y_train) print "sklearn test MSE", mean_squared_error(y_test, sk_gbrt.predict(X_test)) mart = MART(10, 15) mart.fit(X_train, y_train) print "mart test MSE", mean_squared_error(y_test, mart.predict(X_test))
def getTrainMSE(self):
    """MSE of the fitted regressor on the training data."""
    train_predictions = self.reg.predict(self.X_train)
    return metrics.mean_squared_error(self.y_train, train_predictions)
func = [exponential, sigmoid] shift = 200 for i in range(100): mse__ = [] pl.figure() pl.scatter(X[:,shift+i], y) for f in func: try: popt, pcov = curve_fit(f, X[:,shift + i], y) except RuntimeError: error_conn.append(shift + i) continue y_ = f(x_, *popt) y_pred = f(X[:,shift + i], *popt) mse = mean_squared_error(y, y_pred) mse__.append(mse) lb = str(f)[str(f).find(' ')+1:str(f).rfind(' ')-3] pl.plot(x_, y_, label=lb) mse_.append(mse__) pl.legend() ######################################################### label_list = [] indexes = np.array(zip(*np.triu_indices(samatha.shape[-1], 1))) for i in range(X.shape[1]):
def get_errors(forecast_actual_data, forecast_data):
    """Return (MAE, MSE) of the forecast, each rounded to two decimals."""
    mae = round(mean_absolute_error(forecast_actual_data, forecast_data), 2)
    mse = round(mean_squared_error(forecast_actual_data, forecast_data), 2)
    return mae, mse
y_train = y_[train_index] X_test = X_[test_index] #y_reg = svr_rbf.fit(X_train, y_train).predict(X_test) #y_reg = svr_lin.fit(X_train, y_train).predict(X_test) #y_reg = svr_poly.fit(X_train, y_train).predict(X_test) y_reg = lasso.fit(X_train, y_train).predict(X_test) #pl.scatter(y[test_index], y_rbf) #print np.count_nonzero(lasso.coef_) y_pred[test_index] += y_reg count_[test_index] += 1 dist_mse.append(mean_squared_error(y_, y_pred / count_)) dist_r2.append(r2_score(y_, y_pred / count_)) pl.plot(np.arange(0, 0.5, 0.01), np.array(dist_mse)) for i in range(10): pl.figure() pl.scatter(X_[:, i], y, c=groups, cmap=pl.cm.rainbow) ################################################ n_rows = 3 indexes = np.array(zip(*np.triu_indices(13, 1))) color = 'bgr' labels_group = ['elderly', 'mci', 'young'] j = 0 for _, x in enumerate(X.T):
# Collect this round's predictions and print an evaluation report.
# NOTE(review): formatting reconstructed — the original source was collapsed
# onto a single line; token order is unchanged, nesting is the natural parse.
prediction_.extend(prediction)
verbose('----------\n')
verbose("Evaluation")
if opts.mode in ['age', 'gender']:
    # classification modes: report accuracy/precision/recall/F-score
    from sklearn.metrics.metrics import precision_score, recall_score, confusion_matrix, classification_report, accuracy_score, f1_score
    # Computing performance (translated from Spanish: "Calculando desempeño")
    print('Accuracy :', accuracy_score(y_, prediction_))
    print('Precision :', precision_score(y_, prediction_))
    print('Recall :', recall_score(y_, prediction_))
    print('F-score :', f1_score(y_, prediction_))
    print('\nClasification report:\n', classification_report(y_, prediction_))
    print('\nConfussion matrix :\n', confusion_matrix(y_, prediction_))
else:
    # regression mode: report MAE / MSE / R2
    from sklearn.metrics.metrics import mean_absolute_error, mean_squared_error, r2_score
    print('Mean Abs Error :', mean_absolute_error(y_, prediction_))
    print('Mean Sqr Error :', mean_squared_error(y_, prediction_))
    print('R2 Error :', r2_score(y_, prediction_))
#plots:
#import matplotlib.pyplot as plt
#confusion_matrix_plot = confusion_matrix(y_test, prediction)
#plt.title('matriz de confusion')
#plt.colorbar()
#plt.xlabel()
#plt.xlabel('categoria de verdad')
#plt.ylabel('categoria predecida')
#plt.show()
y_train = y_[train_index] X_test = X_[test_index] #y_reg = svr_rbf.fit(X_train, y_train).predict(X_test) #y_reg = svr_lin.fit(X_train, y_train).predict(X_test) #y_reg = svr_poly.fit(X_train, y_train).predict(X_test) y_reg = lasso.fit(X_train, y_train).predict(X_test) #pl.scatter(y[test_index], y_rbf) #print np.count_nonzero(lasso.coef_) y_pred[test_index] += y_reg count_[test_index] += 1 dist_mse.append(mean_squared_error(y_, y_pred/count_)) dist_r2.append(r2_score(y_, y_pred/count_)) pl.plot(np.arange(0, 0.5, 0.01), np.array(dist_mse)) for i in range(10): pl.figure() pl.scatter(X_[:,i], y,c=groups, cmap=pl.cm.rainbow) ################################################ n_rows = 3 indexes = np.array(zip(*np.triu_indices(13, 1))) color = 'bgr' labels_group = ['elderly', 'mci', 'young'] j = 0 for _, x in enumerate(X.T):
# Print an evaluation report for the accumulated predictions.
# NOTE(review): formatting reconstructed — the original source was collapsed
# onto a single line; token order is unchanged, nesting is the natural parse.
verbose('----------\n')
verbose("Evaluation")
if opts.mode in ['age','gender']:
    # classification modes: report accuracy/precision/recall/F-score
    from sklearn.metrics.metrics import precision_score, recall_score, confusion_matrix, classification_report, accuracy_score, f1_score
    # Computing performance (translated from Spanish: "Calculando desempeño")
    print( 'Accuracy :', accuracy_score(y_, prediction_))
    print( 'Precision :', precision_score(y_, prediction_))
    print( 'Recall :', recall_score(y_, prediction_))
    print( 'F-score :', f1_score(y_, prediction_))
    print( '\nClasification report:\n', classification_report(y_, prediction_))
    print( '\nConfussion matrix :\n',confusion_matrix(y_, prediction_))
else:
    # regression mode: report MAE / MSE / R2
    from sklearn.metrics.metrics import mean_absolute_error, mean_squared_error,r2_score
    print( 'Mean Abs Error :', mean_absolute_error(y_, prediction_))
    print( 'Mean Sqr Error :', mean_squared_error(y_, prediction_))
    print( 'R2 Error :', r2_score(y_, prediction_))
#plots:
#import matplotlib.pyplot as plt
#confusion_matrix_plot = confusion_matrix(y_test, prediction)
#plt.title('matriz de confusion')
#plt.colorbar()
#plt.xlabel()
#plt.xlabel('categoria de verdad')
#plt.ylabel('categoria predecida')
#plt.show()