def test_regression_metrics_at_limits():
    assert_almost_equal(mean_squared_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(mean_squared_log_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(mean_absolute_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(median_absolute_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(explained_variance_score([0.], [0.]), 1.00, 2)
    assert_almost_equal(r2_score([0., 1], [0., 1]), 1.00, 2)
    assert_raises_regex(ValueError,
                        "Mean Squared Logarithmic Error cannot be "
                        "used when targets contain negative values.",
                        mean_squared_log_error, [-1.], [-1.])
def test_regression_metrics(n_samples=50):
    y_true = np.arange(n_samples)
    y_pred = y_true + 1

    assert_almost_equal(mean_squared_error(y_true, y_pred), 1.)
    assert_almost_equal(mean_squared_log_error(y_true, y_pred),
                        mean_squared_error(np.log(1 + y_true),
                                           np.log(1 + y_pred)))
    assert_almost_equal(mean_absolute_error(y_true, y_pred), 1.)
    assert_almost_equal(median_absolute_error(y_true, y_pred), 1.)
    assert_almost_equal(r2_score(y_true, y_pred), 0.995, 2)
    assert_almost_equal(explained_variance_score(y_true, y_pred), 1.)
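# The test above relies on the identity MSLE(y, y_hat) == MSE(log1p(y), log1p(y_hat)).
# A minimal, self-contained sketch of that equivalence; the concrete arrays below are
# illustrative and not part of the original snippets:
import numpy as np
from sklearn.metrics import mean_squared_error, mean_squared_log_error

y_true = np.array([3.0, 5.0, 2.5, 7.0])
y_pred = np.array([2.5, 5.0, 4.0, 8.0])

msle = mean_squared_log_error(y_true, y_pred)
msle_via_log1p = mean_squared_error(np.log1p(y_true), np.log1p(y_pred))
assert np.isclose(msle, msle_via_log1p)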
def test_regression_multioutput_array():
    y_true = [[1, 2], [2.5, -1], [4.5, 3], [5, 7]]
    y_pred = [[1, 1], [2, -1], [5, 4], [5, 6.5]]

    mse = mean_squared_error(y_true, y_pred, multioutput='raw_values')
    mae = mean_absolute_error(y_true, y_pred, multioutput='raw_values')
    r = r2_score(y_true, y_pred, multioutput='raw_values')
    evs = explained_variance_score(y_true, y_pred, multioutput='raw_values')

    assert_array_almost_equal(mse, [0.125, 0.5625], decimal=2)
    assert_array_almost_equal(mae, [0.25, 0.625], decimal=2)
    assert_array_almost_equal(r, [0.95, 0.93], decimal=2)
    assert_array_almost_equal(evs, [0.95, 0.93], decimal=2)

    # mean_absolute_error and mean_squared_error are equal because
    # it is a binary problem.
    y_true = [[0, 0]] * 4
    y_pred = [[1, 1]] * 4
    mse = mean_squared_error(y_true, y_pred, multioutput='raw_values')
    mae = mean_absolute_error(y_true, y_pred, multioutput='raw_values')
    r = r2_score(y_true, y_pred, multioutput='raw_values')
    assert_array_almost_equal(mse, [1., 1.], decimal=2)
    assert_array_almost_equal(mae, [1., 1.], decimal=2)
    assert_array_almost_equal(r, [0., 0.], decimal=2)

    r = r2_score([[0, -1], [0, 1]], [[2, 2], [1, 1]], multioutput='raw_values')
    assert_array_almost_equal(r, [0, -3.5], decimal=2)
    assert_equal(np.mean(r), r2_score([[0, -1], [0, 1]], [[2, 2], [1, 1]],
                                      multioutput='uniform_average'))
    evs = explained_variance_score([[0, -1], [0, 1]], [[2, 2], [1, 1]],
                                   multioutput='raw_values')
    assert_array_almost_equal(evs, [0, -1.25], decimal=2)

    # Checking for the condition in which both numerator and denominator are
    # zero.
    y_true = [[1, 3], [-1, 2]]
    y_pred = [[1, 4], [-1, 1]]
    r2 = r2_score(y_true, y_pred, multioutput='raw_values')
    assert_array_almost_equal(r2, [1., -3.], decimal=2)
    assert_equal(np.mean(r2), r2_score(y_true, y_pred,
                                       multioutput='uniform_average'))
    evs = explained_variance_score(y_true, y_pred, multioutput='raw_values')
    assert_array_almost_equal(evs, [1., -3.], decimal=2)
    assert_equal(np.mean(evs), explained_variance_score(y_true, y_pred))

    # Handling msle separately as it does not accept negative inputs.
    y_true = np.array([[0.5, 1], [1, 2], [7, 6]])
    y_pred = np.array([[0.5, 2], [1, 2.5], [8, 8]])
    msle = mean_squared_log_error(y_true, y_pred, multioutput='raw_values')
    msle2 = mean_squared_error(np.log(1 + y_true), np.log(1 + y_pred),
                               multioutput='raw_values')
    assert_array_almost_equal(msle, msle2, decimal=2)
def test_multioutput_regression():
    y_true = np.array([[1, 0, 0, 1], [0, 1, 1, 1], [1, 1, 0, 1]])
    y_pred = np.array([[0, 0, 0, 1], [1, 0, 1, 1], [0, 0, 0, 1]])

    error = mean_squared_error(y_true, y_pred)
    assert_almost_equal(error, (1. / 3 + 2. / 3 + 2. / 3) / 4.)

    error = mean_squared_log_error(y_true, y_pred)
    assert_almost_equal(error, 0.200, decimal=2)

    # mean_absolute_error and mean_squared_error are equal because
    # it is a binary problem.
    error = mean_absolute_error(y_true, y_pred)
    assert_almost_equal(error, (1. / 3 + 2. / 3 + 2. / 3) / 4.)

    error = r2_score(y_true, y_pred, multioutput='variance_weighted')
    assert_almost_equal(error, 1. - 5. / 2)
    error = r2_score(y_true, y_pred, multioutput='uniform_average')
    assert_almost_equal(error, -.875)
def test_regression_custom_weights():
    y_true = [[1, 2], [2.5, -1], [4.5, 3], [5, 7]]
    y_pred = [[1, 1], [2, -1], [5, 4], [5, 6.5]]

    msew = mean_squared_error(y_true, y_pred, multioutput=[0.4, 0.6])
    maew = mean_absolute_error(y_true, y_pred, multioutput=[0.4, 0.6])
    rw = r2_score(y_true, y_pred, multioutput=[0.4, 0.6])
    evsw = explained_variance_score(y_true, y_pred, multioutput=[0.4, 0.6])

    assert_almost_equal(msew, 0.39, decimal=2)
    assert_almost_equal(maew, 0.475, decimal=3)
    assert_almost_equal(rw, 0.94, decimal=2)
    assert_almost_equal(evsw, 0.94, decimal=2)

    # Handling msle separately as it does not accept negative inputs.
    y_true = np.array([[0.5, 1], [1, 2], [7, 6]])
    y_pred = np.array([[0.5, 2], [1, 2.5], [8, 8]])
    msle = mean_squared_log_error(y_true, y_pred, multioutput=[0.3, 0.7])
    msle2 = mean_squared_error(np.log(1 + y_true), np.log(1 + y_pred),
                               multioutput=[0.3, 0.7])
    assert_almost_equal(msle, msle2, decimal=2)
plt.xlabel('Prediction')
plt.ylabel('Real value')

# Now add the perfect prediction line
diagonal = np.linspace(0, np.max(y_test), 100)
plt.plot(diagonal, diagonal, '-r')
plt.show()


# In[148]:

from sklearn.metrics import mean_squared_log_error, mean_absolute_error

print('MAE:\t$%.2f' % mean_absolute_error(y_test, y_pred))
print('MSLE:\t%.5f' % mean_squared_log_error(y_test, y_pred))


# In[149]:

# Score (the regressor's R^2, reported here as a percentage)
print("Accuracy --> ", model.score(X_test, y_test) * 100)


# In[153]:

# Train the model
from sklearn import linear_model

model = linear_model.LinearRegression()
def calcs_msle(y, p):
    # Clip predictions at zero so MSLE does not fail on negative values.
    return mean_squared_log_error(y, np.clip(p, 0, None))
def log_rmse(y_orig, y_pred):
    return math.sqrt(metrics.mean_squared_log_error(y_orig, y_pred))
print("Mean absolute error: ", round(np.mean(errors), 2)) print("Accuracy: ", round(accuracy, 2), "%", "\n") print("Explained variance regression score: ", explained_variance_score(y_rescaled, predict_valid)) print("R2 score: ", r2_score(y_rescaled, predict_valid), "\n") print("Maximum residual error: ", max_error(y_rescaled, predict_valid)) print("Median absolute error: ", median_absolute_error(y_rescaled, predict_valid)) print("Mean absolute error: ", mean_absolute_error(y_rescaled, predict_valid)) print("Mean squared error: ", mean_squared_error(y_rescaled, predict_valid)) print("Root mean squared error:", sqrt(mean_squared_error(y_rescaled, predict_valid))) print("Mean squared logarithmic error: ", mean_squared_log_error(y_rescaled, predict_valid), "\n") ############################################################################## # Leave-one-out cross validation cv = LeaveOneOut() scores = cross_validate(model, X, y, scoring=[ 'r2', 'neg_median_absolute_error', 'explained_variance', 'max_error', 'neg_mean_absolute_error', 'neg_mean_squared_error',
def rmsle(y_test, predictions):
    return np.sqrt(mean_squared_log_error(y_test, predictions))
x_scaled = scale(x)
y_scaled = scale(y)

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3,
                                                     random_state=42)
reg_all = LinearRegression()
reg_all.fit(x_train, y_train)
y_pred = reg_all.predict(x_test)
reg_all.score(x_test, y_test)

from sklearn.tree import DecisionTreeRegressor

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3,
                                                     random_state=42)
dt = DecisionTreeRegressor(max_depth=5, random_state=6)
dt.fit(x_train, y_train)
y_pred = dt.predict(x_test)
dt.score(x_test, y_test)

from sklearn import metrics

print(metrics.mean_absolute_error(y_test, y_pred))
print(metrics.mean_squared_error(y_test, y_pred))
print(metrics.mean_squared_log_error(y_test, y_pred))

from sklearn.model_selection import cross_val_score

cv_results = cross_val_score(reg_all, x, y, cv=5)
print(cv_results)
print('Root Mean Squared Error = %0.3f' % rmse)

# Mean Squared Error
mse = mean_squared_error(y_true, y_pred)
print('Mean Squared Error = %0.3f' % mse)

# Mean Absolute Error
mae = mean_absolute_error(y_true, y_pred)
print('Mean Absolute Error = %0.3f' % mae)

# Median Absolute Error
med_ea = median_absolute_error(y_true, y_pred)
print('Median Absolute Error = %0.3f' % med_ea)

# Mean Squared Log Error
msle = mean_squared_log_error(y_true, y_pred)
print('Mean Squared Log Error = %0.3f' % msle)

# Max Error
me = max_error(y_true, y_pred)
print('Max Error = %0.3f' % me)

# Plot Actual vs. Predicted
plt.title('Actual vs. Predicted')
plt.xlabel('YearsExperience')
plt.ylabel('Salary')
plt.scatter(x_true, y_true)
plt.scatter(x_true, y_pred)
plt.show()  # Outputs plot
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_log_error
import numpy as np

# Load the data
boston = load_boston()
X = boston['data']
y = boston['target']
X_train, X_test, y_train, y_true = train_test_split(X, y, test_size=0.3,
                                                    random_state=0)

model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print('RMSLE is', np.sqrt(mean_squared_log_error(y_true, y_pred)))
def rmsle(actual, pred):
    # Negative predictions would make MSLE raise, so floor them at zero.
    pred[pred < 0] = 0
    return mean_squared_log_error(actual, pred)**0.5
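# The helper above mutates `pred` in place. A non-mutating variant, sketched here
# with illustrative names and assuming numpy array inputs, clips a copy instead:
import numpy as np
from sklearn.metrics import mean_squared_log_error

def rmsle_clipped(actual, pred):
    # Clip the predictions at zero without editing the caller's array.
    return mean_squared_log_error(actual, np.clip(pred, 0, None)) ** 0.5

print(rmsle_clipped(np.array([3.0, 5.0, 2.0]), np.array([-0.5, 4.0, 2.5])))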
print("Mean Squared Log Error : ", msle[-1]) r2.append(r2_score(y, h)) print("R2 Score : ", r2[-1]) plt.plot(range(lr.cv_splits), ev, "bo") #plt.plot(range(lr.cv_splits), mae, "r+") #plt.plot(range(lr.cv_splits), rmse, "g--") #plt.plot(range(lr.cv_splits), msle, "b.") plt.plot(range(lr.cv_splits), r2, "g^") plt.title("Split vs Metrics") plt.show() """ print("Test Data") b = np.random.normal(scale=1 / X_train.shape[1]**.5) # can get the size by checking it in the gradient_descent_runner function W = np.random.normal(scale=1 / X_train.shape[1]**.5, size=X_train.shape[1]) b, W, cost_graph = sr.gradient_descent_runner(X_train, y_train, b, W) np.save("SRWeights.npy", np.append(W, b)) np.save("SRSteps.npy", np.array(steps)) h = sr.hypothesis(b, W, X_test) print("Explained Variance : ", explained_variance_score(y_test, h)) print("Mean Absolute Error : ", mean_absolute_error(y_test, h)) print("Root Mean Squared Error : ", mean_squared_error(y_test, h)**.5) print("Mean Squared Log Error : ", mean_squared_log_error(y_test, h)) print("R2 Score : ", r2_score(y_test, h))
df = DataFrame({'actual': actual, 'predictions': predictions})
df = df[df['actual'].notna()]
df = df[df['predictions'].notna()]
actual = numpy.asarray(df.actual)
predictions = list(df.predictions)

# save prediction vs expected to data frame and csv
# TODO: change path to ./Results/lstm/.../pred_vs_exp.csv
df.to_csv("./Results/lstm/" + sys.argv[1] + "/pred_vs_exp.csv", index=False)

# evaluate performance
rmse = sqrt(mean_squared_error(actual, predictions))
mse = mean_squared_error(actual, predictions)
evs = explained_variance_score(actual, predictions)
mae = mean_absolute_error(actual, predictions)
msle = mean_squared_log_error(actual, predictions)
meae = median_absolute_error(actual, predictions)
r_square = r2_score(actual, predictions)
print("rmse: ", rmse, " mse: ", mse, "evs: ", evs, "mae: ", mae,
      "msle: ", msle, "meae: ", meae, "r_square: ", r_square)

# save accuracy metrics to data frame
performance_evals = DataFrame(columns=['rmse', 'mse', 'evs', 'mae',
                                       'msle', 'meae', 'r_square'])
performance_evals = performance_evals.append({'rmse': rmse,
                                              'mse': mse,
                                              'evs': evs,
                                              'mae': mae,
                                              'msle': msle,
                                              'meae': meae,
                                              'r_square': r_square},
mse3 = mean_squared_error(y1test, y_poly_pred1)
rmse3 = np.sqrt(mse3)

# 4th-degree fit
polynomial_features2 = PolynomialFeatures(degree=4)
x_poly2 = polynomial_features2.fit_transform(x1)
x_poly_test2 = polynomial_features2.fit_transform(x1test)

model2 = LinearRegression()
model2.fit(x_poly2, y1)
y_poly_pred2 = np.around(model2.predict(x_poly_test2))

# Evaluate
r2_d4 = r2_score(y1test, y_poly_pred2)
mse4 = mean_squared_error(y1test, y_poly_pred2)
rmse4 = np.sqrt(mse4)
rmsle4 = np.sqrt(mean_squared_log_error(y1test, y_poly_pred2))

# 5th degree
polynomial_features3 = PolynomialFeatures(degree=5)
x_poly3 = polynomial_features3.fit_transform(x1)
x_poly_test3 = polynomial_features3.fit_transform(x1test)

model3 = LinearRegression()
model3.fit(x_poly3, y1)
y_poly_pred3 = np.around(model3.predict(x_poly_test3))

# Evaluate
r2_d5 = r2_score(y1test, y_poly_pred3)
mse5 = mean_squared_error(y1test, y_poly_pred3)
rmse5 = np.sqrt(mse5)
rmsle5 = np.sqrt(mean_squared_log_error(y1test, y_poly_pred3))
b, W, cost_graph = lr.gradient_descent_runner(X, y, b, W)

plt.plot(range(lr.num_iterations), np.log(cost_graph))
plt.title("Number of Iterations vs Cost")
plt.show()

X, y = X_train[start:end], y_train[start:end]
h = lr.hypothesis(b, W, X)

ev.append(explained_variance_score(y, h))
print("Explained Variance : ", ev[-1])
mae.append(mean_absolute_error(y, h))
print("Mean Absolute Error : ", mae[-1])
rmse.append(mean_squared_error(y, h)**.5)
print("Root Mean Squared Error : ", rmse[-1])
msle.append(mean_squared_log_error(y, h))
print("Mean Squared Log Error : ", msle[-1])
r2.append(r2_score(y, h))
print("R2 Score : ", r2[-1])

global_mae.append(np.average(mae))
lambdas.append(lr.l2_lambda)
if best_mae > global_mae[-1]:
    best_mae = global_mae[-1]
    best_l2 = lr.l2_lambda
lr.l2_lambda *= 3

print("Test Data")
lr.l2_lambda = best_l2
print("With best hyperparameter lambda ", lr.l2_lambda)
b = np.random.normal(scale=1 / X_train.shape[1]**.5)
x_new['is_holiday'] = df['is_holiday']
x_new['clouds_all'] = df['clouds_all']
x_new['weather_type'] = df['weather_type']
x_new['humidity'] = df['humidity']
#x_new['date'] = df['date']
#x_new['month'] = df['month']

#x_train, x_test, y_train, y_test = train_test_split(x_new, Y, test_size=0.25, random_state=42)
model = RandomForestRegressor(n_jobs=-1, n_estimators=100, max_depth=6)  # original is 100 and 6
#model.fit(x_train, y_train)
model.fit(x_new, Y)
y_pred = model.predict(x_new)
#y_pred = model.predict(x_test)
print('msle is: %r' % mean_squared_log_error(Y, y_pred))
#err = mean_squared_error(y_test, y_pred)
#print('mean squared error is %r' % err)

s = 'DataSets/Test.csv'
df2 = clean(s)
dates = df2['date_time']
#X = df2.drop(['date_time', 'dew_point'], axis=1)
x_new = pd.DataFrame()
x_new['temperature'] = df2['temperature']
x_new['hour'] = df2['hour']
x_new['weather_description'] = df2['weather_description']
x_new['is_holiday'] = df2['is_holiday']
                 verbose_eval=500)

xgbm_va_pred = np.expm1(xgbm.predict(xgb.DMatrix(X_valid)))
xgbm_va_pred = np.clip(xgbm_va_pred, 0, 100000)
xgbm_va_pred[xgbm_va_pred < 0] = 0

# ENS
# lists to keep results
lgb_xgb_rmsle = []
lgb_xgb_alphas = []

for alpha in np.linspace(0, 1, 101):
    y_pred = alpha * gbc_va_pred + (1 - alpha) * xgbm_va_pred
    #rmsle_score = np.sqrt(mean_squared_log_error(np.expm1(y_valid), y_pred))
    rmsle_score = np.sqrt(
        mean_squared_log_error(
            np.expm1(y_valid),
            np.where(np.expm1(y_valid) > 5000, np.expm1(y_valid), y_pred)))
    lgb_xgb_rmsle.append(rmsle_score)
    lgb_xgb_alphas.append(alpha)

lgb_xgb_rmsle = np.array(lgb_xgb_rmsle)
lgb_xgb_alphas = np.array(lgb_xgb_alphas)
lgb_xgb_best_alpha = lgb_xgb_alphas[np.argmin(lgb_xgb_rmsle)]
print('best_rmsle=', lgb_xgb_rmsle.min())
print('best_alpha=', lgb_xgb_best_alpha)

plt.plot(lgb_xgb_alphas, lgb_xgb_rmsle)
plt.title('rmsle for ensemble')
plt.xlabel('alpha')
plt.ylabel('rmsle')
def evaluate(y_pred, y_true):
    """Returns the RMSLE(y_pred, y_true)"""
    return (mean_squared_log_error(y_true, y_pred)) ** 0.5
def train(self, data):
    ts = datetime.datetime.now()
    self._features = data["features"]
    self._label = data["label"]
    # self._model.fit(self._features, self._label)

    result = dict()
    result["metricas"] = list()

    self._data = data["data"]  ## holds the complete data set
    self._features = data["features"]  # X
    self._target = data["label"]  # Y
    #self._model.fit(self._features, self._target)

    print("muestra el data. describe")
    ## TODO: if it prints something, send it to the screen; may raise
    ## AttributeError: 'dict' object has no attribute 'describe'
    describe = self._data.describe()
    print(describe)
    item = dict()
    item["descripcion"] = str(describe)
    item["metrica"] = "Data.describe()"
    result["metricas"].append(item)

    print("muestra el data. corr()")
    corr = self._data.corr()  ## TODO: if it works, show the plot
    item = dict()
    item["descripcion"] = str(corr)
    item["metrica"] = "Correlacion de los datos"
    result["metricas"].append(item)
    print(corr)

    ax = plt.figure(figsize=(12, 12))
    sns.heatmap(corr,
                mask=np.zeros_like(corr, dtype=bool),
                cmap=sns.diverging_palette(220, 10, as_cmap=True),
                square=True)
    plt.ylabel('Actual label')
    plt.xlabel('Predicted label')
    all_sample_title = 'Matriz de correlacion: {0}'.format(1)
    plt.title(all_sample_title, size=15)
    # figure.show()
    ax.savefig('MatrizCorrelacion.png')

    ## cv sets how many partitions the data set is split into
    scores = cross_val_score(self._model, self._features, self._target, cv=5)
    print("muestra el score pero del cross validation")
    item = dict()
    item["descripcion"] = str(scores.mean())
    item["metrica"] = "CrossValidation_mean"
    result["metricas"].append(item)

    item = dict()
    item["descripcion"] = str(scores)
    item["metrica"] = "CrossValidation_scores"
    result["metricas"].append(item)

    item = dict()
    item["descripcion"] = str(self._features)
    item["metrica"] = "Datos de X completos 100%"
    result["metricas"].append(item)

    item = dict()
    item["descripcion"] = str(self._target)
    item["metrica"] = "Datos de Y completos 100%"
    result["metricas"].append(item)

    ### METRICS TEST REGION ###
    # Split the data into test and training (30% for test)
    X_train, X_test, Y_train, Y_test = train_test_split(self._features,
                                                        self._target,
                                                        test_size=0.3)

    # TRAIN USING the 70% split
    self._model = self._model.fit(X_train, Y_train)
    # item = dict()
    # item["descripcion"] = str(self._model)
    # item["metrica"] = "self_model"
    # result["metricas"].append(item)

    accuracy = self._model.score(X_test, Y_test)
    item = dict()
    item["descripcion"] = str(accuracy)
    item["metrica"] = "exactitud(accuracy)"
    result["metricas"].append(item)
    print('Accuracy(exactitud): ' + str(accuracy))
    print('Datos de X: ' + str(self._features))
    print('Datos de Y: ' + str(self._target))
    print('x e y de test aplicados al modelo')
    print(X_test)

    item = dict()
    item["descripcion"] = str(X_train)
    item["metrica"] = "70% X entrenamiento"
    result["metricas"].append(item)

    item = dict()
    item["descripcion"] = str(Y_train)
    item["metrica"] = "70% Y entrenamiento"
    result["metricas"].append(item)

    item = dict()
    item["descripcion"] = str(X_test)
    item["metrica"] = "30% X test"
    result["metricas"].append(item)

    item = dict()
    print(Y_test)
    item["descripcion"] = str(Y_test)
    item["metrica"] = "30% Y test"
    result["metricas"].append(item)

    prediction = self._model.predict(X_test)

    acc = metrics.accuracy_score(Y_test, prediction)
    item = dict()
    print('accuracy_score' + str(acc))
    item["descripcion"] = str(acc)
    item["metrica"] = "sklearn.metrics.accuracy_score"
    result["metricas"].append(item)

    confMatr = metrics.confusion_matrix(Y_test, prediction)
    item = dict()
    print('metrics.confusion_matrix: ' + str(confMatr))
    item["descripcion"] = str(confMatr)
    item["metrica"] = "sklearn.metrics.confusion_matrix"
    result["metricas"].append(item)

    figure = plt.figure(figsize=(100, 100))
    sns.heatmap(confMatr, annot=True, fmt=".1f", linewidths=3, square=True,
                cmap='Blues_r')
    plt.ylabel('Actual label')
    plt.xlabel('Predicted label')
    all_sample_title = 'Accuracy Score: {0}'.format(acc)
    plt.title(all_sample_title, size=15)
    # figure.show()
    figure.savefig('ConfusionMatrix.png')

    item = dict()
    print('predeciendo el X_test' + str(prediction))
    item["descripcion"] = str(prediction)
    item["metrica"] = "Y predecido con 70% datos"
    result["metricas"].append(item)

    # Measure - Since this is a regression problem, we will use the r2 score metric.
    scoreR2 = metrics.r2_score(Y_test, prediction)
    item = dict()
    item["descripcion"] = str(scoreR2)
    item["metrica"] = "score_r2_metric"
    result["metricas"].append(item)
    print(scoreR2)

    # To measure, Y_test is used because it holds the pure real values (from the
    # 30% of the data not used until now), so that the relation with the
    # predicted values can be established.
    evs = metrics.explained_variance_score(Y_test, prediction)
    mae = metrics.mean_absolute_error(Y_test, prediction)
    mse = metrics.mean_squared_error(Y_test, prediction)
    mslg = metrics.mean_squared_log_error(Y_test, prediction)
    mene = metrics.median_absolute_error(Y_test, prediction)

    # RMSE - Root Mean Squared Error (RMSE) is the square root of the mean of
    # the squared errors. MSE is more popular than MAE because MSE "punishes"
    # larger errors, but RMSE is even more popular than MSE because RMSE is
    # interpretable in the "y" units.
    rmse = np.sqrt(metrics.mean_squared_error(Y_test, prediction))
    print(rmse)
    item = dict()
    item["descripcion"] = str(rmse)
    item["metrica"] = "Root Mean Squared Error (RMSE)"
    result["metricas"].append(item)

    item = dict()
    item["descripcion"] = str(evs)
    item["metrica"] = "explained_variance_score"
    result["metricas"].append(item)

    item = dict()
    item["descripcion"] = str(mae)
    item["metrica"] = "mean_absolute_error"
    result["metricas"].append(item)

    item = dict()
    item["descripcion"] = str(mse)
    item["metrica"] = "mean_squared_error"
    result["metricas"].append(item)

    item = dict()
    item["descripcion"] = str(mslg)
    item["metrica"] = "mean_squared_log_error"
    result["metricas"].append(item)

    item = dict()
    item["descripcion"] = str(mene)
    item["metrica"] = "median_absolute_error"
    result["metricas"].append(item)

    tf = datetime.datetime.now()
    tf = tf - ts
    item = dict()
    item["descripcion"] = str(tf)
    item["metrica"] = "time"
    result["metricas"].append(item)

    print(evs)
    print(mae)
    print(mse)
    print(mslg)
    print(mene)

    # The coefficients
    # print('Coefficients: \n', self._model.coef_)
    # item = dict()
    # item["descripcion"] = str(self._model.coef_)
    # item["metrica"] = "model coefficients after the prediction"
    # result["metricas"].append(item)

    # The mean squared error
    print("Mean squared error: %.2f" % mean_squared_error(Y_test, prediction))
    # Explained variance score: 1 is perfect prediction
    print('Variance score: %.2f' % r2_score(Y_test, prediction))
    ### END METRICS TEST REGION ###
    return result["metricas"]
svm = svm.SVR()
reg = GridSearchCV(svm, parameters, scoring='r2', n_jobs=-1)
reg.fit(X_train, y_train)

best_score = reg.best_score_
best_param = reg.best_params_
best_model = reg.best_estimator_  # Can use in cross_val_score & predict

# Cross validated estimate on training and test data
score = cross_val_score(estimator=svm, X=X_train, y=y_train, cv=kfold,
                        scoring='r2')
prediction = cross_val_predict(svm, X_test, y_test, cv=kfold, n_jobs=-1)

# Variants of scoring
msle = mean_squared_log_error(y_test, prediction)
mse = mean_squared_error(y_test, prediction)
mae = mean_absolute_error(y_test, prediction)
r2 = r2_score(y_test, prediction)

from math import sqrt
rmse = sqrt(mse)  # root mean --

# Model visualization
plt.plot(y_test, color='red', label='Real time lapse')
plt.plot(prediction, color='blue', label='Predicted time lapse')
plt.title('Time lapse interval predictions')
plt.xlabel('Frequency')
plt.ylabel('Time lapse')
plt.legend()
plt.show()
print(model.score(X, Y))

Y_pred = model.predict(X_test)
print("---------------model score--------------")
print(model.score(X_test, Y_test))
#print(model.predict(test.iloc[514: 516, 0:256 ]))

print("------------evaluation-------------")
print("macrof1 score")
print(f1_score(Y_test, Y_pred, average='macro'))
print("microf1 score")
print(f1_score(Y_test, Y_pred, average='micro'))
print("weightedf1 score")
print(f1_score(Y_test, Y_pred, average='weighted'))
print("mean_squared_log_error")
print(mean_squared_log_error(Y_test, Y_pred))
print("hamming loss")
Y_pred1 = np.array(Y_pred)
Y_test1 = np.array(Y_test)
print(np.sum(np.not_equal(Y_test1, Y_pred1)) / float(Y_test1.size))

print("-----------------RandomForestClassifier---------------")
modelrf = RandomForestClassifier(n_estimators=1000,
                                 criterion='gini',
                                 max_depth=None,
                                 min_samples_split=5,
                                 min_samples_leaf=1,
                                 min_weight_fraction_leaf=0.0,
                                 max_features='auto',
                                 max_leaf_nodes=None,
                                 bootstrap=True,
def NWRMSLE(y, pred, weights=None):
    err2 = skl_metrics.mean_squared_log_error(y, pred, sample_weight=weights)
    return math.sqrt(err2)
def rmsle(y_test, y_pred):
    return np.sqrt(mean_squared_log_error(y_test, y_pred))
plt.xlabel("Years of Employee") plt.ylabel("Saleries of Employee") plt.show() import statsmodels.api as sm #import statsmodels.formula.api as sm #import statsmodels.tools.tools.add_constant as sv X1 = sm.add_constant(X) reg = sm.OLS(y, X1).fit() reg.summary() from sklearn.metrics import mean_absolute_error mean_absolute_error(y_test, y_predict) from sklearn.metrics import mean_squared_error mean_squared_error(y_test, y_predict) from math import sqrt from sklearn.metrics import mean_squared_error result = sqrt(mean_squared_error(y_test, y_predict)) from sklearn.metrics import mean_squared_log_error np.sqrt(mean_squared_log_error(y_test, y_predict)) import statsmodels.api as sm #import statsmodels.formula.api as sm #import statsmodels.tools.tools.add_constant as sv X1 = sm.add_constant(X) reg = sm.OLS(y, X1).fit() reg.summary()
X_train, X_test, y_train, y_test = train_test_split(Data_new, y, test_size=0.2)

from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error

params = {'n_estimators': 500, 'max_depth': 4, 'min_samples_split': 2,
          'learning_rate': 0.01, 'loss': 'ls'}
clf = GradientBoostingRegressor(**params)
clf.fit(X_train, y_train)

mse = mean_squared_error(y_test, clf.predict(X_test))
# Use a distinct name so the mean_squared_log_error function is not shadowed.
msle = mean_squared_log_error(y_test, clf.predict(X_test))
print("MSE: %.4f" % mse)

feat_importances = pd.Series(clf.feature_importances_, index=Data_new.columns)
feat_importances.nlargest(10).plot(kind='barh')

test = pd.read_csv('test_QkPvNLx.csv')
test.info()
test['Competition_Metric'].hist()
test['Competition_Metric'].mean()
test['Competition_Metric'].median()
    _valid_df = raw.train.loc[valid_idx]

    train_dataset = lgb.Dataset(_train_df[features], _train_df["likes_log"])
    valid_dataset = lgb.Dataset(_valid_df[features], _valid_df["likes_log"])

    model = lgb.train(
        Config.lgb_params,
        train_dataset,
        num_boost_round=1000,
        valid_sets=[train_dataset, valid_dataset],
        verbose_eval=50,
        early_stopping_rounds=200,
        categorical_feature=cat_features,
    )

    y_pred = np.expm1(model.predict(_valid_df[features]))
    y_pred[y_pred < 0] = 0
    y_true = _valid_df["likes"].values
    rmsle = np.sqrt(mean_squared_log_error(y_true, y_pred))
    rmsles.append(rmsle)

    print(f"------------------------ fold {fold} -----------------------")
    print(f"------------------- rmsle {rmsle} -----------------------")
    print()

print("")
print(
    f"------------------- average rmsle {np.mean(rmsles)} -----------------------"
)

# %%
train_dataset = lgb.Dataset(raw.train[features], raw.train["likes_log"])
model = lgb.train(
    Config.lgb_params,
    train_dataset,
def calc_score(y_true, y_pred):
    score = 100 * max(0, 1 - metrics.mean_squared_log_error(y_true, y_pred))
    return score
def get_rmsle(y_true, y_pred):
    return np.sqrt(mean_squared_log_error(np.expm1(y_true), np.expm1(y_pred)))
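# get_rmsle above assumes the targets were log1p-transformed before training, so both
# arguments are mapped back with expm1 before scoring. A minimal sketch of that
# log-target pattern; the estimator and the synthetic data below are illustrative,
# not taken from the snippets above:
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_log_error

rng = np.random.default_rng(0)
X = rng.uniform(0, 10, size=(200, 3))
y = np.exp(0.3 * X[:, 0]) + rng.uniform(0, 1, size=200)  # positive, skewed target

model = LinearRegression().fit(X, np.log1p(y))  # fit in log space
pred_log = model.predict(X)
# invert the transform and clip at zero before computing RMSLE
rmsle = np.sqrt(mean_squared_log_error(y, np.clip(np.expm1(pred_log), 0, None)))
print(f"RMSLE: {rmsle:.4f}")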
print(history.history.keys())
plt.plot(history.history['mean_absolute_error'])
plt.plot(history.history['val_mean_absolute_error'])
plt.title('model mean absolute error')
plt.ylabel('mean absolute error')
plt.xlabel('epoch')
plt.legend(['train', 'val'], loc='upper left')
plt.show()

pred = pred.reshape(-1,)
mae = mean_absolute_error(np.exp(pred), test_answer)
msle = mean_squared_log_error(np.exp(pred), test_answer)
mae_list.append(mae)
RMSL_list.append(np.sqrt(msle))

print("# MAE: {}\n".format(mae))
print("# Mean square log error: {}\n".format(msle))
print("# Root Mean square log error: {}\n".format(np.sqrt(msle)))

print("# mean of MAE: {}\n".format(np.mean(mae_list)))
print("# std of MAE: {}\n".format(np.std(mae_list)))
print("# mean of Root Mean square log error: {}\n".format(np.mean(RMSL_list)))
print("# std of Root Mean square log error: {}\n".format(np.std(RMSL_list)))
def __init__(self, parameters_dict):
    super(NetPosUsdDQRWU, self).__init__(parameters_dict)
    self.loss_eval = lambda x, y: mean_squared_log_error(x, y)
                        n_estimators=30,
                        learning_rate=0.1,
                        random_state=42)
ada_svr_reg.fit(ri_PaintingLT_prepared_train, ri_PaintingLT_labels_train)
ri_PaintingLT_predicted = ada_svr_reg.predict(ri_PaintingLT_prepared_test)

from sklearn.metrics import mean_squared_error
ada_svr_reg_mse = mean_squared_error(ri_PaintingLT_labels_test,
                                     ri_PaintingLT_predicted)
ada_svr_reg_rmse = np.sqrt(ada_svr_reg_mse)
print(ada_svr_reg_rmse)

from sklearn.metrics import mean_absolute_error
ada_svr_reg_mae = mean_absolute_error(ri_PaintingLT_labels_test,
                                      ri_PaintingLT_predicted)
print(ada_svr_reg_mae)

ada_svr_reg_mape = (np.abs(
    (ri_PaintingLT_predicted - ri_PaintingLT_labels_test)
    / ri_PaintingLT_labels_test).mean(axis=0))
print(ada_svr_reg_mape)

from sklearn.metrics import mean_squared_log_error
ri_PaintingLT_predicted[ri_PaintingLT_predicted < 0] = 0
ada_svr_reg_rmsle = np.sqrt(
    mean_squared_log_error(ri_PaintingLT_labels_test,
                           ri_PaintingLT_predicted))
print(ada_svr_reg_rmsle)
def msle(labels, predictions):
    from sklearn.metrics import mean_squared_log_error
    return mean_squared_log_error(labels, predictions)
import pandas as pd
from sklearn.metrics import mean_squared_log_error

# Read the data
data = pd.read_csv("../Train/train.csv")
test_data = pd.read_csv("../test.csv")
print(data.columns)

train_predictions = data.cc_cons_apr + data.cc_cons_may + data.cc_cons_jun
print(mean_squared_log_error(data.cc_cons, train_predictions))

test_predictions = test_data.cc_cons_apr + test_data.cc_cons_may + test_data.cc_cons_jun

final_submission_data = pd.DataFrame({
    'id': test_data.id,
    'cc_cons': test_predictions
})
print(final_submission_data.head())
final_submission_data.to_csv("../Submissions/avg_submission.csv", index=False)