def test_regression_metrics_at_limits():
    assert_almost_equal(mean_squared_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(mean_squared_log_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(mean_absolute_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(median_absolute_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(explained_variance_score([0.], [0.]), 1.00, 2)
    assert_almost_equal(r2_score([0., 1], [0., 1]), 1.00, 2)
    assert_raises_regex(ValueError, "Mean Squared Logarithmic Error cannot be "
                        "used when targets contain negative values.",
                        mean_squared_log_error, [-1.], [-1.])
def test_regression_metrics(n_samples=50):
    y_true = np.arange(n_samples)
    y_pred = y_true + 1

    assert_almost_equal(mean_squared_error(y_true, y_pred), 1.)
    assert_almost_equal(mean_squared_log_error(y_true, y_pred),
                        mean_squared_error(np.log(1 + y_true),
                                           np.log(1 + y_pred)))
    assert_almost_equal(mean_absolute_error(y_true, y_pred), 1.)
    assert_almost_equal(median_absolute_error(y_true, y_pred), 1.)
    assert_almost_equal(r2_score(y_true, y_pred),  0.995, 2)
    assert_almost_equal(explained_variance_score(y_true, y_pred), 1.)
Example #3
def test_regression_multioutput_array():
    y_true = [[1, 2], [2.5, -1], [4.5, 3], [5, 7]]
    y_pred = [[1, 1], [2, -1], [5, 4], [5, 6.5]]

    mse = mean_squared_error(y_true, y_pred, multioutput='raw_values')
    mae = mean_absolute_error(y_true, y_pred, multioutput='raw_values')
    r = r2_score(y_true, y_pred, multioutput='raw_values')
    evs = explained_variance_score(y_true, y_pred, multioutput='raw_values')

    assert_array_almost_equal(mse, [0.125, 0.5625], decimal=2)
    assert_array_almost_equal(mae, [0.25, 0.625], decimal=2)
    assert_array_almost_equal(r, [0.95, 0.93], decimal=2)
    assert_array_almost_equal(evs, [0.95, 0.93], decimal=2)

    # mean_absolute_error and mean_squared_error are equal because
    # it is a binary problem.
    y_true = [[0, 0]]*4
    y_pred = [[1, 1]]*4
    mse = mean_squared_error(y_true, y_pred, multioutput='raw_values')
    mae = mean_absolute_error(y_true, y_pred, multioutput='raw_values')
    r = r2_score(y_true, y_pred, multioutput='raw_values')
    assert_array_almost_equal(mse, [1., 1.], decimal=2)
    assert_array_almost_equal(mae, [1., 1.], decimal=2)
    assert_array_almost_equal(r, [0., 0.], decimal=2)

    r = r2_score([[0, -1], [0, 1]], [[2, 2], [1, 1]], multioutput='raw_values')
    assert_array_almost_equal(r, [0, -3.5], decimal=2)
    assert_equal(np.mean(r), r2_score([[0, -1], [0, 1]], [[2, 2], [1, 1]],
                 multioutput='uniform_average'))
    evs = explained_variance_score([[0, -1], [0, 1]], [[2, 2], [1, 1]],
                                   multioutput='raw_values')
    assert_array_almost_equal(evs, [0, -1.25], decimal=2)

    # Checking for the condition in which both numerator and denominator are
    # zero.
    y_true = [[1, 3], [-1, 2]]
    y_pred = [[1, 4], [-1, 1]]
    r2 = r2_score(y_true, y_pred, multioutput='raw_values')
    assert_array_almost_equal(r2, [1., -3.], decimal=2)
    assert_equal(np.mean(r2), r2_score(y_true, y_pred,
                 multioutput='uniform_average'))
    evs = explained_variance_score(y_true, y_pred, multioutput='raw_values')
    assert_array_almost_equal(evs, [1., -3.], decimal=2)
    assert_equal(np.mean(evs), explained_variance_score(y_true, y_pred))

    # Handling msle separately as it does not accept negative inputs.
    y_true = np.array([[0.5, 1], [1, 2], [7, 6]])
    y_pred = np.array([[0.5, 2], [1, 2.5], [8, 8]])
    msle = mean_squared_log_error(y_true, y_pred, multioutput='raw_values')
    msle2 = mean_squared_error(np.log(1 + y_true), np.log(1 + y_pred),
                               multioutput='raw_values')
    assert_array_almost_equal(msle, msle2, decimal=2)
Example #4
def test_multioutput_regression():
    y_true = np.array([[1, 0, 0, 1], [0, 1, 1, 1], [1, 1, 0, 1]])
    y_pred = np.array([[0, 0, 0, 1], [1, 0, 1, 1], [0, 0, 0, 1]])

    error = mean_squared_error(y_true, y_pred)
    assert_almost_equal(error, (1. / 3 + 2. / 3 + 2. / 3) / 4.)

    error = mean_squared_log_error(y_true, y_pred)
    assert_almost_equal(error, 0.200, decimal=2)

    # mean_absolute_error and mean_squared_error are equal because
    # it is a binary problem.
    error = mean_absolute_error(y_true, y_pred)
    assert_almost_equal(error, (1. / 3 + 2. / 3 + 2. / 3) / 4.)

    error = r2_score(y_true, y_pred, multioutput='variance_weighted')
    assert_almost_equal(error, 1. - 5. / 2)
    error = r2_score(y_true, y_pred, multioutput='uniform_average')
    assert_almost_equal(error, -.875)
Example #5
def test_regression_custom_weights():
    y_true = [[1, 2], [2.5, -1], [4.5, 3], [5, 7]]
    y_pred = [[1, 1], [2, -1], [5, 4], [5, 6.5]]

    msew = mean_squared_error(y_true, y_pred, multioutput=[0.4, 0.6])
    maew = mean_absolute_error(y_true, y_pred, multioutput=[0.4, 0.6])
    rw = r2_score(y_true, y_pred, multioutput=[0.4, 0.6])
    evsw = explained_variance_score(y_true, y_pred, multioutput=[0.4, 0.6])

    assert_almost_equal(msew, 0.39, decimal=2)
    assert_almost_equal(maew, 0.475, decimal=3)
    assert_almost_equal(rw, 0.94, decimal=2)
    assert_almost_equal(evsw, 0.94, decimal=2)

    # Handling msle separately as it does not accept negative inputs.
    y_true = np.array([[0.5, 1], [1, 2], [7, 6]])
    y_pred = np.array([[0.5, 2], [1, 2.5], [8, 8]])
    msle = mean_squared_log_error(y_true, y_pred, multioutput=[0.3, 0.7])
    msle2 = mean_squared_error(np.log(1 + y_true), np.log(1 + y_pred),
                               multioutput=[0.3, 0.7])
    assert_almost_equal(msle, msle2, decimal=2)
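A short hedged sketch (not part of the original test) showing that a weight vector passed to multioutput is simply a weighted average of the 'raw_values' scores:

import numpy as np
from sklearn.metrics import mean_squared_error

y_true = [[1, 2], [2.5, -1], [4.5, 3], [5, 7]]
y_pred = [[1, 1], [2, -1], [5, 4], [5, 6.5]]

per_output = mean_squared_error(y_true, y_pred, multioutput='raw_values')
weighted = mean_squared_error(y_true, y_pred, multioutput=[0.4, 0.6])

# 0.4 * 0.125 + 0.6 * 0.5625 = 0.3875, matching the 0.39 asserted above.
assert np.isclose(weighted, np.average(per_output, weights=[0.4, 0.6]))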
plt.xlabel('Prediction')
plt.ylabel('Real value')

# Now add the perfect prediction line
diagonal = np.linspace(0, np.max(y_test), 100)
plt.plot(diagonal, diagonal, '-r')
plt.show()


# In[148]:


from sklearn.metrics import mean_squared_log_error, mean_absolute_error

print('MAE:\t$%.2f' % mean_absolute_error(y_test, y_pred))
print('MSLE:\t%.5f' % mean_squared_log_error(y_test, y_pred))


# In[149]:


# Model score: for a regressor, .score() returns the R^2 coefficient, not classification accuracy
print("R^2 score (x100) --> ", model.score(X_test, y_test)*100)


# In[153]:


#Train the model
from sklearn import linear_model
model = linear_model.LinearRegression()
Example #7
import numpy as np
from sklearn.metrics import mean_squared_log_error

def calcs_msle(y, p):
    return mean_squared_log_error(y, np.clip(p, 0, None))
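The np.clip call above exists because sklearn's MSLE raises a ValueError on negative values (as tested in the first example). A tiny demonstration with made-up numbers:

import numpy as np
from sklearn.metrics import mean_squared_log_error

y = np.array([3.0, 5.0, 2.5])
p = np.array([2.5, -0.1, 3.0])   # one slightly negative prediction

# mean_squared_log_error(y, p) would raise:
# "Mean Squared Logarithmic Error cannot be used when targets contain negative values."
print(calcs_msle(y, p))          # clipping the negative value first makes it valid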
Example #8
import math
from sklearn import metrics

def log_rmse(y_orig, y_pred):
    return math.sqrt(metrics.mean_squared_log_error(y_orig, y_pred))
print("Mean absolute error: ", round(np.mean(errors), 2))
print("Accuracy: ", round(accuracy, 2), "%", "\n")

print("Explained variance regression score: ",
      explained_variance_score(y_rescaled, predict_valid))
print("R2 score: ", r2_score(y_rescaled, predict_valid), "\n")

print("Maximum residual error: ", max_error(y_rescaled, predict_valid))
print("Median absolute error: ",
      median_absolute_error(y_rescaled, predict_valid))
print("Mean absolute error: ", mean_absolute_error(y_rescaled, predict_valid))
print("Mean squared error: ", mean_squared_error(y_rescaled, predict_valid))
print("Root mean squared error:",
      sqrt(mean_squared_error(y_rescaled, predict_valid)))
print("Mean squared logarithmic error: ",
      mean_squared_log_error(y_rescaled, predict_valid), "\n")

##############################################################################

# Leave-one-out cross validation

cv = LeaveOneOut()

scores = cross_validate(model,
                        X,
                        y,
                        scoring=[
                            'r2', 'neg_median_absolute_error',
                            'explained_variance', 'max_error',
                            'neg_mean_absolute_error',
                            'neg_mean_squared_error',
import numpy as np
from sklearn.metrics import mean_squared_log_error

def rmsle(y_test, predictions):
    return np.sqrt(mean_squared_log_error(y_test, predictions))
Example #11
x_scaled = scale(x)
y_scaled = scale(y)

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)

reg_all = LinearRegression()
reg_all.fit(x_train, y_train)
y_pred = reg_all.predict(x_test)
reg_all.score(x_test, y_test)

from sklearn.tree import DecisionTreeRegressor
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)
dt = DecisionTreeRegressor(max_depth=5, random_state=6)
dt.fit(x_train, y_train)
y_pred = dt.predict(x_test)
dt.score(x_test, y_test)

from sklearn import metrics
print(metrics.mean_absolute_error(y_test, y_pred))
print(metrics.mean_squared_error(y_test, y_pred))

print(metrics.mean_squared_log_error(y_test, y_pred))

from sklearn.model_selection import cross_val_score
cv_results = cross_val_score(reg_all , x, y,  cv=5 )
print(cv_results)

print('Root Mean Squared Error = %0.3f' % rmse)

#Mean Squared Error
mse = mean_squared_error(y_true, y_pred)
print('Mean Squared Error = %0.3f' % mse)

#Mean Absolute Error
mae = mean_absolute_error(y_true, y_pred)
print('Mean Absolute Error = %0.3f' % mae)

#Median Absolute Error
med_ea = median_absolute_error(y_true, y_pred)
print('Median Absolute Error = %0.3f' % med_ea)

#Mean Squared Log Error
msle = mean_squared_log_error(y_true, y_pred)
print('Mean Squared Log Error = %0.3f' % msle)

#Max Error
me = max_error(y_true, y_pred)
print('Max Error = %0.3f' % me)

#Plot Actual vs. Predicted
plt.title('Actual vs. Predicted')
plt.xlabel('YearsExperience')
plt.ylabel('Salary')
plt.scatter(x_true, y_true)
plt.scatter(x_true, y_pred)
plt.show()

#Outputs plot
Example #13
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_log_error
import numpy as np

# Load the data
boston = load_boston()
X = boston['data']
y = boston['target']
X_train, X_test, y_train, y_true = train_test_split(X,
                                                    y,
                                                    test_size=0.3,
                                                    random_state=0)

model = LinearRegression()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

print('RMSLE is', np.sqrt(mean_squared_log_error(y_true, y_pred)))
from sklearn.metrics import mean_squared_log_error

def rmsle(actual, pred):
    pred[pred < 0] = 0
    return mean_squared_log_error(actual, pred)**0.5
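Note that the helper above zeroes negative predictions in place, which mutates the caller's array. A minimal non-mutating variant (a sketch, not from the original snippet) uses np.clip instead:

import numpy as np
from sklearn.metrics import mean_squared_log_error

def rmsle_no_mutation(actual, pred):
    # np.clip returns a new array, so the caller's predictions stay untouched.
    return mean_squared_log_error(actual, np.clip(pred, 0, None)) ** 0.5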
        print("Mean Squared Log Error : ", msle[-1])
        r2.append(r2_score(y, h))
        print("R2 Score : ", r2[-1])

    plt.plot(range(lr.cv_splits), ev, "bo")
    #plt.plot(range(lr.cv_splits), mae, "r+")
    #plt.plot(range(lr.cv_splits), rmse, "g--")
    #plt.plot(range(lr.cv_splits), msle, "b.")
    plt.plot(range(lr.cv_splits), r2, "g^")
    plt.title("Split vs Metrics")
    plt.show()
    """

    print("Test Data")
    b = np.random.normal(scale=1 / X_train.shape[1]**.5)
    # can get the size by checking it in the gradient_descent_runner function
    W = np.random.normal(scale=1 / X_train.shape[1]**.5, size=X_train.shape[1])

    b, W, cost_graph = sr.gradient_descent_runner(X_train, y_train, b, W)

    np.save("SRWeights.npy", np.append(W, b))
    np.save("SRSteps.npy", np.array(steps))

    h = sr.hypothesis(b, W, X_test)

    print("Explained Variance : ", explained_variance_score(y_test, h))
    print("Mean Absolute Error : ", mean_absolute_error(y_test, h))
    print("Root Mean Squared Error : ", mean_squared_error(y_test, h)**.5)
    print("Mean Squared Log Error : ", mean_squared_log_error(y_test, h))
    print("R2 Score : ", r2_score(y_test, h))
Example #16
df = DataFrame({'actual': actual, 'predictions': predictions})
df = df[df['actual'].notna()]
df = df[df['predictions'].notna()]
actual = numpy.asarray(df.actual)
predictions = list(df.predictions)

#save prediction vs expected to data frame and csv
#TODO: change path to ./Results/lstm/.../pred_vs_exp.csv
df.to_csv("./Results/lstm/" + sys.argv[1] + "/pred_vs_exp.csv", index=False)

# evaluate performance
rmse = sqrt(mean_squared_error(actual, predictions))
mse = mean_squared_error(actual, predictions)
evs = explained_variance_score(actual, predictions)
mae = mean_absolute_error(actual, predictions)
msle = mean_squared_log_error(actual, predictions)
meae = median_absolute_error(actual, predictions)
r_square = r2_score(actual, predictions)
print("rmse: ", rmse, " mse: ", mse, "evs: ", evs, "mae: ", mae, "msle: ",
      msle, "meae: ", meae, "r_square: ", r_square)

#save accuracy metrics to data frame
performance_evals = DataFrame(columns=['rmse', 'mse', 'evs', 'mae', \
                                        'msle', 'meae', 'r_square'])
performance_evals = performance_evals.append({'rmse':rmse, \
                                                'mse':mse, \
                                                'evs':evs, \
                                                'mae':mae, \
                                                'msle':msle, \
                                                'meae':meae, \
                                                'r_square': r_square}, \
Example #17
mse3 = mean_squared_error(y1test, y_poly_pred1)
rmse3 = np.sqrt(mse3)

# 4th Degree (quartic) fit
polynomial_features2 = PolynomialFeatures(degree=4)
x_poly2 = polynomial_features2.fit_transform(x1)
x_poly_test2 = polynomial_features2.fit_transform(x1test)
model2 = LinearRegression()
model2.fit(x_poly2, y1)
y_poly_pred2 = np.around(model2.predict(x_poly_test2))

#Evaluate
r2_d4 = r2_score(y1test, y_poly_pred2)
mse4 = mean_squared_error(y1test, y_poly_pred2)
rmse4 = np.sqrt(mse4)
rmsle4 = np.sqrt(mean_squared_log_error(y1test, y_poly_pred2))

# 5th Degree
polynomial_features3 = PolynomialFeatures(degree=5)
x_poly3 = polynomial_features3.fit_transform(x1)
x_poly_test3 = polynomial_features3.fit_transform(x1test)
model3 = LinearRegression()
model3.fit(x_poly3, y1)
y_poly_pred3 = np.around(model3.predict(x_poly_test3))

#Evaluate
r2_d5 = r2_score(y1test, y_poly_pred3)
mse5 = mean_squared_error(y1test, y_poly_pred3)
rmse5 = np.sqrt(mse5)
rmsle5 = np.sqrt(mean_squared_log_error(y1test, y_poly_pred3))
Example #18
            b, W, cost_graph = lr.gradient_descent_runner(X, y, b, W)

            plt.plot(range(lr.num_iterations), np.log(cost_graph))
            plt.title("Number of Iterations vs Cost")
            plt.show()

            X, y = X_train[start:end], y_train[start:end]
            h = lr.hypothesis(b, W, X)

            ev.append(explained_variance_score(y, h))
            print("Explained Variance : ", ev[-1])
            mae.append(mean_absolute_error(y, h))
            print("Mean Absolute Error : ", mae[-1])
            rmse.append(mean_squared_error(y, h)**.5)
            print("Root Mean Squared Error : ", rmse[-1])
            msle.append(mean_squared_log_error(y, h))
            print("Mean Squared Log Error : ", msle[-1])
            r2.append(r2_score(y, h))
            print("R2 Score : ", r2[-1])

        global_mae.append(np.average(mae))
        lambdas.append(lr.l2_lambda)
        if best_mae > global_mae[-1]:
            best_mae = global_mae[-1]
            best_l2 = lr.l2_lambda
        lr.l2_lambda *= 3

    print("Test Data")
    lr.l2_lambda = best_l2
    print("With best hyperparameter lambda ", lr.l2_lambda)
    b = np.random.normal(scale=1 / X_train.shape[1]**.5)
Example #19
x_new['is_holiday']=df['is_holiday']
x_new['clouds_all']=df['clouds_all']
x_new['weather_type']=df['weather_type']
x_new['humidity']=df['humidity']
#x_new['date']=df['date']
#x_new['month']=df['month']

#x_train, x_test, y_train, y_test=train_test_split(x_new, Y, test_size=0.25, random_state=42)

model=RandomForestRegressor(n_jobs=-1,n_estimators=100,max_depth=6) #original is 100 and 6

#model.fit(x_train,y_train)
model.fit(x_new,Y)
y_pred=model.predict(x_new)
#y_pred=model.predict(x_test)
print('msle is: %r' % mean_squared_log_error(Y, y_pred))
#err=mean_squared_error(y_test, y_pred)

#print('mean squared error is %r'%err)


s='DataSets/Test.csv'
df2=clean(s)
dates=df2['date_time']
#X=df2.drop(['date_time','dew_point'], axis=1)

x_new=pd.DataFrame()
x_new['temperature']=df2['temperature']
x_new['hour']=df2['hour']
x_new['weather_description']=df2['weather_description']
x_new['is_holiday']=df2['is_holiday']
Example #20
                     verbose_eval=500)
    xgbm_va_pred = np.expm1(xgbm.predict(xgb.DMatrix(X_valid)))
    xgbm_va_pred = np.clip(xgbm_va_pred, 0, 100000)
    xgbm_va_pred[xgbm_va_pred < 0] = 0

    # ENS
    # lists for keep results
    lgb_xgb_rmsle = []
    lgb_xgb_alphas = []

    for alpha in np.linspace(0, 1, 101):
        y_pred = alpha * gbc_va_pred + (1 - alpha) * xgbm_va_pred
        #rmsle_score = np.sqrt(mean_squared_log_error(np.expm1(y_valid), y_pred))
        rmsle_score = np.sqrt(
            mean_squared_log_error(
                np.expm1(y_valid),
                np.where(np.expm1(y_valid) > 5000, np.expm1(y_valid), y_pred)))
        lgb_xgb_rmsle.append(rmsle_score)
        lgb_xgb_alphas.append(alpha)

    lgb_xgb_rmsle = np.array(lgb_xgb_rmsle)
    lgb_xgb_alphas = np.array(lgb_xgb_alphas)

    lgb_xgb_best_alpha = lgb_xgb_alphas[np.argmin(lgb_xgb_rmsle)]

    print('best_rmsle=', lgb_xgb_rmsle.min())
    print('best_alpha=', lgb_xgb_best_alpha)
    plt.plot(lgb_xgb_alphas, lgb_xgb_rmsle)
    plt.title('rmsle for ensemble')
    plt.xlabel('alpha')
    plt.ylabel('rmsle')
from sklearn.metrics import mean_squared_log_error

def evaluate(y_pred, y_true):
    """Returns the RMSLE(y_pred, y_true)"""
    return (mean_squared_log_error(y_true, y_pred))**0.5
Example #22
    def train(self, data):
        ts = datetime.datetime.now()
        self._features = data["features"]
        self._label = data["label"]
        # self._model.fit(self._features, self._label)

        result = dict()
        result["metricas"] = list()

        self._data = data["data"]
        ## holds the complete dataset
        self._features = data["features"]  # X
        self._target = data["label"]  # Y
        #self._model.fit(self._features, self._target)

        print("muestra el data. describe")
        describe = self._data.describe(
        )  ## TODO: if this prints something useful, send it to the UI; AttributeError: 'dict' object has no attribute 'describe'
        print(describe)
        item = dict()
        item["descripcion"] = str(describe)
        item["metrica"] = "Data.describe()"
        result["metricas"].append(item)

        print("muestra el data. corr()")
        corr = self._data.corr()  ## TODO: if this works, display the plot
        item = dict()
        item["descripcion"] = str(corr)
        item["metrica"] = "Correlacion de los datos"
        result["metricas"].append(item)
        print(corr)

        ax = plt.figure(figsize=(12, 12))
        sns.heatmap(corr,
                    mask=np.zeros_like(corr, dtype=bool),
                    cmap=sns.diverging_palette(220, 10, as_cmap=True),
                    square=True)
        plt.ylabel('Actual label')
        plt.xlabel('Predicted label')
        all_sample_title = 'Matriz de correlacion: {0}'.format(1)
        plt.title(all_sample_title, size=15)
        # figure.show()
        ax.savefig('MatrizCorrelacion.png')

        scores = cross_val_score(
            self._model, self._features, self._target,
            cv=5)  ## cv sets how many folds the dataset is split into
        print("muestra el score pero del cross validation")
        item = dict()
        item["descripcion"] = str(scores.mean())
        item["metrica"] = "CrossValidation_mean"
        result["metricas"].append(item)
        item = dict()
        item["descripcion"] = str(scores)
        item["metrica"] = "CrossValidation_scores"
        result["metricas"].append(item)

        item = dict()
        item["descripcion"] = str(self._features)
        item["metrica"] = "Datos de X completos 100%"
        result["metricas"].append(item)

        item = dict()
        item["descripcion"] = str(self._target)
        item["metrica"] = "Datos de Y completos 100%"
        result["metricas"].append(item)

        ### BEGIN METRICS TEST REGION ###

        # Split the data into test and training (30% for test)
        X_train, X_test, Y_train, Y_test = train_test_split(self._features,
                                                            self._target,
                                                            test_size=0.3)

        # TRAIN USING the 70% split
        self._model = self._model.fit(X_train, Y_train)
        # item = dict()
        # item["descripcion"] = str(self._model)
        # item["metrica"] = "self_model"
        # result["metricas"].append(item)

        accuracy = self._model.score(X_test, Y_test)
        item = dict()
        item["descripcion"] = str(accuracy)
        item["metrica"] = "exactitud(accuracy)"
        result["metricas"].append(item)

        print('Accuracy(exactitud): ' + str(accuracy))

        print('Datos de X: ' + str(self._features))
        print('Datos de Y: ' + str(self._target))

        print('x e y de test aplicados al modelo')

        print(X_test)

        item = dict()
        item["descripcion"] = str(X_train)
        item["metrica"] = "70% X entrenamiento"
        result["metricas"].append(item)

        item = dict()
        item["descripcion"] = str(Y_train)
        item["metrica"] = "70% Y engtrenamiento"
        result["metricas"].append(item)

        item = dict()
        item["descripcion"] = str(X_test)
        item["metrica"] = "30% X test"
        result["metricas"].append(item)

        item = dict()
        print(Y_test)
        item["descripcion"] = str(Y_test)
        item["metrica"] = "30% Y test"
        result["metricas"].append(item)

        prediction = self._model.predict(X_test)

        acc = metrics.accuracy_score(Y_test, prediction)

        item = dict()
        print('accuracy_score' + str(acc))
        item["descripcion"] = str(acc)
        item["metrica"] = "sklearn.metrics.accuracy_score"
        result["metricas"].append(item)

        confMatr = metrics.confusion_matrix(Y_test, prediction)
        item = dict()
        print(' metrics.confusion_matrix: ' + str(confMatr))
        item["descripcion"] = str(confMatr)
        item["metrica"] = "sklearn. metrics.confusion_matrix"
        result["metricas"].append(item)

        figure = plt.figure(figsize=(100, 100))
        sns.heatmap(confMatr,
                    annot=True,
                    fmt=".1f",
                    linewidths=3,
                    square=True,
                    cmap='Blues_r')
        plt.ylabel('Actual label')
        plt.xlabel('Predicted label')
        all_sample_title = 'Accuracy Score: {0}'.format(acc)
        plt.title(all_sample_title, size=15)
        # figure.show()
        figure.savefig('ConfusionMatrix.png')

        item = dict()
        print('predeciendo el X_test' + str(prediction))
        item["descripcion"] = str(prediction)
        item["metrica"] = "Y predecido con 70% datos"
        result["metricas"].append(item)

        # Measure - Since this is a regression problem, we will use the r2 score metric.
        scoreR2 = metrics.r2_score(Y_test, prediction)
        item = dict()
        item["descripcion"] = str(scoreR2)
        item["metrica"] = "score_r2_metric"
        result["metricas"].append(item)
        print(scoreR2)

        # For evaluation we use Y_test because it is the pure ground-truth value
        # (from the 30% of the data, which has not been used until now), so we
        # can relate it to the predicted values.
        evs = metrics.explained_variance_score(Y_test, prediction)
        mae = metrics.mean_absolute_error(Y_test, prediction)
        mse = metrics.mean_squared_error(Y_test, prediction)
        mslg = metrics.mean_squared_log_error(Y_test, prediction)
        mene = metrics.median_absolute_error(Y_test, prediction)
        # RMSE - Root Mean Squared Error (RMSE) is the square root of the mean of the squared errors:
        # MSE is more popular than MAE because MSE "punishes" larger errors. But, RMSE is even more popular than MSE because RMSE is interpretable in the "y" units.
        rmse = np.sqrt(metrics.mean_squared_error(Y_test, prediction))
        print(rmse)

        item = dict()
        item["descripcion"] = str(rmse)
        item["metrica"] = "Root Mean Squared Error (RMSE)"
        result["metricas"].append(item)

        item = dict()
        item["descripcion"] = str(evs)
        item["metrica"] = "explained_variance_score"
        result["metricas"].append(item)

        item = dict()
        item["descripcion"] = str(mae)
        item["metrica"] = "mean_absolute_error"
        result["metricas"].append(item)

        item = dict()
        item["descripcion"] = str(mse)
        item["metrica"] = "mean_squared_error"
        result["metricas"].append(item)

        item = dict()
        item["descripcion"] = str(mslg)
        item["metrica"] = "mean_squared_log_error"
        result["metricas"].append(item)

        item = dict()
        item["descripcion"] = str(mene)
        item["metrica"] = "median_absolute_error"
        result["metricas"].append(item)

        tf = datetime.datetime.now()
        tf = tf - ts
        item = dict()
        item["descripcion"] = str(tf)
        item["metrica"] = "time"
        result["metricas"].append(item)

        print(evs)
        print(mae)
        print(mse)
        print(mslg)
        print(mene)

        # The coefficients
        # print('Coefficients: \n', self._model.coef_)
        # item = dict()
        # item["descripcion"] = str(self._model.coef_)
        # item["metrica"] = "coeficiente del modelo luego de la prediccion"
        # result["metricas"].append(item)

        # The mean squared error
        print("Mean squared error: %.2f" %
              mean_squared_error(Y_test, prediction))
        # Explained variance score: 1 is perfect prediction
        print('Variance score: %.2f' % r2_score(Y_test, prediction))
        ### END METRICS TEST REGION ###

        return result["metricas"]
svm = svm.SVR()
reg = GridSearchCV(svm, parameters, scoring='r2', n_jobs=-1)
reg.fit(X_train, y_train)
best_score = reg.best_score_
best_param = reg.best_params_
best_model = reg.best_estimator_  #Can use in Cross_val_score & predict

#Cross validated estimate on training and test data
score = cross_val_score(estimator=svm,
                        X=X_train,
                        y=y_train,
                        cv=kfold,
                        scoring='r2')
prediction = cross_val_predict(svm, X_test, y_test, cv=kfold, n_jobs=-1)

#Variants of scoring
msle = mean_squared_log_error(y_test, prediction)
mse = mean_squared_error(y_test, prediction)
mae = mean_absolute_error(y_test, prediction)
r2 = r2_score(y_test, prediction)
from math import sqrt
rmse = sqrt(mse)  # root mean squared error

#model Visualization
plt.plot(y_test, color='red', label='Real time lapse')
plt.plot(prediction, color='blue', label='Predicted time lapse')
plt.title('Time lapse interval predictions')
plt.xlabel('Frequency')
plt.ylabel('Time lapse')
plt.legend()
plt.show()
Example #24
print(model.score(X, Y))
Y_pred = model.predict(X_test)

print("---------------model score--------------")
print(model.score(X_test, Y_test))
#print(model.predict(test.iloc[514: 516, 0:256 ]))

print("------------evaluation-------------")
print("macrof1 score")
print(f1_score(Y_test, Y_pred, average='macro'))
print("microf1 score")
print(f1_score(Y_test, Y_pred, average='micro'))
print("weightedf1 score")
print(f1_score(Y_test, Y_pred, average='weighted'))
print("mean_squared_log_error")
print(mean_squared_log_error(Y_test, Y_pred))
print("hamming loss")
Y_pred1 = np.array(Y_pred)
Y_test1 = np.array(Y_test)
print(np.sum(np.not_equal(Y_test1, Y_pred1)) / float(Y_test1.size))

print("-----------------RandomForestClassifier---------------")
modelrf = RandomForestClassifier(n_estimators=1000,
                                 criterion='gini',
                                 max_depth=None,
                                 min_samples_split=5,
                                 min_samples_leaf=1,
                                 min_weight_fraction_leaf=0.0,
                                 max_features='auto',
                                 max_leaf_nodes=None,
                                 bootstrap=True,
Example #25
import math
import sklearn.metrics as skl_metrics

def NWRMSLE(y, pred, weights=None):
    err2 = skl_metrics.mean_squared_log_error(y, pred, sample_weight=weights)
    return math.sqrt(err2)
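A hedged usage sketch (made-up numbers) showing that the optional weights argument is forwarded to sample_weight, so heavier-weighted rows contribute more to the log-error average:

import numpy as np

y = np.array([10.0, 3.0, 8.0])
pred = np.array([12.0, 2.0, 8.5])
weights = np.array([1.25, 1.0, 1.0])      # e.g. promoted items weighted more heavily

print(NWRMSLE(y, pred))                   # unweighted RMSLE
print(NWRMSLE(y, pred, weights=weights))  # weighted RMSLE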
import numpy as np
from sklearn.metrics import mean_squared_log_error

def rmsle(y_test, y_pred):
    return np.sqrt(mean_squared_log_error(y_test, y_pred))
Example #27
plt.xlabel("Years of Employee")
plt.ylabel("Saleries of Employee")
plt.show()

import statsmodels.api as sm
#import statsmodels.formula.api as sm
#import statsmodels.tools.tools.add_constant as sv
X1 = sm.add_constant(X)
reg = sm.OLS(y, X1).fit()
reg.summary()

from sklearn.metrics import mean_absolute_error
mean_absolute_error(y_test, y_predict)

from sklearn.metrics import mean_squared_error
mean_squared_error(y_test, y_predict)

from math import sqrt
from sklearn.metrics import mean_squared_error
result = sqrt(mean_squared_error(y_test, y_predict))

from sklearn.metrics import mean_squared_log_error
np.sqrt(mean_squared_log_error(y_test, y_predict))

X_train, X_test, y_train, y_test = train_test_split(Data_new, y, test_size=0.2)


from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error

params = {'n_estimators': 500, 'max_depth': 4, 'min_samples_split': 2,
          'learning_rate': 0.01, 'loss': 'ls'}
clf = GradientBoostingRegressor(**params)


clf.fit(X_train, y_train)
mse = mean_squared_error(y_test, clf.predict(X_test))
msle = mean_squared_log_error(y_test, clf.predict(X_test))  # avoid shadowing the imported function
print("MSE: %.4f" % mse)
print("MSLE: %.4f" % msle)

feat_importances = pd.Series(clf.feature_importances_, index=Data_new.columns)
feat_importances.nlargest(10).plot(kind='barh')




test = pd.read_csv('test_QkPvNLx.csv')
test.info()
test['Competition_Metric'].hist()
test['Competition_Metric'].mean()
test['Competition_Metric'].median()

Example #29
    _valid_df = raw.train.loc[valid_idx]
    train_dataset = lgb.Dataset(_train_df[features], _train_df["likes_log"])
    valid_dataset = lgb.Dataset(_valid_df[features], _valid_df["likes_log"])
    model = lgb.train(
        Config.lgb_params,
        train_dataset,
        num_boost_round=1000,
        valid_sets=[train_dataset, valid_dataset],
        verbose_eval=50,
        early_stopping_rounds=200,
        categorical_feature=cat_features,
    )
    y_pred = np.expm1(model.predict(_valid_df[features]))
    y_pred[y_pred < 0] = 0
    y_true = _valid_df["likes"].values
    rmsle = np.sqrt(mean_squared_log_error(y_true, y_pred))
    rmsles.append(rmsle)
    print(f"------------------------ fold {fold} -----------------------")
    print(f"------------------- rmsle {rmsle} -----------------------")
    print()

print("")
print(
    f"------------------- average rmsle {np.mean(rmsles)} -----------------------"
)

# %%
train_dataset = lgb.Dataset(raw.train[features], raw.train["likes_log"])
model = lgb.train(
    Config.lgb_params,
    train_dataset,
from sklearn import metrics

def calc_score(y_true, y_pred):
    score = 100 * max(0, 1 - metrics.mean_squared_log_error(y_true, y_pred))

    return score
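A brief illustration (hypothetical targets and predictions) of the score's range: values near 100 mean a small MSLE, and anything with MSLE >= 1 is floored at 0:

import numpy as np

y_true = np.array([3.0, 5.0, 2.5, 7.0])
good = np.array([2.8, 5.1, 2.4, 7.3])
bad = np.array([50.0, 0.1, 40.0, 0.2])

print(calc_score(y_true, good))   # close to 100 (small MSLE)
print(calc_score(y_true, bad))    # floored at 0 (MSLE >= 1)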
Example #31
import numpy as np
from sklearn.metrics import mean_squared_log_error

def get_rmsle(y_true, y_pred):
    return np.sqrt(mean_squared_log_error(np.expm1(y_true), np.expm1(y_pred)))
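get_rmsle applies expm1 to both arguments, which only makes sense if the model was trained on log1p-transformed targets; a hedged illustration of that assumed workflow:

import numpy as np

# Hypothetical raw targets; the model is assumed to have been fit on log1p(y).
y_raw = np.array([10.0, 200.0, 3.0])
y_log = np.log1p(y_raw)
pred_log = y_log + 0.05          # stand-in for a model's log-space predictions

# expm1 inside get_rmsle maps both back to the original scale before scoring,
# so the result is roughly the 0.05 offset applied in log space.
print(get_rmsle(y_log, pred_log))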
Example #32
    
  print(history.history.keys())
  plt.plot(history.history['mean_absolute_error'])
  plt.plot(history.history['val_mean_absolute_error'])
  plt.title('model mean absolute error')
  plt.ylabel('mean absolute error')
  plt.xlabel('epoch')
  plt.legend(['train', 'val'], loc='upper left')
  plt.show()
  

 
  pred=pred.reshape(-1,)

  mae=mean_absolute_error(np.exp(pred),test_answer)
  msle=mean_squared_log_error(np.exp(pred),test_answer)
  
  
  mae_list.append(mae)
  RMSL_list.append(np.sqrt(msle))
  
  
  print ("# MAE: {}\n".format( mae )	)
  print ("# Mean square log error: {}\n".format( msle )	)
  print ("# Root Mean square log error: {}\n".format( np.sqrt(msle ))	)

print ("# mean of MAE: {}\n".format( np.mean(mae_list) )	)
print ("# std of MAE: {}\n".format( np.std(mae_list) )	)
print ("# mean of Root Mean square log error: {}\n".format( np.mean(RMSL_list ))	)
print ("# std of Root Mean square log error: {}\n".format( np.std(RMSL_list) )	)
Example #33
    def __init__(self, parameters_dict):

        super(NetPosUsdDQRWU, self).__init__(parameters_dict)
        self.loss_eval = lambda x, y: mean_squared_log_error(x, y)
                                n_estimators=30,
                                learning_rate=0.1,
                                random_state=42)
ada_svr_reg.fit(ri_PaintingLT_prepared_train, ri_PaintingLT_labels_train)
ri_PaintingLT_predicted = ada_svr_reg.predict(ri_PaintingLT_prepared_test)

from sklearn.metrics import mean_squared_error

ada_svr_reg_mse = mean_squared_error(ri_PaintingLT_labels_test,
                                     ri_PaintingLT_predicted)
ada_svr_reg_rmse = np.sqrt(ada_svr_reg_mse)
print(ada_svr_reg_rmse)

from sklearn.metrics import mean_absolute_error

ada_svr_reg_mae = mean_absolute_error(ri_PaintingLT_labels_test,
                                      ri_PaintingLT_predicted)
print(ada_svr_reg_mae)

ada_svr_reg_mape = (np.abs(
    (ri_PaintingLT_predicted - ri_PaintingLT_labels_test) /
    ri_PaintingLT_labels_test).mean(axis=0))
print(ada_svr_reg_mape)

from sklearn.metrics import mean_squared_log_error

ri_PaintingLT_predicted[ri_PaintingLT_predicted < 0] = 0
ada_svr_reg_rmsle = np.sqrt(
    mean_squared_log_error(ri_PaintingLT_labels_test, ri_PaintingLT_predicted))
print(ada_svr_reg_rmsle)
Example #35
def msle(labels, predictions):
    from sklearn.metrics import mean_squared_log_error
    return mean_squared_log_error(labels, predictions)
Example #36
import pandas as pd
from sklearn.metrics import mean_squared_log_error

# Read the data
data = pd.read_csv("../Train/train.csv")
test_data = pd.read_csv("../test.csv")

print(data.columns)

train_predictions = data.cc_cons_apr + data.cc_cons_may + data.cc_cons_jun
print(mean_squared_log_error(data.cc_cons, train_predictions))

test_predictions = test_data.cc_cons_apr + test_data.cc_cons_may + test_data.cc_cons_jun
final_submission_data = pd.DataFrame({
    'id': test_data.id,
    'cc_cons': test_predictions
})
print(final_submission_data.head())
final_submission_data.to_csv("../Submissions/avg_submission.csv", index=False)