def linear_regression(x_train, y_train, x_test, y_test, axis, figname):
    """Fit a linear regression, plot its test errors and return the RMSE.

    A ``linear_model.LinearRegression`` is fitted on the training data and
    used to predict ``x_test``. The true and predicted test values are then
    handed to ``plot.error_boxplot`` as two dicts keyed by sample position.

    Args:
        x_train: Training features, passed to ``fit``.
        y_train: Training targets, passed to ``fit``.
        x_test: Features the fitted model predicts on.
        y_test: True targets for ``x_test``. Must support element-wise
            subtraction with the prediction array and integer indexing.
        axis: Forwarded unchanged to ``plot.error_boxplot``.
        figname: Forwarded unchanged to ``plot.error_boxplot``.

    Returns:
        The root mean squared error between ``y_test`` and the predictions.
    """
    model = linear_model.LinearRegression()
    model.fit(x_train, y_train)
    predictions = model.predict(x_test)

    residuals = y_test - predictions
    RMSE = np.sqrt(np.mean(residuals ** 2.0))

    # The evaluation function takes dicts keyed by sample id. True values are
    # truncated to int here, while predictions are passed through as-is.
    sample_ids = range(len(predictions))
    expected_by_id = {i: int(y_test[i]) for i in sample_ids}
    predicted_by_id = {i: predictions[i] for i in sample_ids}

    # NOTE(review): the meaning of the literal 5 is defined by
    # plot.error_boxplot, which is not visible here -- confirm before changing.
    plot.error_boxplot(expected_by_id, predicted_by_id, 5, axis, figname)

    return RMSE
# Tools
from utils import data
from evaluation import plot

# Compare the stored linear-regression predictions with the true review
# scores: print the root mean squared error, then draw the error box plot.
print("> Loading")
root = data.getParent(__file__)
print("loading review scores")
target = data.loadFile(root + "/computed/reviews_score.pkl")
print("loading predicted scores")
predict = data.loadFile(root + "/computed/linear_regression_predict.pkl")

# Sum of squared errors over every review in `target`; `predict` must hold a
# value for each of those keys (a missing key raises KeyError).
squared_error = sum((target[review] - predict[review]) ** 2 for review in target)

# Divide by a float so integer scores are not truncated by Python 2 integer
# division, then take the square root: without it the value reported under
# the "RMSE" label is only the mean squared error.
RMSE = (squared_error / float(len(target))) ** 0.5

print("RMSE: %s" % RMSE)

plot.error_boxplot(target, predict)