""" model = reg_cnn((X_train.shape[1], X_train.shape[2], X_train.shape[3])) model.fit(X_train, y_train, epochs=800, batch_size=64, verbose=1, validation_split=0.1) score = model.evaluate(X_test, y_test, batch_size=50) print score pred_y_test = model.predict(X_test) # means of val my_rmse = rmse(pred_y_test, y_test) print "rmse = ", my_rmse plt.figure(1) #plt.ylim(-1.5,3) plt.plot(dates[len(dates) - len(y_test):len(dates)], y_test, color='g') plt.plot(dates[len(dates) - len(pred_y_test):len(dates)], pred_y_test, color='r') plt.show() pred_y_train = model.predict(X_train) plt.figure(1) plt.plot(dates[0:len(y_train)], y_train, color='g') plt.plot(dates[0:len(pred_y_train)], pred_y_train, color='r') plt.show()
def main():
    """Run the sales-forecasting pipeline: load, evaluate, predict, save.

    Command line:
        --train   as the first argument: call
                  ``models.train_regressor_models`` on the training split and
                  return. Without it, the regressors are obtained through the
                  ``models.load_regressor_*`` helpers and the full
                  evaluation / prediction pipeline runs.

    Files read:    ./data/training.csv, ./data/test.csv
    Files written: ./results/prediction.csv, ./results/test_prediction.csv
                   (the ./results/ directory is not created here).

    All figures are displayed by the single ``plt.show()`` at the end.
    """
    # ---------- Directories & User inputs --------------
    # Location of data folder
    data_dir = './data/'
    FLAG_train = (len(sys.argv) > 1 and sys.argv[1] == '--train')

    ##########################################
    ######## Load and preprocess data ########
    ##########################################
    # Read and preprocess data from CSV
    data = dataset.read_and_preprocess_data(data_dir=data_dir,
                                            file_name='training.csv')
    print(data.head())
    print(data.tail())
    # DataFrame.info() writes its own report and returns None, so it is
    # called on its own instead of being passed to print.
    data.info()

    plt.figure()
    data.groupby(['serieNames'])['sales'].plot()
    plt.legend(loc="best")

    # Split data/labels into train/test set
    X_train, y_train, X_test, y_test = dataset.split_data(df=data,
                                                          test_ratio=0.1)
    # Keep the series labels before scaling replaces the frames with arrays.
    y_train_serieNames = X_train['serieNames']
    y_test_serieNames = X_test['serieNames']

    # Data normalization: the scaler is fitted on the training split only
    # and re-used for the test split and for the new samples further down.
    sc = MinMaxScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)

    ##########################################
    ######## Train regressor #################
    ##########################################
    if FLAG_train:
        models.train_regressor_models(X_train, y_train, n_splits=3)
        # Everything below needs the regressors returned by the
        # models.load_regressor_* helpers, which are not available in
        # training mode, so stop here.
        return

    # Load the pre-trained regressor with tuned parameters
    # Linear Ridge Regression
    regressor_ridge = models.load_regressor_Ridge(X_train, y_train)
    # Random Forest Regression
    regressor_rf = models.load_regressor_RF(X_train, y_train)
    # Support Vector Regression
    regressor_svr = models.load_regressor_SVR(X_train, y_train)

    # Dummy regressor (predicts the training mean)
    dummy = DummyRegressor(strategy='mean')
    dummy.fit(X_train, y_train)

    # Second baseline: the previous true value within each series. The first
    # row of every series has no predecessor and is back-filled.
    y_hat_dummy = pd.DataFrame({
        'y_hat_dummy': y_test,
        'serieNames': y_test_serieNames
    })
    y_hat_dummy = y_hat_dummy.groupby(['serieNames'])['y_hat_dummy'].shift()
    y_hat_dummy = y_hat_dummy.bfill()
    rmse_last_observation = models.rmse(y_test, y_hat_dummy)
    # Label matches the plot title below: this scores the last-observation
    # baseline, not the DummyRegressor mean.
    print('RMSE dummy last observation %.5f' % rmse_last_observation)

    ##########################################
    ######## Compare model performance #######
    ##########################################
    regressor_models = {
        'Baseline Previous Mean': dummy,
        'Ridge Regression': regressor_ridge,
        'Support Vector Regression': regressor_svr,
        'Random Forest Regression': regressor_rf
    }

    # Test errors: test the model with tuned parameters
    for name, regressor_model in sorted(regressor_models.items()):
        y_hat_regressor = regressor_model.predict(X_test)
        RMSE_regressor = models.rmse(y_test, y_hat_regressor)
        summary = 'RMSE %s : %.5f' % (name, RMSE_regressor)
        print(summary)
        plt.figure()
        plt.ylabel("RMSE")
        plt.title(summary)
        plot_prediction_perSerie(y_true=y_test,
                                 y_pred=y_hat_regressor,
                                 y_serieNames=y_test_serieNames)

    plt.figure()
    plt.ylabel("RMSE")
    plt.title('RMSE dummy last observation %.5f' % rmse_last_observation)
    plot_prediction_perSerie(y_true=y_test,
                             y_pred=y_hat_dummy,
                             y_serieNames=y_test_serieNames)

    # Generalization errors: cross_validate_score on the held-out split
    plt.figure()
    plt.title('Generalization errors (RMSE)')
    n_splits = 10
    scoring = 'neg_mean_squared_error'
    for name, regressor_model in sorted(regressor_models.items()):
        test_error = models.get_regressor_cross_validate_score(
            regressor_model, X_test, y_test, scoring=scoring,
            n_splits=n_splits)
        # Scores are negated MSE values; flip the sign before the root.
        test_rmse = np.sqrt(-np.asarray(test_error))
        plt.plot(test_rmse, 'o-',
                 label=name + ' : %0.2f (+/- %0.2f)' %
                 (test_rmse.mean(), test_rmse.std() / 2))
    plt.xlabel("Fold number")
    plt.ylabel("RMSE")
    plt.legend(loc="best")

    ##########################################
    ######## Make predictions ################
    ##########################################
    file_name = 'test.csv'
    new_samples = dataset.read_and_preprocess_data(data_dir=data_dir,
                                                   file_name=file_name)
    X_new = sc.transform(new_samples.values)

    # Directly predict with the regressor as loaded
    y_new_hat = regressor_rf.predict(X_new)

    # Fit all data available (train + test) and make prediction
    X_all = np.concatenate((X_train, X_test), axis=0)
    y_all = np.concatenate((y_train, y_test), axis=0)
    regressor_rf.fit(X_all, y_all)
    y_new_hat_all = regressor_rf.predict(X_new)

    # Plot the prediction results
    plt.figure()
    df_new = pd.DataFrame({
        'sales_pred_90': y_new_hat,
        'sales_pred_100': y_new_hat_all,
        'serieNames': new_samples['serieNames']
    })
    df_new.groupby(['serieNames'])['sales_pred_90'].plot(
        label='sales_pred_90%')
    df_new.groupby(['serieNames'])['sales_pred_100'].plot(
        style='o--', label='sales_pred_100%')
    plt.ylabel("sales")
    plt.legend(loc="best")

    ##########################################
    ######## Save prediction results #########
    ##########################################
    # Save the prediction results. The index is not written (index=False),
    # so no reset_index() is needed beforehand.
    df_new.to_csv('./results/prediction.csv', index=False)

    # Write to the test.csv format
    df_test = pd.read_csv(data_dir + file_name)
    df_test['sales'] = y_new_hat_all
    df_test.to_csv('./results/test_prediction.csv', index=False)

    plt.figure()
    df_test = df_test.set_index(['TSDate'])
    df_test.groupby(['serieNames'])['sales'].plot(style='*-')
    plt.ylabel("sales")
    plt.legend(loc="best")

    # Visualize the prediction
    Visualize_prediction(data_dir)
    plt.legend(loc="best")
    plt.show()
# Spot-check the last two test samples: print the input, the true value, the
# batch prediction, and the prediction obtained by feeding the single sample
# back through the model.
for sample_idx in (-2, -1):
    print(X_test[sample_idx])
    print(Y_test[sample_idx])
    print(predicted_Y_test[sample_idx])
    tmp = X_test[sample_idx]
    # model.predict is given one sample shaped (1, 1, 10).
    tmp = tmp.reshape(1, 1, 10)
    print(model.predict(tmp))

# means of val
my_rmse = rmse(predicted_Y_test, Y_test)
# Single pre-formatted argument so the line prints the same text under both
# the Python 2 print statement and the Python 3 print function.
print("rmse =  %s" % (my_rmse,))

# Test split: true values (green) vs. predictions (red), plotted against the
# trailing slice of `dates` of matching length.
n_dates = len(dates)
plt.figure(1)
#plt.ylim(-1.5,3)
plt.plot(dates[n_dates - len(Y_test):n_dates], Y_test, color='g')
plt.plot(dates[n_dates - len(predicted_Y_test):n_dates],
         predicted_Y_test, color='r')
plt.show()

# Training split: same comparison against the leading slice of `dates`.
predicted_Y_train = model.predict(X_train)
plt.figure(1)
plt.plot(dates[:len(Y_train)], Y_train, color='g')
plt.plot(dates[:len(predicted_Y_train)], predicted_Y_train, color='r')
plt.show()
def ensemble_rmse(weights):
    """Return the RMSE of a weighted blend of the module-level predictions.

    Each entry of ``weights`` is paired positionally with an entry of the
    module-level ``predictions`` (pairing stops at the shorter of the two).
    The weighted terms are summed and scored against ``target[split:]`` with
    the module-level ``rmse`` function.

    Args:
        weights: iterable of per-model weights.

    Returns:
        Whatever ``rmse(target[split:], blended)`` returns.
    """
    weighted_terms = (w * p for w, p in zip(weights, predictions))
    blended = 0
    for term in weighted_terms:
        blended += term
    return rmse(target[split:], blended)