"""

# Build the CNN regressor for the per-sample input shape, i.e. axes 1..3 of
# X_train (X_train is indexed as a 4-D array here).
model = reg_cnn((X_train.shape[1], X_train.shape[2], X_train.shape[3]))
model.fit(X_train,
          y_train,
          epochs=800,
          batch_size=64,
          verbose=1,
          validation_split=0.1)

# Evaluate on the test split and keep the raw predictions for plotting.
score = model.evaluate(X_test, y_test, batch_size=50)
print(score)
pred_y_test = model.predict(X_test)

# RMSE between the test-set predictions and the true test targets.
my_rmse = rmse(pred_y_test, y_test)
# Single-argument print call: valid on both Python 2 and 3. The double space
# reproduces the output of the former `print "rmse = ", my_rmse` statement.
print("rmse =  %s" % (my_rmse,))

# Test split: truth in green, prediction in red, plotted against the trailing
# entries of `dates` (as many entries as there are test samples).
plt.figure(1)
# Optional fixed y-range, left disabled:
#plt.ylim(-1.5,3)
plt.plot(dates[len(dates) - len(y_test):len(dates)], y_test, color='g')
plt.plot(dates[len(dates) - len(pred_y_test):len(dates)],
         pred_y_test,
         color='r')
plt.show()

# Training split: same comparison, plotted against the leading entries of
# `dates`.
pred_y_train = model.predict(X_train)
plt.figure(1)
plt.plot(dates[0:len(y_train)], y_train, color='g')
plt.plot(dates[0:len(pred_y_train)], pred_y_train, color='r')
plt.show()
Exemple #2
0
def main():
    """Run the sales pipeline: load data, then train or evaluate and predict.

    If the first command-line argument is ``--train`` the regressors are
    trained through ``models.train_regressor_models`` and the function
    returns. Otherwise the pre-tuned regressors are loaded, compared with two
    baselines on the held-out split, and used to predict ``test.csv``.
    Predictions are written to ``./results/prediction.csv`` and
    ``./results/test_prediction.csv`` and the figures are shown at the end.
    """
    # ---------- Directories & User inputs --------------
    # Location of data folder
    data_dir = './data/'
    train_mode = (len(sys.argv) > 1 and sys.argv[1] == '--train')

    ##########################################
    ######## Load and preprocess data ########
    ##########################################

    # Read and preprocess data from CSV
    data = dataset.read_and_preprocess_data(data_dir=data_dir,
                                            file_name='training.csv')
    print(data.head())
    print(data.tail())
    # Assuming `data` is a pandas DataFrame: info() writes its own report and
    # returns None, so it is called directly instead of being printed (which
    # emitted a stray "None").
    data.info()
    plt.figure()
    data.groupby(['serieNames'])['sales'].plot()
    plt.legend(loc="best")

    # Split data/labels into train/test set
    X_train, y_train, X_test, y_test = dataset.split_data(df=data,
                                                          test_ratio=0.1)
    # Keep the series identifiers of the test rows before scaling replaces
    # X_test with the scaler's output.
    y_test_serieNames = X_test['serieNames']

    # Data normalization: fit the scaler on the training split only, then
    # apply the same transform to the test split.
    sc = MinMaxScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)

    ##########################################
    ######## Train regressor #################
    ##########################################
    if train_mode:
        models.train_regressor_models(X_train, y_train, n_splits=3)
        return

    # Load the pre-trained regressor with tuned parameters
    # Linear Ridge Regression
    regressor_ridge = models.load_regressor_Ridge(X_train, y_train)
    # Random ForestRegression
    regressor_rf = models.load_regressor_RF(X_train, y_train)
    # Support Vector Regression
    regressor_svr = models.load_regressor_SVR(X_train, y_train)

    # Mean baseline: a mean-strategy DummyRegressor fitted on the training
    # split.
    dummy = DummyRegressor(strategy='mean')
    dummy.fit(X_train, y_train)

    # Last-observation baseline: within each series, predict the previous
    # target value. shift() leaves a NaN at the start of each series, which is
    # back-filled from the following row.
    y_hat_dummy = pd.DataFrame({
        'y_hat_dummy': y_test,
        'serieNames': y_test_serieNames
    })
    y_hat_dummy = y_hat_dummy.groupby(['serieNames'])['y_hat_dummy'].shift()
    y_hat_dummy = y_hat_dummy.bfill()
    rmse_last_observation = models.rmse(y_test, y_hat_dummy)
    # This score belongs to the last-observation baseline, not to the mean
    # DummyRegressor, so it is labelled accordingly.
    print('RMSE dummy last observation %.5f' % rmse_last_observation)

    ##########################################
    ######## Compare model performance #######
    ##########################################

    regressor_models = {
        'Baseline Previous Mean': dummy,
        'Ridge Regression': regressor_ridge,
        'Support Vector Regression': regressor_svr,
        'Random Forest Regression': regressor_rf
    }

    # Test errors: test the model with tuned parameters
    for name, regressor_model in sorted(regressor_models.items()):
        y_hat_regressor = regressor_model.predict(X_test)
        model_rmse = models.rmse(y_test, y_hat_regressor)
        print('RMSE %s : %.5f' % (name, model_rmse))

        plt.figure()
        plt.ylabel("RMSE")
        plt.title('RMSE %s : %.5f' % (name, model_rmse))
        plot_prediction_perSerie(y_true=y_test,
                                 y_pred=y_hat_regressor,
                                 y_serieNames=y_test_serieNames)

    plt.figure()
    plt.ylabel("RMSE")
    plt.title('RMSE dummy last observation %.5f' % rmse_last_observation)
    plot_prediction_perSerie(y_true=y_test,
                             y_pred=y_hat_dummy,
                             y_serieNames=y_test_serieNames)

    # Generalization errors: cross_validate_score
    plt.figure()
    plt.title('Generalization errors (RMSE)')
    n_splits = 10
    scoring = 'neg_mean_squared_error'
    for name, regressor_model in sorted(regressor_models.items()):
        test_error = models.get_regressor_cross_validate_score(
            regressor_model,
            X_test,
            y_test,
            scoring=scoring,
            n_splits=n_splits)
        # Scores are negated MSE values, so flip the sign before the root.
        test_rmse = np.array([np.sqrt(-e) for e in test_error])
        plt.plot(test_rmse,
                 'o-',
                 label=name + ' : %0.2f (+/- %0.2f)' %
                 (test_rmse.mean(), test_rmse.std() / 2))

    plt.xlabel("Fold number")
    plt.ylabel("RMSE")
    plt.legend(loc="best")

    ##########################################
    ######## Make predictions ################
    ##########################################
    file_name = 'test.csv'
    new_samples = dataset.read_and_preprocess_data(data_dir=data_dir,
                                                   file_name=file_name)

    # Scale the new samples with the scaler fitted on the training split.
    X_new = new_samples.values
    X_new = sc.transform(X_new)

    # Directly predict
    y_new_hat = regressor_rf.predict(X_new)

    # Fit all data available and make prediction
    X_all = np.concatenate((X_train, X_test), axis=0)
    y_all = np.concatenate((y_train, y_test), axis=0)
    regressor_rf.fit(X_all, y_all)
    y_new_hat_all = regressor_rf.predict(X_new)

    # Plot the prediction results
    plt.figure()
    df_new = pd.DataFrame({
        'sales_pred_90': y_new_hat,
        'sales_pred_100': y_new_hat_all,
        'serieNames': new_samples['serieNames']
    })
    df_new.groupby(['serieNames'])['sales_pred_90'].plot(
        label='sales_pred_90%')
    df_new.groupby(['serieNames'])['sales_pred_100'].plot(
        style='o--', label='sales_pred_100%')
    plt.ylabel("sales")
    plt.legend(loc="best")

    ##########################################
    ######## Save prediction results #########
    ##########################################

    # Save the prediction results. index=False already keeps the index out of
    # the CSV; the former bare `df_new.reset_index()` discarded its result and
    # had no effect, so it was dropped.
    df_new.to_csv('./results/prediction.csv', index=False)

    # Write to the test.csv format
    df_test = pd.read_csv(data_dir + file_name)
    df_test['sales'] = y_new_hat_all
    df_test.to_csv('./results/test_prediction.csv', index=False)

    plt.figure()
    df_test = df_test.set_index(['TSDate'])
    df_test.groupby(['serieNames'])['sales'].plot(style='*-')
    plt.ylabel("sales")
    plt.legend(loc="best")

    # Visualize the prediction
    Visualize_prediction(data_dir)

    plt.legend(loc="best")
    plt.show()
# Inspect the last two test samples: input, true target, stored prediction,
# and a fresh single-sample prediction.
for idx in (-2, -1):
    print(X_test[idx])
    print(Y_test[idx])
    print(predicted_Y_test[idx])
    # The sample is reshaped to (1, 1, 10) before being passed to
    # model.predict on its own.
    tmp = X_test[idx]
    tmp = tmp.reshape(1, 1, 10)
    print(model.predict(tmp))

    if idx != -1:
        print("")

# RMSE between the test-set predictions and the true test targets.
my_rmse = rmse(predicted_Y_test, Y_test)
# Single-argument print call: valid on both Python 2 and 3. The double space
# reproduces the output of the former `print "rmse = ", my_rmse` statement.
print("rmse =  %s" % (my_rmse,))

# Test split: truth in green, prediction in red, plotted against the trailing
# entries of `dates` (as many entries as there are test samples).
plt.figure(1)
# Optional fixed y-range, left disabled:
#plt.ylim(-1.5,3)
plt.plot(dates[len(dates) - len(Y_test):len(dates)], Y_test, color='g')
plt.plot(dates[len(dates) - len(predicted_Y_test):len(dates)],
         predicted_Y_test,
         color='r')
plt.show()

# Training split: same comparison, plotted against the leading entries of
# `dates`.
predicted_Y_train = model.predict(X_train)
plt.figure(1)
plt.plot(dates[0:len(Y_train)], Y_train, color='g')
plt.plot(dates[0:len(predicted_Y_train)], predicted_Y_train, color='r')
plt.show()
Exemple #4
0
    def ensemble_rmse(weights):
        """Return the RMSE of the weighted blend of ``predictions``.

        Each entry of ``weights`` scales the matching entry of ``predictions``
        (paired with ``zip``, so surplus items on either side are ignored).
        The scaled terms are accumulated and scored with ``rmse`` against
        ``target[split:]``. ``predictions``, ``target``, ``split`` and
        ``rmse`` are resolved from the surrounding scope.
        """
        blended = 0
        for coefficient, model_output in zip(weights, predictions):
            contribution = coefficient * model_output
            blended += contribution

        return rmse(target[split:], blended)