def predict(self, point, last_price):
    """Predict the next price from a new observation.

    Args:
        point: pandas DataFrame with the information needed to make the
            prediction (same columns as the training data).
        last_price: last observed price, used to invert the log-return
            transform back to price units.

    Returns:
        tuple: ``(prediction, [lower, upper])`` — the point prediction
        and its 95% prediction interval, in price units.
    """
    # 1. Standardize the point with the *training* mean and std so it is
    #    on the same scale as the data the model was fitted on.
    test_data = self.__standardize_features_for_test(
        point, self.columns_to_standardize, self.column_means,
        self.column_stds)
    # 2. Append it to the historical data.
    df = pd.concat([self.data, test_data])
    # 3. Windowize the full series.
    #    NOTE: .values replaces the deprecated (removed in pandas 1.0)
    #    .as_matrix(); both return the same ndarray.
    fmt = DataFormatter()
    X, Y = fmt.windowize_series(
        df.values,
        size=self.input_window_size,
        column_indexes=self.columns_to_windowize)
    # 4. Extract the last window — the model input for the new point.
    last_window = fmt.get_last_window(
        df.values,
        size=self.input_window_size,
        column_indexes=self.columns_to_windowize)
    # FIX: Keras predict() expects a leading batch dimension; the sibling
    # predict() implementation adds it, this one did not.
    last_window = last_window[None, :]
    # 5. Compute the in-sample error: [MSE, RMSE, MAE, MAPE%].
    train_score = self.model.evaluate(X, Y, verbose=0)
    train_score = np.array([
        train_score[0],
        np.sqrt(train_score[0]), train_score[1], train_score[2] * 100
    ])
    # 6. Make the prediction; squeeze the batch/output dims to a scalar,
    #    mirroring the sibling predict() implementation.
    prediction = np.squeeze(self.model.predict(last_window))
    # 7. 95% prediction interval using the RMSE as the std estimate.
    pred_upper = prediction + 1.96 * train_score[1]
    pred_lower = prediction - 1.96 * train_score[1]
    # 8. Transform log-returns back to prices.
    prediction = last_price * np.exp(prediction)
    pred_upper = last_price * np.exp(pred_upper)
    pred_lower = last_price * np.exp(pred_lower)
    return prediction, [pred_lower, pred_upper]
def fit_model(self, epochs=200, verbose=0):
    """Train the model for production on the full dataset.

    Args:
        epochs: number of training epochs.
        verbose: Keras verbosity level passed to ``fit``.
    """
    # Patching: no standardization is performed on this path, so the
    # stored training statistics are left empty.
    self.column_means = {}
    self.column_stds = {}
    # Build the sliding training windows over the whole series.
    formatter = DataFormatter()
    self.X, self.Y = formatter.windowize_series(
        self.data.as_matrix(),
        size=self.input_window_size,
        column_indexes=self.columns_to_windowize)
    # Fit on every window.
    self.model.fit(self.X, self.Y,
                   epochs=epochs,
                   batch_size=32,
                   verbose=verbose)
def predict(self, point=None):
    """Predict the next value of the series.

    Args:
        point: optional pandas DataFrame with the information needed for
            the prediction; when omitted, the prediction is made from
            the stored data alone.

    Returns:
        tuple: ``(prediction, lower, upper)`` — point prediction and 95%
        interval bounds, de-standardized back to the 'Close' scale.
    """
    # 1-2. Standardize the new point (if any) with the training
    # statistics and append it to the historical data.
    if point is not None:
        standardized = self.__standardize_features_for_test(
            point, self.columns_to_standardize, self.column_means,
            self.column_stds)
        df = pd.concat([self.data, standardized])
    else:
        df = self.data
    # 3. Windowize the series.
    formatter = DataFormatter()
    X, Y = formatter.windowize_series(
        df.as_matrix(),
        size=self.input_window_size,
        column_indexes=self.columns_to_windowize)
    # 4. Extract the last window and add the leading batch dimension.
    last_window = formatter.get_last_window(
        df.as_matrix(),
        size=self.input_window_size,
        column_indexes=self.columns_to_windowize)[None, :]
    # 5. In-sample error: [MSE, RMSE, MAE, MAPE%].
    raw_score = self.model.evaluate(X, Y, verbose=0)
    train_score = np.array([
        raw_score[0],
        np.sqrt(raw_score[0]),
        raw_score[1],
        raw_score[2] * 100,
    ])
    # 6. Predict and drop the singleton batch/output dimensions.
    prediction = np.squeeze(self.model.predict(last_window))
    # 7. 95% prediction interval using the RMSE as the std estimate.
    rmse = train_score[1]
    pred_upper = prediction + 1.96 * rmse
    pred_lower = prediction - 1.96 * rmse
    # Revert the standardization of the 'Close' column.
    # NOTE(review): fit_model() leaves these dicts empty — this lookup
    # presumably relies on a training path that fills them; verify.
    close_mean = self.column_means[u'Close']
    close_std = self.column_stds[u'Close']
    prediction = prediction * close_std + close_mean
    pred_upper = pred_upper * close_std + close_mean
    pred_lower = pred_lower * close_std + close_mean
    return prediction, pred_lower, pred_upper
def test_model(self, n_splits=9, cv_runs=10, epochs=100, verbose=2):
    """Evaluate the model with walk-forward (time-series) cross-validation.

    Runs ``cv_runs`` repetitions of a ``TimeSeriesSplit`` with
    ``n_splits`` folds, fitting on each training fold and scoring on the
    matching test fold.  Mean scores over all runs and folds are stored
    in ``self.train_results`` / ``self.test_results`` ordered as
    [MSE, RMSE, MAE, MAPE].

    Args:
        n_splits: number of walk-forward folds per run.
        cv_runs: number of repeated cross-validation runs to average.
        epochs: training epochs per fold.
        verbose: Keras verbosity for fit/evaluate.
    """
    from sklearn.model_selection import TimeSeriesSplit

    self.metrics = ['MSE', 'RMSE', 'MAE', 'MAPE']
    train_scores = np.zeros((cv_runs, n_splits, len(self.metrics)))
    test_scores = np.zeros((cv_runs, n_splits, len(self.metrics)))
    fmt = DataFormatter()
    tscv = TimeSeriesSplit(n_splits=n_splits)
    # range instead of xrange: identical behavior here and Python 3
    # compatible.
    for run in range(cv_runs):
        for fold, (train_index, test_index) in enumerate(
                tscv.split(self.data['LogReturn'].values)):
            # Split the dataset into train/test folds.
            # FIX: TimeSeriesSplit yields *positional* indices, so .iloc
            # is required; .loc would fail (or mis-select) on any frame
            # whose index is not a default RangeIndex (e.g. dates).
            train_df = self.data.iloc[train_index]
            test_df = self.data.iloc[test_index]
            # Standardize with statistics from the training fold only,
            # so no test information leaks into the scaling.
            if len(self.columns_to_standardize) != 0:
                train_data, training_means, training_stds = \
                    self.__standardize_features(
                        train_df, self.columns_to_standardize)
                test_data = self.__standardize_features_for_test(
                    test_df, self.columns_to_standardize,
                    training_means, training_stds)
            else:
                train_data = train_df
                test_data = test_df
            # Extract sliding windows (.values replaces the deprecated
            # .as_matrix(); same ndarray).
            trainX, trainY = fmt.windowize_series(
                train_data.values,
                size=self.input_window_size,
                column_indexes=self.columns_to_windowize)
            testX, testY = fmt.windowize_series(
                test_data.values,
                size=self.input_window_size,
                column_indexes=self.columns_to_windowize)
            # Fit the model on this fold.
            self.model.fit(trainX, trainY,
                           epochs=epochs,
                           batch_size=32,
                           validation_data=(testX, testY),
                           verbose=verbose)
            # Score the fold: [MSE, RMSE, MAE, MAPE%].
            train_score = self.model.evaluate(trainX, trainY,
                                              verbose=verbose)
            train_score = np.array([
                train_score[0],
                np.sqrt(train_score[0]), train_score[1],
                train_score[2] * 100
            ])
            test_score = self.model.evaluate(testX, testY,
                                             verbose=verbose)
            test_score = np.array([
                test_score[0],
                np.sqrt(test_score[0]), test_score[1],
                test_score[2] * 100
            ])
            train_scores[run, fold, :] = train_score
            test_scores[run, fold, :] = test_score
    # Average over runs, then over folds.
    self.train_results = train_scores.mean(axis=0).mean(axis=0)
    self.test_results = test_scores.mean(axis=0).mean(axis=0)