Example #1
    def predict(self, point, last_price):
        """Point debe ser un Data Frame de Pandas con las información
		necesaria para realizar la predicción."""
        # 1. Standardize point with training mean and standard deviation.
        test_data = self.__standardize_features_for_test(
            point, self.columns_to_standardize, self.column_means,
            self.column_stds)
        # 2. Add it to the data.
        df = pd.concat([self.data, test_data])
        # 3. Windowize.
        fmt = DataFormatter()
        X, Y = fmt.windowize_series(df.to_numpy(),
                                    size=self.input_window_size,
                                    column_indexes=self.columns_to_windowize)
        # 4. Extract the last window.
        last_window = fmt.get_last_window(
            df.to_numpy(),
            size=self.input_window_size,
            column_indexes=self.columns_to_windowize)
        # 5. Compute the error.
        train_score = self.model.evaluate(X, Y, verbose=0)
        train_score = np.array([
            train_score[0],
            np.sqrt(train_score[0]), train_score[1], train_score[2] * 100
        ])
        # 6. Make the prediction.
        prediction = self.model.predict(last_window)
        # 7. Compute a 95% prediction interval (±1.96 × training RMSE).
        pred_upper = prediction + 1.96 * train_score[1]
        pred_lower = prediction - 1.96 * train_score[1]
        # 8. Transform back the prediction.
        prediction = last_price * np.exp(prediction)
        pred_upper = last_price * np.exp(pred_upper)
        pred_lower = last_price * np.exp(pred_lower)
        return prediction, [pred_lower, pred_upper]
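
The interval in step 7 is a rough 95% band: it uses the training RMSE (train_score[1]) as the error scale and assumes approximately normal residuals, so the bounds are prediction ± 1.96 * RMSE on the log-return scale. Step 8 then maps log returns back to prices via price = last_price * exp(log_return). A minimal numeric sketch of those two steps, with made-up values:

import numpy as np

last_price = 100.0   # last observed closing price (illustrative)
prediction = 0.01    # predicted log return
rmse = 0.02          # training RMSE on the log-return scale

pred_upper = prediction + 1.96 * rmse   # 0.0492
pred_lower = prediction - 1.96 * rmse   # -0.0292

print(last_price * np.exp(prediction))  # ~101.01
print(last_price * np.exp(pred_upper))  # ~105.04
print(last_price * np.exp(pred_lower))  # ~97.12
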
Example #2
    def fit_model(self, epochs=200, verbose=0):
        """Train the model for production."""
        # Patching: reset the stored standardization parameters.
        self.column_means = {}
        self.column_stds = {}
        # Windowize dataset
        fmt = DataFormatter()
        self.X, self.Y = fmt.windowize_series(
            self.data.to_numpy(),
            size=self.input_window_size,
            column_indexes=self.columns_to_windowize)
        self.model.fit(self.X,
                       self.Y,
                       epochs=epochs,
                       batch_size=32,
                       verbose=verbose)
    def predict(self, point=None):
        """Point debe ser un Data Frame de Pandas con las información
		necesaria para realizar la predicción."""
        # 1. Standardize point with training mean and standard deviation.
        # 2. Add it to the data.
        if point is None:
            df = self.data
        else:
            test_data = self.__standardize_features_for_test(
                point, self.columns_to_standardize, self.column_means,
                self.column_stds)
            df = pd.concat([self.data, test_data])
        # 3. Windowize.
        fmt = DataFormatter()
        X, Y = fmt.windowize_series(df.to_numpy(),
                                    size=self.input_window_size,
                                    column_indexes=self.columns_to_windowize)
        # 4. Extract the last window.
        last_window = fmt.get_last_window(
            df.to_numpy(),
            size=self.input_window_size,
            column_indexes=self.columns_to_windowize)
        last_window = last_window[None, :]
        # 5. Compute the error.
        train_score = self.model.evaluate(X, Y, verbose=0)
        train_score = np.array([
            train_score[0],
            np.sqrt(train_score[0]), train_score[1], train_score[2] * 100
        ])
        # 6. Make the prediction.
        prediction = np.squeeze(self.model.predict(last_window))
        # 7. Compute a 95% prediction interval (±1.96 × training RMSE).
        pred_upper = prediction + 1.96 * train_score[1]
        pred_lower = prediction - 1.96 * train_score[1]
        # Revert standardization of the 'Close' column.
        close_mean = self.column_means['Close']
        close_std = self.column_stds['Close']
        prediction = prediction * close_std + close_mean
        pred_upper = pred_upper * close_std + close_mean
        pred_lower = pred_lower * close_std + close_mean
        return prediction, pred_lower, pred_upper
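
Here the model predicts the standardized 'Close' value, so the final block simply inverts the z-score transform applied by the standardization helpers (which are only shown by name above). A minimal sketch of that forward/revert pair, with illustrative numbers:

import numpy as np

close_mean, close_std = 52.0, 4.0      # statistics from the training set
raw_close = np.array([50.0, 56.0])

standardized = (raw_close - close_mean) / close_std   # forward: z = (x - mean) / std
restored = standardized * close_std + close_mean      # revert:  x = z * std + mean
print(standardized)   # [-0.5  1. ]
print(restored)       # [50. 56.]
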
Example #4
import os
import tempfile
import traceback
import weka.core.jvm as jvm
import edeweka.helper as helper
from weka.clusterers import Clusterer
import weka.core.converters as converters
import weka.core.serialization as serialization
from dataformatter import DataFormatter
import weka.core.packages as packages

dataDir = os.path.join(os.path.dirname(os.path.abspath('')), 'data')
modelDir = os.path.join(os.path.dirname(os.path.abspath('')), 'models')

dformat = DataFormatter(dataDir)

dformat.dict2arff(os.path.join(dataDir, 'System.csv'),
                  os.path.join(dataDir, 'System.arff'))

#Arff_file = os.path.join(dataDir, 'System.arff')

jvm.start(packages=True)

data = converters.load_any_file(os.path.join(dataDir, 'System.arff'))
clusterer = Clusterer(classname="weka.clusterers.SimpleKMeans",
                      options=["-N", "10", "-S", "10"])
clusterer.build_clusterer(data)

# print clusterer
# cluster the data
# for inst in data:
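
The script stops short of the commented-out clustering loop. A minimal sketch of how it could continue with python-weka-wrapper's Clusterer API (the print formatting is illustrative); cluster_instance returns the index of the cluster an instance is assigned to:

print(clusterer)

# Assign every instance to one of the 10 clusters.
for inst in data:
    cl = clusterer.cluster_instance(inst)
    print("cluster %d <- %s" % (cl, inst))

jvm.stop()
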
Example #5
    def test_model(self, n_splits=9, cv_runs=10, epochs=100, verbose=2):
        """Evaluación del modelo usando validación cruzada
		hacia adelante."""
        from sklearn.model_selection import TimeSeriesSplit

        self.metrics = ['MSE', 'RMSE', 'MAE', 'MAPE']
        train_scores = np.zeros((cv_runs, n_splits, len(self.metrics)))
        test_scores = np.zeros((cv_runs, n_splits, len(self.metrics)))
        fmt = DataFormatter()
        tscv = TimeSeriesSplit(n_splits=n_splits)
        for j in range(cv_runs):
            # print('\nCross-validation run %i' % (j+1))
            i = 1
            for train_index, test_index in tscv.split(
                    self.data['LogReturn'].values):
                # Split the dataset into training and test sets
                train_df = self.data.loc[train_index]
                test_df = self.data.loc[test_index]
                # Standardize the dataset
                if len(self.columns_to_standardize) != 0:
                    train_data, training_means, training_stds = self.__standardize_features(
                        train_df, self.columns_to_standardize)
                    test_data = self.__standardize_features_for_test(
                        test_df, self.columns_to_standardize, training_means,
                        training_stds)
                else:
                    train_data = train_df
                    test_data = test_df
                # Extract data windows
                trainX, trainY = fmt.windowize_series(
                    train_data.to_numpy(),
                    size=self.input_window_size,
                    column_indexes=self.columns_to_windowize)
                testX, testY = fmt.windowize_series(
                    test_data.to_numpy(),
                    size=self.input_window_size,
                    column_indexes=self.columns_to_windowize)
                # Fit the model
                # print('Fold %i' % (i))
                self.model.fit(trainX,
                               trainY,
                               epochs=epochs,
                               batch_size=32,
                               validation_data=(testX, testY),
                               verbose=verbose)
                # Evaluate each fold of the walk-forward cross-validation
                train_score = self.model.evaluate(trainX,
                                                  trainY,
                                                  verbose=verbose)
                train_score = np.array([
                    train_score[0],
                    np.sqrt(train_score[0]), train_score[1],
                    train_score[2] * 100
                ])
                test_score = self.model.evaluate(testX, testY, verbose=verbose)
                test_score = np.array([
                    test_score[0],
                    np.sqrt(test_score[0]), test_score[1], test_score[2] * 100
                ])
                # print('Train Score: %.5f MSE, %.5f RMSE, %.5f MAE, %.5f%% MAPE' % (train_score[0], train_score[1], train_score[2], train_score[3]))
                # print('Test Score: %.5f MSE, %.5f RMSE, %.5f MAE, %.5f%% MAPE\n' % (test_score[0], test_score[1], test_score[2], test_score[3]))
                # [0: MSE, 1: RMSE, 2: MAE, 3: MAPE]
                train_scores[j, i - 1, :] = train_score
                test_scores[j, i - 1, :] = test_score
                i += 1
        self.train_results = train_scores.mean(axis=0).mean(axis=0)
        self.test_results = test_scores.mean(axis=0).mean(axis=0)
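
The walk-forward behaviour comes entirely from TimeSeriesSplit: each fold trains only on observations that precede the test block, with an expanding training window. A small standalone illustration of the splits it generates:

import numpy as np
from sklearn.model_selection import TimeSeriesSplit

series = np.arange(10)            # stand-in for self.data['LogReturn'].values
tscv = TimeSeriesSplit(n_splits=3)
for train_index, test_index in tscv.split(series):
    print('train:', train_index, 'test:', test_index)
# train: [0 1 2 3] test: [4 5]
# train: [0 1 2 3 4 5] test: [6 7]
# train: [0 1 2 3 4 5 6 7] test: [8 9]
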