Ejemplo n.º 1
0
        x for x in cols if x not in
        ['Date', 'USD', 'Value', 'Open', 'Close', 'High', 'Low', 'Volume']
    ]

    dict_dfs_cols[name] = new_cols

# Build the combined dataset from the series names, their frames and the
# columns selected for each of them.
dataset = ml_dataset.generate_df_dataset(
    values_names, values_dfs, dict_dfs_cols)

# Discard everything before position 31 and renumber the index from zero
# (assuming a pandas DataFrame).
# NOTE(review): the earlier comment said "first 30 rows" while the slice
# starts at 31 -- confirm which count is intended.
dataset = dataset[31:].reset_index(drop=True)
# Deep copy of the trimmed data, kept under a separate name.
datasetY = dataset.copy(deep=True)
# Forward-filling of missing values is currently disabled:
# dataset = dataset.fillna(method='ffill')

# Date ranges for the training and testing periods; calculate_indices is
# handed the dataset and presumably fills in lowerIndex / upperIndex.
training_dates = Iteration.Iteration('2009-08-19', '2014-12-01')
testing_dates = Iteration.Iteration('2014-12-02', '2016-04-20')
training_dates.calculate_indices(dataset)
testing_dates.calculate_indices(dataset)

# [lower, upper] index pair for each period.
trainDates = [training_dates.lowerIndex, training_dates.upperIndex]
testDates = [testing_dates.lowerIndex, testing_dates.upperIndex]

trainX, trainY, testX, testY, cols = ml_dataset.dataset_to_train_using_dates(
    dataset,
    trainDates,
    testDates,
Ejemplo n.º 2
0
# Discard everything before position 31 and renumber the index from zero
# (assuming a pandas DataFrame).
# NOTE(review): the earlier comment said "first 30 rows" while the slice
# starts at 31 -- confirm which count is intended.
dataset = dataset[31:].reset_index(drop=True)
# Disabled experiment: move the HSI / N225 / AXJO columns up by one row and
# drop the final row afterwards.
# colsToShift = [col for col in dataset.columns
#                if 'HSI' in col or 'N225' in col or 'AXJO' in col]
# dataset[colsToShift] = dataset[colsToShift].shift(-1)
# last_row = dataset.shape[0] - 1
# dataset = dataset.drop(last_row, axis=0)

# Same trimming applied to the second frame.
dataset_all = dataset_all[31:].reset_index(drop=True)
# Disabled counterpart of the shift experiment for dataset_all.
# dataset_all[colsToShift] = dataset_all[colsToShift].shift(-1)
# last_row = dataset_all.shape[0] - 1
# dataset_all = dataset_all.drop(last_row, axis=0)

# Date ranges for the training and testing periods; calculate_indices is
# handed `dataset` (not `dataset_all`) and presumably fills in
# lowerIndex / upperIndex.
training_dates = Iteration.Iteration('2009-08-19', '2014-12-01')
testing_dates = Iteration.Iteration('2014-12-02', '2016-04-20')
training_dates.calculate_indices(dataset)
testing_dates.calculate_indices(dataset)

# [lower, upper] index pair for each period.
trainDates = [training_dates.lowerIndex, training_dates.upperIndex]
testDates = [testing_dates.lowerIndex, testing_dates.upperIndex]

trainX, trainY, testX, testY, cols = ml_dataset.dataset_to_train_using_dates(
    dataset,
    trainDates,
    testDates,
Ejemplo n.º 3
0
            ensemble.RandomForestClassifier(),
            ensemble.ExtraTreesClassifier()
    ]:

        # Number of rows by which the target is shifted relative to the
        # features.
        colsToShift = 1

        # Features: columns whose names match `regex`, without the trailing
        # colsToShift rows (dropped by label) and without the target
        # column(s).
        df_x = dataset.filter(regex=regex)
        last_row = list(
            range(df_x.shape[0] - colsToShift, df_x.shape[0]))
        df_x = df_x.drop(last_row, axis=0).drop(colY, axis=1)

        # Target: colY shifted by -colsToShift, then trimmed of the same
        # trailing rows so it lines up with df_x.
        df_y = dataset[colY].shift(-colsToShift)
        last_row = list(
            range(df_y.shape[0] - colsToShift, df_y.shape[0]))
        df_y = df_y.drop(last_row, axis=0)

        # Date ranges for the training and testing periods; calculate_indices
        # is handed the untrimmed `dataset` and presumably fills in
        # lowerIndex / upperIndex.
        training_dates = Iteration.Iteration('2008-06-17', '2011-09-01')
        testing_dates = Iteration.Iteration('2012-09-04', '2014-10-06')
        training_dates.calculate_indices(dataset)
        testing_dates.calculate_indices(dataset)

        # [lower, upper] index pair for each period.
        trainDates = [training_dates.lowerIndex, training_dates.upperIndex]
        testDates = [testing_dates.lowerIndex, testing_dates.upperIndex]

        # Share of each period, in percent, of the combined index span.
        total = ((trainDates[1] - trainDates[0])
                 + (testDates[1] - testDates[0]))
        tr = float(trainDates[1] - trainDates[0]) / total * 100.0
        te = float(testDates[1] - testDates[0]) / total * 100.0