x for x in cols if x not in ['Date', 'USD', 'Value', 'Open', 'Close', 'High', 'Low', 'Volume'] ] dict_dfs_cols[name] = new_cols dataset = ml_dataset.generate_df_dataset(values_names, values_dfs, dict_dfs_cols) #First 30 row dataset = dataset[31:] dataset = dataset.reset_index(drop=True) datasetY = dataset.copy(deep=True) #dataset = dataset.fillna(method='ffill') training_dates = Iteration.Iteration('2009-08-19', '2014-12-01') testing_dates = Iteration.Iteration('2014-12-02', '2016-04-20') training_dates.calculate_indices(dataset) testing_dates.calculate_indices(dataset) trainDates = [] testDates = [] trainDates.append(training_dates.lowerIndex) trainDates.append(training_dates.upperIndex) testDates.append(testing_dates.lowerIndex) testDates.append(testing_dates.upperIndex) trainX, trainY, testX, testY, cols = ml_dataset.dataset_to_train_using_dates( dataset, trainDates, testDates,
#First 30 row dataset = dataset[31:] dataset = dataset.reset_index(drop=True) #colsToShift = [col for col in dataset.columns if 'HSI' in col or'N225' in col or'AXJO' in col] #dataset[colsToShift] = dataset[colsToShift].shift(-1) #last_row = dataset.shape[0]-1 #dataset = dataset.drop(last_row, axis=0) dataset_all = dataset_all[31:] dataset_all = dataset_all.reset_index(drop=True) #dataset_all[colsToShift] = dataset_all[colsToShift].shift(-1) #last_row = dataset_all.shape[0]-1 #dataset_all = dataset_all.drop(last_row, axis=0) training_dates = Iteration.Iteration('2009-08-19', '2014-12-01') testing_dates = Iteration.Iteration('2014-12-02', '2016-04-20') training_dates.calculate_indices(dataset) testing_dates.calculate_indices(dataset) trainDates = [] testDates = [] trainDates.append(training_dates.lowerIndex) trainDates.append(training_dates.upperIndex) testDates.append(testing_dates.lowerIndex) testDates.append(testing_dates.upperIndex) trainX, trainY, testX, testY, cols = ml_dataset.dataset_to_train_using_dates( dataset, trainDates, testDates,
ensemble.RandomForestClassifier(), ensemble.ExtraTreesClassifier() ]: colsToShift = 1 df_x = dataset.filter(regex=(regex)) last_row = list(range(df_x.shape[0] - colsToShift, df_x.shape[0])) df_x = df_x.drop(last_row, axis=0) df_x = df_x.drop(colY, axis=1) df_y = dataset[colY].shift(-colsToShift) last_row = list(range(df_y.shape[0] - colsToShift, df_y.shape[0])) df_y = df_y.drop(last_row, axis=0) training_dates = Iteration.Iteration('2008-06-17', '2011-09-01') testing_dates = Iteration.Iteration('2012-09-04', '2014-10-06') training_dates.calculate_indices(dataset) testing_dates.calculate_indices(dataset) trainDates = [] testDates = [] trainDates.append(training_dates.lowerIndex) trainDates.append(training_dates.upperIndex) testDates.append(testing_dates.lowerIndex) testDates.append(testing_dates.upperIndex) total = (trainDates[1] - trainDates[0]) + (testDates[1] - testDates[0]) tr = float(trainDates[1] - trainDates[0]) / total * 100.0 te = float(testDates[1] - testDates[0]) / total * 100.0