def create_supervised_ds(window_size):
    """Build the differenced, windowed 'Avg' dataset from the configured CSV.

    The file at ``config.selected_path + config.input_file_ds`` is read
    (first 1438 rows only), its row order is reversed, and the 'Avg' column
    is differenced with a lag of 1 before being framed with
    ``data_misc.timeseries_to_supervised``.

    Side effects:
        * Stores the raw (non-differenced) 'Avg' values in the module-level
          global ``avg_values``.
        * Prints the window size and the lag.

    Args:
        window_size: Window passed to ``timeseries_to_supervised``; the same
            number of leading rows is dropped from the result (per the
            original author's note, those rows contain padding zeros).

    Returns:
        The supervised values with the first ``window_size`` rows removed.
    """
    global avg_values

    # Differencing is only implemented for a single step.
    step = 1

    csv_location = config.selected_path + config.input_file_ds
    # To pair with the other models, only the first 1438 rows are read.
    frame = read_csv(csv_location, header=0, sep=',', nrows=1438)
    avg_values = frame.iloc[::-1]['Avg'].values

    # Make the series stationary, then frame it as a supervised problem.
    differenced = data_misc.difference(avg_values, step)
    windowed = data_misc.timeseries_to_supervised(differenced, window_size)
    # The first `window_size` rows are cut off.
    trimmed = windowed.values[window_size:, :]

    print('Window Size: %s' % str(window_size))
    print('Lag: %s' % str(step))
    return trimmed
def supervised_diff_dt(df_data, window_size):
    """Difference and window every column of *df_data*, side by side.

    Each column is differenced with a lag of 1 (``data_misc.difference``),
    framed with ``data_misc.timeseries_to_supervised`` and stripped of its
    last column, which is not the target wanted here. The per-column blocks
    are then joined horizontally in column order.

    Args:
        df_data: Column-indexable table (e.g. a pandas DataFrame); iterating
            it must yield the column keys.
        window_size: Window passed to ``timeseries_to_supervised``.

    Returns:
        A 2-D array holding all per-column blocks concatenated along axis 1,
        or an empty list when *df_data* has no columns (the original
        behaviour, kept for backward compatibility).
    """
    processed_columns = []
    for column_name in df_data:
        diff_col = data_misc.difference(df_data[column_name], 1)
        supervised_col = data_misc.timeseries_to_supervised(diff_col, window_size)
        # We drop the last column because it is not the target we want.
        processed_columns.append(supervised_col.values[:, :-1])

    if not processed_columns:
        return []

    # One concatenate over the whole list instead of re-copying the growing
    # result once per column.
    return np.concatenate(processed_columns, axis=1)
def compare(y_test, y_predicted):
    """Return the root-mean-squared error between targets and predictions."""
    return sqrt(mean_squared_error(y_test, y_predicted))


# Single-pass sweep over the window size (only 12 is evaluated).
for x in range(12, 13):
    window_size = x  # 15
    print('Window Size: %i' % (x))

    series = read_csv('../data/airline-passengers.csv', header=0, sep='\t')

    # Keep the dates that line up with the rows surviving the window cut.
    date = series['Date'].iloc[window_size:].values
    series = series.drop(['Date'], axis=1)

    # Frame the remaining columns as a supervised problem and drop the
    # first `window_size` rows.
    raw_values = data_misc.timeseries_to_supervised(series.values, window_size)
    raw_values = raw_values.values[window_size:, :]

    # 80 / 20 chronological split.
    size_raw_values = len(raw_values)
    split = int(size_raw_values * 0.80)
    train = raw_values[:split]
    test = raw_values[split:]

    # The scaler is fitted on the training rows only and then applied to both.
    transformer = StandardScaler()
    transformer.fit(train)
    train, test = transformer.transform(train), transformer.transform(test)

    print(train)
    print(test)
# Builds `raw_values`: one supervised window block per entry of `columns`,
# stacked to the left of the supervised 'Avg' block.

# Only the first 1438 rows are read; the row order is then reversed
# (presumably the file is stored newest-first -- TODO confirm).
series = read_csv(path + input_file, header=0, sep=',', nrows=1438)
series = series.iloc[::-1]

# Copy just the selected feature columns into their own frame.
dfx = DataFrame()
for column in columns:
    dfx[column] = series[column]

date = series['Date']
avg = series['Avg']

# Drop the first `window_size` dates, matching the row cut applied below.
date = date.iloc[window_size:]
date = date.values

# Frame 'Avg' as a supervised problem and cut the first `window_size` rows.
avg_values = avg.values
avg_values = data_misc.timeseries_to_supervised(avg_values, window_size)
avg_values = avg_values.values[window_size:, :]
print('-----')

# Disabled alternative seeds kept from the original:
#raw_values = range(len(avg_values))
#raw_values = [i*0 for i in raw_values]
# The 'Avg' block seeds the result, so it ends up as the right-most columns.
raw_values = avg_values
for column_name in columns:
    col = dfx[column_name]
    col_values = data_misc.timeseries_to_supervised(col, window_size)
    col_values = col_values.values[window_size:, :]
    # Concatenate with numpy. Each new block is placed on the LEFT, so the
    # feature blocks appear in reverse order of `columns`.
    raw_values = np.concatenate((col_values,raw_values), axis=1)
# Assembles the `supervised` matrix for the current model combination:
# differenced + windowed 'avg' values, with the block produced by
# supervised_diff_dt(df, ...) placed to their left.

# Rows to discard at the start: the largest window among the inputs in use.
# NOTE(review): the trend window only replaces the bitcoin window when
# bitcoin columns are off AND trend columns are on; if both are on,
# window_size_trend is not considered -- confirm this is intended.
cut_beginning = [window_size_avg, window_size_btc]
if not use_bitcoin_columns and use_trend_columns:
    cut_beginning = [window_size_avg, window_size_trend]
total_window_size = max(cut_beginning)

# Progress output.
print('----------')
print(combination)
print('Model %s / %s' % (str(model_count), str(total_models)))

# Stationary Data
# Difference only works for one step. Please implement for other steps
avg_diff_values = data_misc.difference(avg_values, lag)

# Avg values converted into supervised model
avg_supervised = data_misc.timeseries_to_supervised(
    avg_diff_values, window_size_avg)
# Disabled variant that windows the raw (non-differenced) values:
# avg_supervised = data_misc.timeseries_to_supervised(avg_values, window_size_avg)
supervised = avg_supervised.values

# we cut according to the biggest window size
supervised = supervised[total_window_size:, :]

# For the previous year we make it supervised and diff.
avg_previous = supervised_diff_dt(df, window_size_avg)

# we cut according to the biggest window size
avg_previous = avg_previous[total_window_size:, :]

# Concatenate with numpy: the supervised_diff_dt block goes on the left,
# the 'avg' block stays on the right.
supervised = np.concatenate((avg_previous, supervised), axis=1)
# http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.ElasticNet.html#sklearn.linear_model.ElasticNet
from sklearn.linear_model import ElasticNet
from pandas import read_csv
from sklearn.metrics import mean_squared_error
from math import sqrt
from Util import misc
from Util import data_misc

# Data preparation for the ElasticNet experiment: load, difference, window,
# split and scale the 'Avg' series.
series = read_csv('../Thesis/Bitcoin_historical_data_processed_supervised2.csv', header=0, sep='\t')

# Make the series stationary with a one-step difference.
raw_values = series['Avg'].values
diff_values = data_misc.difference(raw_values, 1)

# Frame the differenced series as a supervised problem with a window of 1.
supervised = data_misc.timeseries_to_supervised(diff_values, 1)
supervised_values = supervised.values

# The last 10 rows are held out for testing; everything before is training.
train = supervised_values[:-10]
test = supervised_values[-10:]

# Scale both partitions with the project helper.
scaler, train_scaled, test_scaled = data_misc.scale(train, test)

# The last column is the target; all preceding columns are the inputs.
X_train = train_scaled[:, :-1]
y_train = train_scaled[:, -1]
X_test = test_scaled[:, :-1]
y_test = test_scaled[:, -1]

print(X_train.shape)
# Force an explicit (n_samples, 1) layout for the training inputs.
X_train = X_train.reshape(X_train.shape[0], 1)
print(X_train.shape)
print(X_test.shape)
# Pairs the 'Avg' series with a copy of itself produced by
# data_misc.slide_data using a 365-step lag, then frames both as supervised
# windows and trims the feature matrix.

# NOTE(review): this `date` binding is overwritten by the slide_data call
# below, which re-reads series['Date'] directly.
date = series['Date']
avg = series['Avg']
lag = 365

# slide_data returns two aligned sequences for a given lag (current and
# "previous"); exact alignment is defined in data_misc -- verify there.
avg, avg_previous = data_misc.slide_data(avg.values, lag)
date, date_previous = data_misc.slide_data(series['Date'].values, lag)

# Quick visual check of the pairing.
df = DataFrame({'date_previous': date_previous, 'avg_previous': avg_previous,
                'date': date, 'avg': avg})
print(df.head(10))

# The data is made supervised
avg = data_misc.timeseries_to_supervised(avg, window_size)
avg_previous = data_misc.timeseries_to_supervised(avg_previous, window_size)

# The first [Window size number] contains zeros which need to be cut.
avg = avg.values[window_size:, :]
avg_previous = avg_previous.values[window_size:, :]

# `series` is no longer used from here on because it contains columns that
# are not needed; `dfx` is used instead, skipping the first `lag` rows.
raw_values = dfx.values[lag:]
# print(series)

# We cut the values which are zero
# NOTE(review): these two cuts drop the LAST `window_size` rows, whereas
# avg / avg_previous above drop the FIRST `window_size` rows -- confirm the
# row alignment. Also, window_size == 0 makes `[:-0]` an empty slice.
raw_values = raw_values[:-window_size, :]
date_previous = DataFrame(date_previous)
date_previous = date_previous.values[:-window_size]
from Util import misc
from Util import data_misc
import numpy as np
import itertools
from model import values

# Toy experiment: window two synthetic series with different window sizes
# and join them into a single supervised matrix.
avg_window_size = 4
weight_window_size = 2

# Three ten-element ramps in steps of 10.
avg = list(range(10, 101, 10))
btc = list(range(110, 201, 10))
trend = list(range(210, 301, 10))
#btc=[[110,210], [120,220], [130,230], [140,240], [150,250], [160,260], [170,270], [180,280], [190,290], [200,300]]
print(btc)

avg_supervised = data_misc.timeseries_to_supervised(avg, avg_window_size)
# The last column of the btc frame is dropped because it is not the target
# we want.
btc_supervised = data_misc.timeseries_to_supervised(btc, weight_window_size).values[:, :-1]

# Both blocks lose as many leading rows as the larger window, so that their
# rows pair up.
cut_beginning = max(avg_window_size, weight_window_size)
avg_supervised = avg_supervised.values[cut_beginning:, :]
btc_supervised = btc_supervised[cut_beginning:, :]

# btc features on the left, avg window (and its target) on the right.
supervised = np.concatenate((btc_supervised, avg_supervised), axis=1)
return result avg_window_size = 3 btc_window_size = 4 avg = [10, 22, 30, 42, 50, 62, 70, 82, 90, 102] btc = [110, 123, 130, 143, 150, 163, 170, 183, 190, 203] trend = [212, 224, 232, 244, 252, 264, 272, 284, 292, 304] df = DataFrame({'btc': btc, 'trend': trend}) print(btc) avg_diff = data_misc.difference(avg, 1) avg_supervised = data_misc.timeseries_to_supervised(avg_diff, avg_window_size) print(avg_supervised) btc_supervised = supervised_diff_dt(df, btc_window_size) # We pair the avg_supervised column with the weight_supervised cut_beginning = [avg_window_size, btc_window_size] cut_beginning = max(cut_beginning) avg_supervised = avg_supervised.values[cut_beginning:, :] btc_supervised = btc_supervised[cut_beginning:, :] # Concatenate with numpy supervised = np.concatenate((btc_supervised, avg_supervised), axis=1) print(supervised)