def train_test_split_df(self):
    """Split features/targets chronologically and wrap them in Keras
    TimeseriesGenerators for next-step prediction.

    Side effects:
        Stores the generators on self.train_generator / self.test_generator.

    Returns:
        tuple: (train_gen, test_gen) — sliding windows of 5 timesteps,
        batch size 1, in original chronological order.
    """
    x, y = self.create_np_array()
    # shuffle=False preserves chronological order — required for time series
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.10, shuffle=False)
    # Since we are working with timeseries data we create batches of sequences to predict next y
    train_gen = TimeseriesGenerator(data=x_train, targets=y_train, length=5,
                                    batch_size=1, shuffle=False, reverse=False,
                                    start_index=0, end_index=None)
    test_gen = TimeseriesGenerator(x_test, y_test, length=5, sampling_rate=1,
                                   batch_size=1, shuffle=False, reverse=False,
                                   start_index=0, end_index=None)
    self.train_generator = train_gen
    self.test_generator = test_gen
    return (train_gen, test_gen)
def setupData(self, series, val_days=450):
    """
    splits data, scales data, creates generators for the model

    Splits `series` chronologically (last `val_days` rows held out for
    validation), applies Gaussian smoothing (sigma = self.g_filt) to each
    part, and builds autoregressive TimeseriesGenerators where the target
    is the value following each window of self.length steps.

    Fix: corrected the typo "lenght" in the assertion message ("length"),
    matching the sibling setupData implementation in this file.
    """
    assert val_days > self.length, "val_days must exceed length"
    # split data into train and validation
    self.train = series.iloc[:-val_days]
    self.validation = series.iloc[-val_days:]
    # Apply smoothing filters; reshape to one feature column for Keras
    self.train_smooth = \
        gaussian_filter1d(self.train, self.g_filt).reshape(-1, 1)
    self.validation_smooth = \
        gaussian_filter1d(self.validation, self.g_filt).reshape(-1, 1)
    # create time series generators (targets == inputs: next-step prediction)
    self.generator = TimeseriesGenerator(data=self.train_smooth,
                                         targets=self.train_smooth,
                                         length=self.length,
                                         batch_size=self.batch_size)
    self.val_generator = TimeseriesGenerator(data=self.validation_smooth,
                                             targets=self.validation_smooth,
                                             length=self.length,
                                             batch_size=self.batch_size)
def setupData(self, series, val_days=450):
    """Split `series` chronologically, scale it, and build generators.

    The final `val_days` rows become the validation set. A MinMaxScaler is
    fit on the training rows only and applied to both partitions, and
    autoregressive TimeseriesGenerators (targets == inputs) are stored on
    self.generator / self.val_generator.
    """
    assert val_days > self.length, "val_days must exceed length"
    # chronological split: everything but the last val_days rows trains
    split_point = -val_days
    self.train = series.iloc[:split_point]
    self.validation = series.iloc[split_point:]
    # fit the scaler on training data only, then transform both partitions
    train_col = self.train.values.reshape(-1, 1)
    val_col = self.validation.values.reshape(-1, 1)
    self.scaler = MinMaxScaler()
    self.scaler.fit(train_col)
    self.train_scaled = self.scaler.transform(train_col)
    self.validation_scaled = self.scaler.transform(val_col)
    # sliding-window generators: each sample is self.length consecutive
    # steps; the target is the value immediately after the window
    self.generator = TimeseriesGenerator(
        data=self.train_scaled,
        targets=self.train_scaled,
        length=self.length,
        batch_size=self.batch_size)
    self.val_generator = TimeseriesGenerator(
        data=self.validation_scaled,
        targets=self.validation_scaled,
        length=self.length,
        batch_size=self.batch_size)
def rnn(company):
    """Train a small SimpleRNN on one company's price series and return the
    next one-step price forecast, rounded to 2 decimals.

    Reads 'pg4_data.csv' (date-indexed), filters to `company`, holds out the
    last 7 rows for validation, and predicts one step past the series end.

    Fix: removed a dead roll-forward of `current_batch` after the single
    prediction — the rolled batch was never read again.
    """
    df = pd.read_csv('pg4_data.csv', parse_dates=True, index_col='date')
    df = df[df.company == company]
    df.drop(['ticker', 'company'], inplace=True, axis=1)
    # prices arrive as strings with thousands separators, e.g. "1,234.5"
    df['price'] = df.price.apply(lambda x: x.replace(',', ''))
    df['price'] = pd.to_numeric(df.price, errors='coerce')
    # last 7 rows held out for validation
    train_data = df[:-7]
    test_data = df[-7:]
    scaler = MinMaxScaler(feature_range=(0, 1))
    train_scaled = scaler.fit_transform(train_data)
    test_scaled = scaler.transform(test_data)
    generator = TimeseriesGenerator(train_scaled, train_scaled, length=3, batch_size=1)
    model = Sequential()
    model.add(SimpleRNN(132, input_shape=(3, 1)))
    model.add(Dense(64))
    model.add(Dense(1))
    early_stops = EarlyStopping(monitor='val_loss', patience=2)
    validation = TimeseriesGenerator(test_scaled, test_scaled, length=3, batch_size=1)
    model.compile(optimizer='adam', loss='mse')
    model.fit(generator, epochs=20, validation_data=validation, callbacks=[early_stops])
    # one-step forecast seeded by the last 3 scaled observations
    test_prediction = []
    first_eval_batch = test_scaled[-3:]
    current_batch = first_eval_batch.reshape(1, 3, 1)
    current_pred = model.predict(current_batch)[0]
    test_prediction.append(current_pred)
    # undo the MinMax scaling to get a real price
    true_predictions = scaler.inverse_transform(test_prediction)
    return round(true_predictions[0][0], 2)
def build_model(df, ticker):
    """Train a GRU classifier (Down/Flat/Up) on one ticker's feature frame,
    save accuracy/loss plots, a confusion-matrix CSV, and the model.

    NOTE(review): relies on module-level names defined elsewhere in this
    file: `resample`, `plot_acc`, `plot_loss`.

    Returns:
        The Keras History object from model.fit.
    """
    # nas/split
    df.dropna(inplace=True)
    X = df.drop(
        columns=['target', 'ticker', 'price open', 'price close', 'price low'])
    y = df['target']
    y = to_categorical(y)
    # shuffle=False keeps the chronological ordering of the series
    X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False, test_size=.2)
    #scale
    ss = StandardScaler()
    X_train_sc = ss.fit_transform(X_train)
    X_test_sc = ss.transform(X_test)
    # time series gen
    tsg_len = 5
    tsg_batch = 512
    train_seq = TimeseriesGenerator(X_train_sc, y_train, length=tsg_len, batch_size=tsg_batch)
    test_seq = TimeseriesGenerator(X_test_sc, y_test, length=tsg_len, batch_size=tsg_batch)
    # Design RNN
    model = Sequential()
    model.add(GRU(32, input_dim=X.shape[1], return_sequences=True))  # True if next layer is RNN
    model.add(GRU(16, return_sequences=False))  # False if next layer is Dense
    model.add(Dense(8, activation='relu'))
    model.add(Dense(4, activation='relu'))
    # output layer (3 classes: Down / Flat / Up)
    model.add(Dense(3, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer=Adam(), metrics=['accuracy'])
    hist = model.fit(train_seq, epochs=100, validation_data=test_seq, verbose=0)
    plot_acc(hist, ticker)
    plot_loss(hist, ticker)
    # metrics:
    # https://stackoverflow.com/questions/54875846/how-to-print-labels-and-column-names-for-confusion-matrix
    preds = np.argmax(model.predict(test_seq), axis=-1)
    labels = ['Down', 'Flat', 'Up']
    y_cats = np.argmax(y_test, axis=1)
    # the generator drops the first tsg_len rows, so align y the same way
    cf = confusion_matrix(y_cats[tsg_len:], preds)
    cf_df = pd.DataFrame(cf, columns=labels, index=labels)
    cf_df.to_csv(f'./charts/rnn/{resample}/cm/{ticker}.csv', index=True)
    #pickle model
    model.save(f'./models/rnn/{resample}/{ticker}_rnn')
    return hist
def setupData(self, series, val_days=450):
    """
    splits data, scales data, creates generators for the model

    This variant applies Gaussian smoothing (sigma=0.8) and deliberately
    skips scaling (see the disabled block below). Generators are
    autoregressive: targets == inputs, window of self.length steps.
    """
    # NOTE(review): assertion message contains a typo ("lenght"); kept
    # verbatim here since it is a runtime string
    assert val_days > self.length, "val_days must exceed lenght"
    #split data into train and validation
    self.train = series.iloc[:-val_days]
    self.validation = series.iloc[-val_days:]
    # =========================================================================
    # APPLY Smoothing filters
    # =========================================================================
    self.train_smooth = self.train
    #self.train_smooth = medfilt(self.train,7)
    # Gaussian smoothing with a fixed sigma of 0.8, applied to train and
    # validation independently
    self.train_smooth = gaussian_filter1d(self.train_smooth, 0.8)
    self.validation_smooth = self.validation
    #self.validation_smooth = medfilt(self.validation,7)
    self.validation_smooth = gaussian_filter1d(self.validation_smooth, 0.8)
    # =========================================================================
    # SCALE AND GEN THAT
    # =========================================================================
    # =========================================================================
    # #scale data for neural network suitability
    # self.scaler = MinMaxScaler()
    # self.scaler.fit(self.train_smooth.reshape(-1,1))
    #
    # self.train_scaled = \
    # self.scaler.transform(self.train_smooth.reshape(-1,1))
    #
    # self.validation_scaled = \
    # self.scaler.transform(self.validation_smooth.reshape(-1,1))
    # =========================================================================
    # =========================================================================
    # NO SCALE  (the scaling block above is intentionally disabled; the
    # "_scaled" names are kept so the generator code below is unchanged)
    # =========================================================================
    self.train_scaled = \
        self.train_smooth.reshape(-1,1)
    self.validation_scaled = \
        self.validation_smooth.reshape(-1,1)
    #create time series generators
    self.generator = \
        TimeseriesGenerator(data=self.train_scaled,
                            targets=self.train_scaled,
                            length=self.length,
                            batch_size=self.batch_size)
    self.val_generator = \
        TimeseriesGenerator(data=self.validation_scaled,
                            targets=self.validation_scaled,
                            length=self.length,
                            batch_size=self.batch_size)
def series_generator(scaled_train, scaled_validation, n_input):
    """Build autoregressive generators for the train and validation series.

    Each generator yields windows of `n_input` consecutive values whose
    target is the value immediately following the window (targets == data),
    one sample per batch.

    Returns:
        tuple: (train_generator, validation_generator)
    """
    def _wrap(series):
        # targets are the series itself: classic next-step prediction setup
        return TimeseriesGenerator(series, series, length=n_input, batch_size=1)

    return _wrap(scaled_train), _wrap(scaled_validation)
def timeserieGenerator(length=12, batch_size=1):
    """Build autoregressive generators over the module-level scaled arrays.

    NOTE(review): depends on module-level `scale_train` and `scale_test`
    being defined before this is called — confirm against the caller.

    Returns:
        tuple: (train_generator, validation_generator, length) — the window
        length is echoed back for downstream model construction.
    """
    train_generator = TimeseriesGenerator(scale_train, scale_train, length=length, batch_size=batch_size)
    validation_generator = TimeseriesGenerator(scale_test, scale_test, length=length, batch_size=batch_size)
    return train_generator, validation_generator, length
def data_generator(data, backward, forward, mean, std):
    """Window a series into (inputs, outputs) arrays for seq-to-seq training.

    The series is standardized with externally supplied statistics, then
    windowed twice: first to build, for every timestep, the window of the
    next `forward` values; then to pair each `backward`-step input window
    with that future window.

    Fix: renamed locals `input`/`output`, which shadowed Python builtins.

    Args:
        data: pandas object with a .values array.
        backward: input window length.
        forward: output (future) window length.
        mean, std: standardization statistics computed by the caller.

    Returns:
        tuple: (inputs, outputs) numpy arrays from the single full-size batch.
    """
    data = data.values
    N = len(data)
    normalized = ((data - mean) / std)
    # first pass: for each timestep, the window of the `forward` future values
    target, _ = TimeseriesGenerator(normalized, normalized, length=forward, batch_size=N)[0]
    # second pass: align backward-length input windows with those future windows
    inputs, outputs = TimeseriesGenerator(normalized[:-forward], target, length=backward, batch_size=N - forward)[0]
    return inputs, outputs
def genarate_timeseries(self, x_train, y_train, x_val, y_val, x_test):
    """Wrap the train/val/test splits in TimeseriesGenerators.

    Windows span self.past_days timesteps with batches of 512. The test
    generator gets a zero placeholder target column because true labels
    are unknown at prediction time.

    Returns:
        tuple: (train_timeseries, val_timeseries, test_timeseries)
    """
    window = self.past_days

    def _gen(features, targets):
        return TimeseriesGenerator(features, targets, length=window, batch_size=512)

    # zeros stand in for the unknown test targets
    placeholder = np.zeros((x_test.shape[0], 1))
    return _gen(x_train, y_train), _gen(x_val, y_val), _gen(x_test, placeholder)
def generator_builder(data, targets, TIMESTEPS):
    """Return a TimeseriesGenerator pairing TIMESTEPS-long windows of `data`
    with the corresponding entries of `targets` (Keras defaults for batch
    size, stride, and sampling rate)."""
    return TimeseriesGenerator(data=data, targets=targets, length=TIMESTEPS)
def trainSimpleLSTM(model, data, history_window, n_batch, n_epochs):
    """Scale `data` to [0, 1], train `model` on sliding windows, and return
    the scaled data together with the fitted scaler (needed later to invert
    predictions).

    Fixes: removed a leftover debug `print(data)`, a dead history DataFrame
    that was built but never used, and the commented-out plotting scaffold.

    Args:
        model: compiled-or-not Keras model (it is (re)compiled here).
        data: 2-D array-like training series.
        history_window: timesteps per training sample.
        n_batch: samples per batch.
        n_epochs: training epochs.

    Returns:
        tuple: (trainingData, scaler) — the scaled array and fitted scaler.
    """
    trainingData = data
    # Calculate this once: fit the scaler on the full training series
    scaler = MinMaxScaler()
    scaler.fit(trainingData)
    trainingData = scaler.transform(trainingData)
    generator = TimeseriesGenerator(trainingData, trainingData,
                                    length=history_window, batch_size=n_batch)
    optimizer = keras.optimizers.Adam(learning_rate=0.001)
    model.compile(optimizer=optimizer, loss='mse')
    model.fit_generator(generator, epochs=n_epochs, verbose=1)
    return trainingData, scaler
def train(self, training_data_tuple=None, verbose=False):
    """Fit the wrapped Keras model on windowed training data and persist it.

    Args:
        training_data_tuple: (X_train, X_test, y_train, y_test, split_info);
            only X_train/y_train are consumed here.
        verbose: print the save path when True.

    Raises:
        KeyError: "model_config" missing from self._model_details.
        ValueError: training_data_tuple is None.
        RuntimeError: chained wrapper for any other failure. (BUG FIX: the
            old handler did `raise e(msg)`, which *calls* the caught
            exception instance and itself raises TypeError, masking the
            real error.)
    """
    training_params = self._model_details.get("model_config")
    if training_params is None:
        raise KeyError(f"model_config not in {self._model_details}")
    if training_data_tuple is None:
        raise ValueError("training_data_tuple cannot be None")
    try:
        # NOTE(review): the 5th element shadows the name train_test_split;
        # it is unused here, kept for tuple-shape compatibility
        X_train, X_test, y_train, y_test, train_test_split = training_data_tuple
        num_inputs = training_params.get("num_inputs")
        batch_size = training_params.get("batch_size")
        epochs = training_params.get("epochs")
        generator = TimeseriesGenerator(X_train, y_train,
                                        length=num_inputs,
                                        batch_size=batch_size)
        self._model.fit_generator(generator, epochs=epochs)
        persisted_model_path = self._model_details.get("persisted_model_path")
        self._model.save(persisted_model_path)
        if verbose:
            print(f"Saved recurrent_nn model type to "
                  f"{persisted_model_path}")
    except Exception as e:
        raise RuntimeError("Error in train") from e
def route_to_ts(f, window_len, max_len=250):
    """Load a saved route array and window it for sequence models.

    Args:
        f: path/file for np.load; rows are timesteps, last column the target.
        window_len: timesteps per sample window.
        max_len: keep at most this many final rows of the route.

    Returns:
        tuple: (X_ts, y_ts) — windowed features and rescaled targets.
    """
    route = np.load(f).astype("float32")[-max_len:]
    # NOTE(review): features start one row *after* the targets (X from row 1,
    # y up to the penultimate row), i.e. a deliberate one-step misalignment —
    # confirm this lag is intended.
    X = route[1:, :-1]
    y = route[:-1, -1]
    # single batch containing every window
    data_gen = TimeseriesGenerator(X, y, length=window_len, batch_size=len(X))
    X_ts = data_gen[0][0]
    # presumably converts a per-year rate into per-minute units via the
    # module-level SECONDS_PER_YEAR — TODO confirm units
    y_ts = data_gen[0][1] * SECONDS_PER_YEAR / 60
    return X_ts, y_ts
def pred(f1):
    """Train an LSTM on the module-level `train` series and forecast the
    next `f1` steps, returning a DataFrame of dated integer case counts.

    NOTE(review): depends on module-level `train`, `df`, and `scaler`
    defined elsewhere. Returns None (implicitly) when `f1` is not a
    positive integer string.

    Args:
        f1: string; must be the decimal representation of a positive int,
            used both as forecast horizon and as the LSTM window length.
    """
    flag = 0
    n = 0  # NOTE(review): assigned but never used
    # accept only digit-strings; everything else falls through and returns None
    if f1.isdigit() == True:
        flag = 1
    else:
        flag = 0
    if flag == 1:
        f = 0
        f = int(f1)
        if f > 0:
            n_input = f  # window length equals the requested horizon
            n_features = 1
            generator = TimeseriesGenerator(train, train, length=n_input, batch_size=6)
            model = Sequential()
            model.add(
                LSTM(200, activation='relu', input_shape=(n_input, n_features)))
            model.add(Dropout(0.15))
            model.add(Dense(1))
            optimizer = keras.optimizers.Adam(lr=0.001)
            model.compile(optimizer=optimizer, loss='mse')
            history = model.fit_generator(generator, epochs=100, verbose=1)
            # roll the window forward one prediction at a time
            pred_list = []
            batch = train[-n_input:].reshape((1, n_input, n_features))
            for i in range(n_input):
                pred_list.append(model.predict(batch)[0])
                batch = np.append(batch[:, 1:, :], [[pred_list[i]]], axis=1)
            import pandas as pd
            ts = pd.Timestamp('2019-10-10 07:15:11')  # NOTE(review): unused
            do = pd.tseries.offsets.DateOffset(n=2)  # NOTE(review): unused
            # build monthly future dates continuing past the last index entry
            add_dates = [
                pd.Timestamp(df.index[-1]) + DateOffset(months=x)
                for x in range(0, f + 1)
            ]
            future_dates = pd.DataFrame(index=add_dates[1:], columns=df.columns)
            # invert the scaling back to raw case counts
            df_predict = pd.DataFrame(scaler.inverse_transform(pred_list),
                                      index=future_dates[-n_input:].index,
                                      columns=['Prediction'])
            df_proj = pd.concat([df, df_predict], axis=1)  # NOTE(review): unused
            res = df_predict.reset_index()
            res.columns = ['Date', 'count of cases']
            res['count of cases'] = res['count of cases'].apply(np.int64)
            return res
def create_generator(dataset, params, shuffle=True):
    """Build a TimeseriesGenerator from a dataset/params pair.

    Args:
        dataset: mapping with 'features' and 'labels' arrays.
        params: mapping with 'window' (sample length) and 'batch' (batch size).
        shuffle: whether the generator shuffles sample order.

    Returns:
        A configured keras TimeseriesGenerator.
    """
    # dict lookups are inlined straight into the constructor call
    return TimeseriesGenerator(
        dataset['features'],
        dataset['labels'],
        length=params['window'],
        batch_size=params['batch'],
        shuffle=shuffle,
    )
def forecast(df, length_generator, fc_period, fc_start="2020-05-05"):
    """Fit an LSTM on the entire series and roll a forecast forward.

    Generalization: the forecast start date, previously hard-coded to
    "2020-05-05", is now the `fc_start` parameter (default preserves the
    old behavior).

    Args:
        df: full univariate series (single feature column).
        length_generator: window length (timesteps per training sample).
        fc_period: number of future daily steps to forecast.
        fc_start: first date of the forecast index.

    Returns:
        DataFrame indexed by daily dates with an integer "Forecast" column.
    """
    full_scaler = MinMaxScaler()
    scaled_full_data = full_scaler.fit_transform(df)
    length = length_generator
    n_features = 1
    generator = TimeseriesGenerator(scaled_full_data, scaled_full_data, length=length, batch_size=1)
    model = Sequential()
    model.add(LSTM(100, activation="relu", input_shape=(length, n_features)))  # can add dropout too
    model.add(Dense(1))
    model.compile(optimizer="adam", loss="mse")
    model.fit(generator, epochs=75)
    forecast = []
    forecast_period = fc_period
    # seed the rolling window with the last `length` observed values
    first_eval_batch = scaled_full_data[-length:]
    current_batch = first_eval_batch.reshape((1, length, n_features))
    for i in range(forecast_period):
        # get prediction 1 timestep ahead ([0] grabs the value inside the batch dim)
        current_pred = model.predict(current_batch)[0]
        # store prediction
        forecast.append(current_pred)
        # update batch: drop the oldest value, append the new prediction
        current_batch = np.append(current_batch[:, 1:, :], [[current_pred]], axis=1)
    forecast = full_scaler.inverse_transform(forecast)
    forecast_index = pd.date_range(start=fc_start, periods=forecast_period, freq="D")
    forecast_df = pd.DataFrame(data=forecast, index=forecast_index, columns=["Forecast"])
    forecast_df["Forecast"] = forecast_df["Forecast"].apply(lambda x: int(x))
    return forecast_df
def nn_model(scaled_train_array, y_scaled_train_array, length, epochs):
    """Build and train a Conv1D + stacked-LSTM regressor on windowed data.

    BUG FIX: the previous version overwrote the `length` argument with a
    hard-coded 12, silently ignoring the caller's window size; the
    parameter is now honored. Also removed a dead
    `model.history.history.keys()` expression and the commented-out
    early-stopping scaffold.

    Args:
        scaled_train_array: 2-D feature array (timesteps, n_features).
        y_scaled_train_array: targets aligned with the features.
        length: window length (number of timesteps per sample).
        epochs: training epochs.

    Returns:
        The trained Keras model; the loss curve is also plotted.
    """
    batch_size = 1  # number of timeseries samples in each batch
    generator = TimeseriesGenerator(scaled_train_array, y_scaled_train_array,
                                    length=length, batch_size=batch_size)
    model = Sequential()
    # causal convolution so the model never peeks at future timesteps
    model.add(tf.keras.layers.Conv1D(filters=32, kernel_size=5, strides=1,
                                     padding="causal", activation="tanh",
                                     input_shape=(None, (scaled_train_array.shape[1]))))
    model.add(tf.keras.layers.LSTM(512, activation='tanh', dropout=0.25,
                                   recurrent_dropout=0.25, return_sequences=True))
    model.add(tf.keras.layers.LSTM(512, dropout=0.25, recurrent_dropout=0.25,
                                   activation='tanh', return_sequences=True))
    model.add(tf.keras.layers.LSTM(512, activation='tanh'))
    model.add(tf.keras.layers.Dense(100))
    model.add(tf.keras.layers.Dense(1))
    optimizer = tf.keras.optimizers.Adam(lr=0.0001)
    model.compile(optimizer=optimizer, loss='mse')
    model.summary()
    model.fit_generator(generator, epochs=epochs)
    # plot the training loss curve
    losses = pd.DataFrame(model.history.history)
    losses.plot()
    return model
def create_univariate_dataset(self,
                              data_type: str = 'train',
                              sampling_rate: int = 1,
                              stride: int = 1,
                              batch_size: int = 1):
    """Build an autoregressive generator over the chosen univariate split.

    Targets equal inputs (next-step prediction) with windows of
    self.look_back steps.

    Args:
        data_type: 'train' or 'test' — selects self.train / self.test.
        sampling_rate, stride, batch_size: passed through to the generator.

    Returns:
        A keras TimeseriesGenerator over the selected series.

    Raises:
        ValueError: for any other data_type (previously this fell through
        to an opaque NameError on `data`).
    """
    if data_type == 'train':
        data = self.train
    elif data_type == 'test':
        data = self.test
    else:
        raise ValueError(f"data_type must be 'train' or 'test', got {data_type!r}")
    generator = TimeseriesGenerator(data,
                                    data,
                                    length=self.look_back,
                                    sampling_rate=sampling_rate,
                                    stride=stride,
                                    batch_size=batch_size)
    return generator
def __init__(self, x_path, folder_name, y_path, to_fit=True, batch_size=2, seq_len=15):
    """Build a frame-sequence generator for one folder of inputs.

    Args:
        x_path: base directory of input folders.
        folder_name: folder to load; appended to x_path.
        y_path: location of the targets (passed to self.get_Y).
        to_fit: stored flag; not read in this method.
        batch_size: sequences per batch.
        seq_len: frames per sequence window.
    """
    self.x_path = x_path + folder_name
    self.folder_name = folder_name
    self.y_path = y_path
    self.to_fit = to_fit
    # project helpers load every frame and the matching targets
    self.all_frames = self.get_all_frames(self.x_path)
    self.targets = self.get_Y(y_path, folder_name)
    # windows of seq_len consecutive frames paired with their targets
    self.series_data = TimeseriesGenerator(self.all_frames,
                                           self.targets,
                                           length=seq_len,
                                           batch_size=batch_size)
    # number of batches the generator will yield
    self.len = len(self.series_data)
def windowed_dataset(x, y, win_sz, batch_sz, kind='regress'):
    """Prepare a shuffled windowed dataset from a series.

    Args:
        x: feature series.
        y: target series; converted to boolean labels (y > 0) when
           kind == 'class', used as-is for 'regress'.
        win_sz: window length per sample.
        batch_sz: samples per batch.
        kind: "regress" or "class".

    Returns:
        A keras TimeseriesGenerator with shuffled sample order.
    """
    # classification mode turns the targets into up/down labels
    labels = (y > 0) if kind == 'class' else y
    return TimeseriesGenerator(x,
                               labels,
                               win_sz,
                               sampling_rate=1,
                               shuffle=True,
                               batch_size=batch_sz)
def predict(self, pred_data=None):
    """Run the wrapped model over windowed `pred_data` and format the result.

    Args:
        pred_data: array of inputs; windowed with length num_inputs - 1.

    Returns:
        Output of self._format_predict applied to the raw predictions.

    Raises:
        ValueError: pred_data is None.
        RuntimeError: chained wrapper for any other failure. (BUG FIX: the
            old handler did `raise e(msg)`, which *calls* the caught
            exception instance and itself raises TypeError, masking the
            real error.)
    """
    if pred_data is None:
        raise ValueError("pred_data cannot be None type")
    try:
        pred_params_dict = self._model_details.get("model_config")
        # NOTE(review): window is num_inputs - 1 — confirm this offset
        # matches how the model was trained
        generator_test = TimeseriesGenerator(
            pred_data,
            pred_data,
            length=pred_params_dict["num_inputs"] - 1,
            batch_size=1)
        return self._format_predict(self._model.predict(generator_test))
    except Exception as e:
        raise RuntimeError("Error in predict") from e
def __getitem__(self, index):
    """Load every frame of the index-th folder and return a
    TimeseriesGenerator of (frame-window, target) sequences.

    NOTE(review): this returns a *generator* per index rather than a single
    (x, y) batch — confirm the consumer expects that.
    """
    images_folder = self.list_X[index]
    # sorted() gives a deterministic frame order from the directory listing
    images_list = sorted(os.listdir(self.x_path + images_folder))
    all_frames = []
    for img in images_list:
        all_frames.append(
            np.array(cv2.imread(self.x_path + images_folder + '/' + img)))
    # float16 keeps the stacked frame tensor small in memory
    all_frames = np.stack(all_frames).astype(np.float16)
    # target key = first two underscore-separated tokens of the folder name
    key = images_folder.split('_')[:2]
    key = '_'.join(key)
    Y = np.array(self.dict_Y[key])
    # project helper: aligns/validates frames against the targets
    all_frames, targets = self.check(all_frames, Y)
    series_data = TimeseriesGenerator(all_frames,
                                      targets,
                                      length=self.seq_len,
                                      batch_size=self.batch_size)
    return series_data
def create_multivariate_dataset(self,
                                data_type='train',
                                outcome: str = 'adjclose',
                                sampling_rate: int = 1,
                                stride: int = 1,
                                batch_size: int = 1):
    """Build a generator pairing predictor windows with the outcome series.

    Args:
        data_type: 'train' or 'test' — selects the stored split.
        outcome: kept for backward compatibility; NOTE(review): this
            argument is overwritten by the stored outcome series below and
            never read — confirm whether callers expect it to select a column.
        sampling_rate, stride, batch_size: passed through to the generator.

    Returns:
        A keras TimeseriesGenerator with windows of self.look_back steps.

    Raises:
        ValueError: for any other data_type (previously this fell through
        to an opaque NameError).
    """
    if data_type == 'train':
        outcome = self.train['outcome']
        data = self.train['predictors']
    elif data_type == 'test':
        outcome = self.test['outcome']
        data = self.test['predictors']
    else:
        raise ValueError(f"data_type must be 'train' or 'test', got {data_type!r}")
    generator = TimeseriesGenerator(data,
                                    outcome,
                                    length=self.look_back,
                                    sampling_rate=sampling_rate,
                                    stride=stride,
                                    batch_size=batch_size)
    return generator
def predict_proba(self, X):
    """
    Predict proba

    Returns a (n_windows, 2) array: column 1 holds the model's raw score,
    column 0 its complement. Because inputs are windowed with self.length,
    n_windows = len(X) - self.length.

    Raises:
        NotFittedError: if called before the model is fitted.
    """
    if not self.fitted:
        raise NotFittedError()
    # targets are unused at prediction time; np.empty is only a placeholder
    # required by the TimeseriesGenerator signature
    data_gen = TimeseriesGenerator(X,
                                   np.empty(len(X)),
                                   length=self.length,
                                   sampling_rate=1,
                                   batch_size=self.batch_size)
    y_pred = self.model.predict_generator(data_gen)
    # Scale to [-1 1]
    # NOTE(review): the comment above says [-1, 1] but the construction
    # below yields complementary columns summing to 1 — confirm intent
    y_pred_proba = np.zeros((len(y_pred), 2))
    y_pred_proba[:, 1] = y_pred.ravel()
    y_pred_proba[:, 0] = 1 - y_pred.ravel()
    # cache the raw and paired predictions on the instance
    self.y_pred = y_pred
    self.y_pred_proba = y_pred_proba
    return y_pred_proba
def _fitRNN(self):
    """Train the RNN, optionally looping interactively until the user stops.

    Inputs are divided by 255.0 — presumably pixel data; TODO confirm.
    Each pass builds a fresh generator over self._data_set and logs the
    training history via self._atualizarLog.
    """
    treinar = True  # "train" flag — loop continues while True
    while treinar:
        gerador = TimeseriesGenerator(numpy.array(self._data_set[0]) / 255.0,
                                      numpy.array(self._data_set[1]),
                                      length=self.time_steps,
                                      batch_size=self.batch_size)
        historico = inteligencia.modelo.fit_generator(gerador,
                                                      epochs=self.epochs,
                                                      shuffle=self.suffle,
                                                      verbose=1)
        self._atualizarLog(historico)
        if self._iterativo:
            try:
                # prompt is Portuguese for "Continue (y/n)?"; 's' (sim) = yes
                treinar = input("Continuar (s/n)? ") == 's'
            except:
                # bare except: any input failure (e.g. EOF) stops training
                treinar = False
        else:
            treinar = False
    data[:trainLen, 3][i:i + stepsForward]
    # (continuation: training-target windows — the statement opens above
    # this visible chunk)
    for i in range(trainLen - stepsForward)
])
# Validation targets: stepsForward-long windows of column 3 from the tail,
# offset past the training region by xLen + 2 rows
yTest = np.array([
    data[trainLen + xLen + 2:, 3][i:i + stepsForward]
    for i in range(valLen - xLen - stepsForward - 1)
])
# Standardize targets; the scaler is fit on the training targets only
yScaler = StandardScaler()
yScaler.fit(yTrain)
yTrain = yScaler.transform(yTrain)
yTest = yScaler.transform(yTest)
# Create the generator for training
trainDataGen = TimeseriesGenerator(xTrain,
                                   yTrain,
                                   length=xLen,
                                   sampling_rate=1,
                                   batch_size=20)
# Create an analogous generator for validation during training
testDataGen = TimeseriesGenerator(xTest,
                                  yTest,
                                  length=xLen,
                                  sampling_rate=1,
                                  batch_size=20)
# Build the network
modelD = Sequential()
modelD.add(Dense(150, input_shape=(xLen, 5), activation="linear"))  # 5 = number of channels
modelD.add(Flatten())
# Training hyper-parameters
epochs = 5
train_split_size = 0.7
test_set_per_tf = []  # holds (Xtest, ytest) per timeframe for later validation

# Train
# NOTE(review): relies on names defined elsewhere in the file: time_frames,
# cross_timeframe_dfs, n_inputs, batch_size, create_train_test_set,
# create_model, plt
for timeframe_idx in range(len(time_frames)):
    df = cross_timeframe_dfs[timeframe_idx].copy()
    # NOTE(review): the 5th return value shadows the name train_test_split
    Xtrain, Xtest, ytrain, ytest, train_test_split = create_train_test_set(
        df=df, train_split=train_split_size)
    n_features = Xtrain.shape[1]
    test_set_per_tf.append((Xtest, ytest))
    # Setup model & TimeseriesGenerator, and train the model
    model = create_model(n_inputs=n_inputs, n_features=n_features)
    generator = TimeseriesGenerator(Xtrain,
                                    ytrain,
                                    length=n_inputs,
                                    batch_size=batch_size)
    model.fit_generator(generator, epochs=epochs)
    print(f"model saved as model_tf_{time_frames[timeframe_idx]}")
    model.save(f"model_tf_{time_frames[timeframe_idx]}.h5")
    # Visualize the loss function over the training epochs
    # NOTE(review): loop-body extent is ambiguous in the original layout;
    # plotting per timeframe is assumed here — confirm
    loss_val_per_epoch = model.history.history['loss']
    plt.plot(range(len(loss_val_per_epoch)), loss_val_per_epoch)
    plt.title('Loss vs. training epochs')
    plt.ylabel('Loss')
    plt.xlabel('epochs')
    plt.show()

# #### Section D.1 - Model Validation (Linear Regression Feature)
# Separate scalers for the 2-column features and the 1-column target
# NOTE(review): both scalers are fit on the FULL series before the
# train/test split below — test statistics leak into the scaling; confirm
# whether that is acceptable here
scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()
data_X_scaled = scaler_X.fit_transform(np.array(data_X).reshape(-1, 2))
data_y_scaled = scaler_y.fit_transform(np.array(data_y).reshape(-1, 1))
# Split the data into training and test sets (last 90 rows held out):
train_X = data_X_scaled[:-90]
train_y = data_y_scaled[:-90]
test_X = data_X_scaled[-90:]
test_y = data_y_scaled[-90:]
# Create time-series generators (each sample sees the 3 most recent values):
train_data_gen = TimeseriesGenerator(train_X,
                                     train_y,
                                     length=3,
                                     sampling_rate=1,
                                     stride=1,
                                     batch_size=50)
test_data_gen = TimeseriesGenerator(test_X,
                                    test_y,
                                    length=3,
                                    sampling_rate=1,
                                    stride=1,
                                    batch_size=10)
# Build the model: two stacked LSTMs over (3 timesteps, 2 features)
model = Sequential([LSTM(4, recurrent_dropout=0.15, return_sequences=True, input_shape=(3, 2)),
                    LSTM(4, recurrent_dropout=0.15, return_sequences=False),
                    Dense(1)])
model.compile(optimizer='adam', loss='mse')
model.summary()
# Train the model (during training we monitor the MSE metric,
# Hold out the tail of the series (from test_ind on) for validation
test = covid_cases[test_ind:]
print(len(test),'train')  # NOTE(review): label says 'train' but this prints the TEST length
print(len(train),'train')
#we divide the historical data into train and test clusters
#length of train dataset = 223
#length of test dataset = 25
# Scale to [0, 1]; the scaler is fit on the training split only
# NOTE(review): the reshape sizes (223, 25) are hard-coded to this dataset
scaler = MinMaxScaler()
scaler.fit(np.reshape(train,(223,1)))
scaled_train = scaler.transform(np.reshape(train,(223,1)))
scaled_test = scaler.transform(np.reshape(test,(25,1)))
#define prediction training length
pred_len = 20
#define future prediction numbers
batch_size=1
#Create training generator
generator = TimeseriesGenerator(scaled_train,scaled_train,
                                length=pred_len,batch_size=batch_size)
#Create validation generator
# NOTE(review): with only 25 test rows and a 20-step window, this yields
# just 5 validation samples
val_generator = TimeseriesGenerator(scaled_test,scaled_test,
                                    length=pred_len,batch_size=batch_size)
#The number of features in which the data will predict the next values.
n_features = 1
#Create the LSTM Model
model = Sequential()
model.add(LSTM(60,input_shape=(pred_len,n_features)))
model.add(Dense(1))
model.compile(optimizer='rmsprop',loss='mse')
#Declare an early stop
early_stop = EarlyStopping(monitor='val_loss',patience=2)
#Fit the training data into the model