def prepare(self, dfn2, pct=0.8, lables=None, lableIndex=0):
    """Scale the data, build stepped labels and create train/val generators.

    Args:
        dfn2: input data, a pandas DataFrame or a 2-D numpy array.
        pct: train fraction (if < 1) or an absolute number of training rows.
        lables: optional explicit label vector; defaults to the column at
            ``lableIndex`` of the scaled data.
        lableIndex: column index used to derive labels when none are given.

    Returns:
        (scaled data, training generator, validation generator, fitted scaler)
        — also stored on `self` as dfscaled/trng/valg/scaler.
    """
    data = dfn2
    if isinstance(dfn2, pd.core.frame.DataFrame):
        data = dfn2.values
    # Interpret pct as a fraction when < 1, otherwise as a row count.
    trni = int(len(data) * pct) if (pct < 1) else pct
    # Fit the scaler on the training slice only, to avoid lookahead leakage
    # into the validation split.  (The original also had a no-op statement
    # `dfs.mean_, dfs.scale_` here — removed.)
    scaler = StandardScaler()
    dfs = scaler.fit(data[:trni])
    dfscaled = (data - dfs.mean_) / dfs.scale_
    lables = lables if lables is not None else dfscaled[:, lableIndex]
    lables = makestepTS(lables, 0, self.nsteps)
    # makestepTS may shorten the label vector; trim the data to match.
    dfscaled = dfscaled[:len(lables)]
    trnX, trnY = dfscaled[:trni], lables[:trni]
    valX, valY = dfscaled[trni:], lables[trni:]
    print(f"#Training: {trni} samples, Test: {len(valY), len(valX)} samples!")
    trng = TimeseriesGenerator(trnX, trnY, **self.tsParam)
    valg = TimeseriesGenerator(valX, valY, **self.tsParam)
    self.dfscaled, self.lables, self.lableIndex = dfscaled, lables, lableIndex
    self.df, self.trng, self.valg, self.scaler = dfn2, trng, valg, scaler
    return self.dfscaled, self.trng, self.valg, self.scaler
def getGenerators(conf, normeddf, inputs, ouputs):
    """Build train/validation/full-series generators from a config dict.

    Args:
        conf: dict with keys 'modelFile', 'tsParams', 'lookahead' and
            optionally 'train_pct' / 'train_count'.
        normeddf: normalized DataFrame holding both inputs and outputs.
        inputs: column names used as model inputs.
        ouputs: column names used as targets.

    Returns:
        (modelFile, history, lookahead, trng1, valg1, valg2, X, y)
    """
    modelFile = conf['modelFile'] or "models/simpleModel.h5"
    tsParams = conf['tsParams']
    lookahead = conf['lookahead']
    history = tsParams['length']
    train_pct = conf.get('train_pct', 0.9)
    train_count = conf.get('train_count', int(len(normeddf) * train_pct))
    X = normeddf[inputs].values
    y = normeddf[ouputs].values
    # Shift targets forward by the lookahead; `or None` yields a full slice
    # when lookahead == 1 (a stop of 0 would otherwise slice to empty).
    X = X[:(-lookahead + 1) or None]
    y = y[lookahead - 1:]
    Xtrn, ytrn = X[:train_count], y[:train_count]
    Xtst, ytst = X[train_count:], y[train_count:]
    tsParams1 = tsParams.copy()
    tsParams2 = tsParams.copy()
    tsParams2['batch_size'] = 1
    trng1 = TimeseriesGenerator(Xtrn, ytrn, **tsParams1)
    valg1 = TimeseriesGenerator(Xtst, ytst, **tsParams1)
    # Full-series generator with batch_size 1, handy for stepwise prediction.
    valg2 = TimeseriesGenerator(X, y, **tsParams2)
    return modelFile, history, lookahead, trng1, valg1, valg2, X, y
def test_TimeseriesGenerator_on_text():
    """Windowing over a byte string, with and without sequence targets."""
    txt = bytearray("Keras is simple.", 'utf-8')
    data_gen = TimeseriesGenerator(txt, txt, hlength=10, batch_size=1, gap=1)
    # Last window: the 10 bytes preceding the final '.' target.
    assert data_gen[-1][0].shape == (1, 10) and data_gen[-1][1].shape == (1, )
    # ndarray.tostring() was removed in numpy 2.0; tobytes() is the
    # byte-identical replacement.
    assert data_gen[-1][0].tobytes() == b" is simple"
    assert data_gen[-1][1].tobytes() == b"."
    data_gen = TimeseriesGenerator(txt, txt, hlength=10, target_seq=True,
                                   batch_size=1, gap=1)
    # target_seq=True yields a whole shifted sequence as the target.
    assert data_gen[-1][0].shape == (1, 10) and \
        data_gen[-1][1].shape == (1, 10, 1)
    assert data_gen[0][1].tobytes() == b"eras is si"
def test_TimeSeriesGenerator_doesnt_miss_any_sample1():
    """Generator length must match the analytic count for every (gap, length)
    combination, and for a batched generator."""
    samples = np.arange(10).reshape(-1, 1)
    for gap in range(10):
        for hist in range(1, 11 - gap):
            n_expected = len(samples) - hist + 1 - gap
            if n_expected <= 0:
                continue
            gen = TimeseriesGenerator(samples, samples, length=hist,
                                      batch_size=1, gap=gap)
            assert n_expected == len(gen)
    series = np.arange(7)
    gen = TimeseriesGenerator(series, series, hlength=3, batch_size=2)
    expected_len = ceil((len(series) - gen.hlength + 1.0) / gen.batch_size)
    print('gap: %i, hlength: %i, expected-len:%i, len: %i' %
          (gen.gap, gen.hlength, expected_len, gen.len))
    assert len(gen) == expected_len
def LSTM_model(self):
    """Train an LSTM on the closing price with an 80/20 train/test split.

    Builds window generators over `self.data['Close']`, appends an
    LSTM(10) + Dense(1) head to `self.model`, compiles with Adam/MSE and
    fits for a fixed number of epochs.
    """
    split_percent = 0.8
    num_epochs = 8
    close_price = self.data['Close'].values
    close_price = close_price.reshape((-1, 1))
    split = int(split_percent * len(close_price))
    price_train = close_price[:split]
    price_test = close_price[split:]
    # batch_size=1: one window per gradient step.
    train_gen = TimeseriesGenerator(price_train, price_train,
                                    length=self.look_back, batch_size=1)
    # NOTE(review): test_gen is constructed but never used here — confirm
    # whether evaluation was intended.
    test_gen = TimeseriesGenerator(price_test, price_test,
                                   length=self.look_back, batch_size=1)
    self.model.add(
        LSTM(10, activation='relu', input_shape=(self.look_back, 1)))
    self.model.add(Dense(1))
    self.model.compile(optimizer='adam', loss='mse')
    # fit() accepts generators directly; fit_generator() is deprecated
    # (and the rest of this file already uses fit()).
    self.model.fit(train_gen, epochs=num_epochs, verbose=1)
def createGenerators(self):
    """Create generators for training, validation and testing data.

    The series is split roughly into 2/3 training, 1/6 validation and
    1/6 testing (the +27 / +5 boundary offsets are kept from the original
    split — presumably window-alignment adjustments; confirm).

    Sets:
        - train_gen: generator for training data
        - valid_gen: generator for validation data
        - test_gen: generator for testing data (batch_size 1)
    """
    values = self.data.value
    n = len(values)
    cut_a = int(2 / 3 * n + 27)
    cut_b = int(5 / 6 * n + 5)
    # shuffle=False is essential: these are continuous time series.
    common = dict(sampling_rate=1, shuffle=False, length=self.time_steps)
    self.train_gen = TimeseriesGenerator(
        values[:cut_a], values[:cut_a],
        batch_size=self.batch_size, **common)
    self.valid_gen = TimeseriesGenerator(
        values[cut_a:cut_b], values[cut_a:cut_b],
        batch_size=self.batch_size, **common)
    self.test_gen = TimeseriesGenerator(
        values[cut_b:], values[cut_b:],
        batch_size=1, **common)
def test_TimeSeriesGenerator_doesnt_miss_any_sample():
    """Every sample must be emitted exactly once, for plain and for
    strided/batched/shuffled configurations."""
    data = np.arange(10).reshape(-1, 1)
    for hist in range(3, 10):
        gen = TimeseriesGenerator(data, data, length=hist, batch_size=1)
        assert max(0, len(data) - hist) == len(gen)
        if len(gen) > 0:
            # Each index in [hist, 10) appears as a prediction target once.
            targets = np.concatenate([gen[i][1] for i in range(len(gen))],
                                     axis=0)
            assert_allclose(targets, np.arange(hist, 10).reshape(-1, 1))
    data = np.arange(23).reshape(-1, 1)
    cases = [
        # (stride, length, batch_size, shuffle)
        (1, 3, 6, False),
        (1, 3, 6, True),
        (5, 4, 6, True),
        (7, 3, 5, False),
        (3, 1, 6, False),
        (5, 3, 6, False),
        (3, 7, 6, False),
    ]
    for stride, length, batch_size, shuffle in cases:
        gen = TimeseriesGenerator(data, data, length=length, sampling_rate=1,
                                  stride=stride, start_index=0,
                                  end_index=None, shuffle=shuffle,
                                  reverse=False, batch_size=batch_size)
        if shuffle:
            # With shuffle=True every batch is full-sized.
            expected_sequences = ceil(
                (23 - length) / float(batch_size * stride)) * batch_size
        else:
            # Otherwise the last batch may be short when
            # (samples - length) / stride is not a multiple of batch_size.
            expected_sequences = ceil((23 - length) / float(stride))
        expected_batches = ceil(expected_sequences / float(batch_size))
        batches = [gen[i][1] for i in range(len(gen))]
        assert expected_sequences == sum(len(b) for b in batches)
        assert expected_batches == len(batches)
def sequence_to_supervised(look_back, close_train, close_test):
    """Wrap train/test price series into supervised-window generators.

    Returns (train_generator, test_generator); the test generator yields
    one window per batch for stepwise evaluation.
    """
    from keras.preprocessing.sequence import TimeseriesGenerator
    gen_train = TimeseriesGenerator(close_train, close_train,
                                    length=look_back, batch_size=20)
    gen_test = TimeseriesGenerator(close_test, close_test,
                                   length=look_back, batch_size=1)
    return gen_train, gen_test
def combined_generator(inputMatrix, labels, timesteps, batch_size):
    """Yield ([image windows, label windows], label targets) batches by
    zipping two parallel TimeseriesGenerators over the same index range."""
    img_gen = TimeseriesGenerator(inputMatrix, labels,
                                  length=timesteps, batch_size=batch_size)
    ang_gen = TimeseriesGenerator(labels, labels,
                                  length=timesteps, batch_size=batch_size)
    for img_batch, ang_batch in zip(img_gen, ang_gen):
        img_windows, _ = img_batch            # targets of img_gen unused
        label_windows, label_targets = ang_batch
        yield [img_windows, label_windows], label_targets
def buildModel(self, currentKey):
    """Grid-search LSTM hyperparameters on the close-price series and save
    the model with the lowest test MSE to `<currentKey>.h5`.

    Args:
        currentKey: basename for the saved model file.
    """
    trainSize = 0.8
    neurons = [10, 15, 20, 25]          # fixed typo: was `nuerons`
    num_epochs = [100, 250, 500, 1000]
    reps = 5
    close = self.data['close'].values
    trainSplit = int(len(self.data) * trainSize)
    train, test = close[:trainSplit], close[trainSplit:]
    train = np.reshape(train, (-1, 1))
    test = np.reshape(test, (-1, 1))
    trainData = TimeseriesGenerator(train, train, length=self.time_steps,
                                    batch_size=20)
    testData = TimeseriesGenerator(test, test, length=self.time_steps,
                                   batch_size=1)
    results = {}
    # Targets in original units for the error computation.
    testVals = self.scaler.inverse_transform(test[self.time_steps:])
    for _ in range(reps):
        for n in neurons:
            for e in num_epochs:
                model = Sequential()
                model.add(LSTM(n, activation='relu',
                               input_shape=(self.time_steps, 1)))
                model.add(Dense(1))
                model.compile(optimizer='adam', loss='mse')
                model.fit(trainData, epochs=e, verbose=1)
                # predict() accepts generators; predict_generator() is
                # deprecated.
                pred = model.predict(testData)
                squared = [(t - p) ** 2 for p, t in zip(pred, testVals)]
                mean = sum(squared) / len(squared)
                # NOTE(review): keyed by error value — configs with equal
                # error overwrite earlier models.
                results[mean[0]] = model
    bestModel = results[min(results.keys())]
    bestModel.save(currentKey + ".h5")
def fit(self, train, dev, n_input=24, nr_epochs=10, debug=True):
    """Fit `self.model0` on windowed training data with a cyclic LR schedule.

    Args:
        train: 2-D array; column 0 is the prediction target.
        dev: 2-D validation array with the same layout as `train`.
        n_input: window length (timesteps per sample).
        nr_epochs: number of training epochs.
        debug: unused here; kept for interface compatibility.
    """
    # BUG FIX: this generator construction was commented out while
    # `generator` was still passed to fit_generator below — NameError.
    generator = TimeseriesGenerator(train, train[:, 0], length=n_input)
    dev_generator = TimeseriesGenerator(dev, dev[:, 0], length=n_input)
    batch_size = 32
    epoch_steps = train.shape[0] / batch_size
    # NOTE(review): self.model0 is assumed to be built elsewhere — the
    # build_model call here was commented out in the original.
    clr_cb = CyclicLR(mode='triangular2', base_lr=0.00001, max_lr=0.01,
                      step_size=epoch_steps, gamma=0.8)
    history = self.model0.fit_generator(generator,
                                        validation_data=dev_generator,
                                        shuffle=False,
                                        epochs=nr_epochs,
                                        verbose=1,
                                        steps_per_epoch=epoch_steps,
                                        callbacks=[clr_cb])
    self.history = history
    self.n_input = n_input
def fit_model_stage(self, train: np.array) -> keras.models.Sequential:
    """Fit a single-feature LSTM forecaster on `train` and return it.

    Based on the example at
    https://medium.com/@cdabakoglu/time-series-forecasting-arima-lstm-prophet-with-python-e73a750a9887
    """
    n_features = 1
    batch_size = 64
    generator = TimeseriesGenerator(train, train,
                                    length=self.window_size,
                                    batch_size=batch_size)
    model = keras.models.Sequential()
    # stateful=False: no hidden state carried between batches
    # (stateful=True would allow correlated long-term predictions).
    model.add(keras.models.Sequential.add.__self__ if False else
              layers.LSTM(40, activation='relu',
                          input_shape=(None, n_features),
                          stateful=False))
    model.add(layers.Dense(1))
    model.compile(optimizer='adam', loss='mse')
    model.fit(generator, epochs=20)
    return model
def to_ts_data(stock, stocks_df, sequence_inlength=60):
    """Build train/test window generators for one stock's adjusted close.

    Generator batches have shape
    (batch, sequence_inlength, feature-dimensions).

    Returns:
        (data_gen_train, data_gen_test)
    """
    prices = stocks_df[stocks_df.company_name == stock]['Adj Close'].to_numpy()
    data, targets = get_data_targets(prices, sequence_inlength)
    train_x, train_y, test_x, test_y = test_train_split(data, targets,
                                                        split=0.8)
    gen_train = TimeseriesGenerator(train_x, train_y,
                                    length=sequence_inlength)
    gen_test = TimeseriesGenerator(test_x, test_y,
                                   length=sequence_inlength)
    return gen_train, gen_test
def prepForPredict(self, dfn2, start=0, howmany=100, lables=None,
                   scaleout=True, title=""):
    """Scale raw input with the fitted scaler, predict one window batch and
    return (targets, predictions), optionally mapped back to original units.

    Args:
        dfn2: DataFrame or array of raw (unscaled) inputs.
        start: first row of the slice fed to the generator.
        howmany: batch size (number of windows predicted at once).
        lables: optional explicit labels; defaults to the scaled label column.
        scaleout: if True, undo the standardization on the outputs.
        title: unused here; kept for interface compatibility.
    """
    scaler = self.scaler
    tsParams = self.tsParam.copy()
    tsParams['batch_size'] = howmany
    data = dfn2
    if isinstance(dfn2, pd.core.frame.DataFrame):
        data = dfn2.values
    data = (data - scaler.mean_) / scaler.scale_
    lables = lables if lables is not None else data[:, self.lableIndex]
    lables = makestepTS(lables, 0, self.nsteps)
    data = data[:len(lables)]
    # BUG FIX: was `dfscaled[start:]` — an undefined local name; the scaled
    # array in this function is `data`.
    g = TimeseriesGenerator(data[start:], lables[start:], **tsParams)
    model = self.model
    x, y = g[0]
    h = model.predict(x)
    h1, y1 = h, y
    if scaleout:
        # Undo standardization for the label column only.
        h1 = h * scaler.scale_[self.lableIndex] + scaler.mean_[self.lableIndex]
        y1 = y * scaler.scale_[self.lableIndex] + scaler.mean_[self.lableIndex]
    return y1, h1
def getTestBiwiForImageModel(testSubjects, timesteps, overlapping,
                             output_begin, num_outputs, batch_size,
                             stateful=False, record=False):
    """Load BIWI test subjects and wrap each as evaluation data.

    Args:
        testSubjects: subject list forwarded to readBIWIDataset.
        timesteps: window length; None means no windowing (raw arrays).
        overlapping: unused here; kept for interface compatibility.
        output_begin, num_outputs: column slice of the labels to keep.
        batch_size: generator batch size.
        stateful: if True, drop leading samples so the sequence count
            divides evenly into full batches for a stateful model.
        record: unused here; kept for interface compatibility.

    Returns:
        (test_generators, test_labelSets): one generator (or raw tuple) and
        one label array per subject.
    """
    test_generators, test_labelSets = [], []
    testBiwi = readBIWIDataset(subjectList=testSubjects)
    for inputMatrix, labels in testBiwi:
        labels = labels[:, output_begin:output_begin + num_outputs]
        if timesteps is None:  # idiom fix: was `timesteps == None`
            # No windowing requested: pass the raw arrays through.
            test_generators.append((inputMatrix, labels))
        else:
            start_index = 0
            if stateful:
                start_index = (inputMatrix.shape[0] % batch_size) - 1 \
                    if batch_size > 1 else 0
            data_gen = TimeseriesGenerator(inputMatrix, labels,
                                           length=timesteps,
                                           batch_size=batch_size,
                                           start_index=start_index)
            test_generators.append(data_gen)
            if stateful:
                # Keep labels aligned with the trimmed generator.
                labels = labels[start_index:]
        test_labelSets.append(labels)
    return test_generators, test_labelSets
def generate_one_long_series(orig_data, input_shape=15, out_shape=2):
    # NOTE(review): each generator is created and immediately discarded, and
    # nothing is yielded or returned, so this function currently has no
    # observable effect; `out_shape` is also unused.  Looks like an
    # unfinished stub — confirm the intent before relying on it.
    for day_time_series in orig_data:
        generator = TimeseriesGenerator(
            day_time_series,
            day_time_series,
            length=input_shape,
        )
def getDataGeneratorForData(dataArray):
    """Turn a 1-D sequence into a window generator (window=3, batch=8)."""
    # Column-vector shape expected by TimeseriesGenerator.
    values = array(dataArray).reshape((-1, 1))
    return TimeseriesGenerator(values, values, length=3, batch_size=8)
def generate_batches_for_customer(self, data, targets):
    """Window one customer's series into model-ready batches.

    See the [keras docs](https://keras.io/preprocessing/sequence/#timeseriesgenerator).
    """
    return TimeseriesGenerator(
        data,
        targets,
        length=SEQUENCE_LENGTH,
        sampling_rate=SAMPLING_RATE,
        batch_size=BATCH_SIZE,
    )
def create_test_data_onenode(self, nodename, scalerdata):
    """Build the offline test inputs for one node.

    Queries Elasticsearch for `nodename` between `self.test_start` and
    `self.test_end`, scales the features with the provided scaler and
    windows them into model input.  (Docstring translated from Chinese.)

    Args:
        nodename: node to build test data for.
        scalerdata: fitted scaler used to transform the features.

    Returns:
        (Test_Data_X, Test_Time): windowed inputs of shape
        (samples, timesteps, features) and the matching timestamps.
    """
    starttime = int(round(time.mktime(self.test_start.timetuple())) * 1000)
    endtime = int(round(time.mktime(self.test_end.timetuple())) * 1000)
    dfone = esinteracton.search_nodename_timestamp_dataframe(
        nodename=nodename, starttime=starttime, endtime=endtime,
        metrics=self.metrics)
    data = self.getTestData(dfone)
    targets = self.getTestTargets(dfone)
    Test_Time = self.getTestTime(dfone)
    data = scalerdata.transform(data)
    data_generator = TimeseriesGenerator(data, targets,
                                         length=self.timesteps,
                                         batch_size=128)
    # Collect all batches and concatenate once — the original concatenated
    # inside the loop, which is quadratic in the data size.  (A dead
    # `np.argwhere(np.isnan(...))` statement was also removed.)
    batches = [data_generator[i][0] for i in range(len(data_generator))]
    Test_Data_X = np.concatenate(batches) if batches else []
    return Test_Data_X, Test_Time
def create_train_data_onenode(self, nodename):
    """Build training inputs/targets for a single node.

    Selects this node's rows from `self.df`, min-max scales features and
    targets independently, then windows them for model training.
    (Docstring translated from Chinese.)

    Args:
        nodename: node to build training data for.

    Returns:
        (Train_Data_X, Train_Data_Y): windowed inputs of shape
        (samples, timesteps, features) and the aligned targets.
    """
    dfone = self.df[self.df['nodename'] == nodename]
    data = self.getTrainData(dfone)
    targets = self.getTraintarget(dfone)
    scaler1 = MinMaxScaler(feature_range=(0, 1))
    data = scaler1.fit_transform(data)
    scaler2 = MinMaxScaler(feature_range=(0, 1))
    targets = scaler2.fit_transform(targets)
    data_generator = TimeseriesGenerator(data, targets,
                                         length=self.timesteps,
                                         batch_size=1)
    # Gather batches and concatenate once instead of growing the arrays
    # inside the loop (which is quadratic in the data size).
    xs, ys = [], []
    for i in range(len(data_generator)):
        X, Y = data_generator[i]
        xs.append(X)
        ys.append(Y)
    Train_Data_X = np.concatenate(xs) if xs else []
    Train_Data_Y = np.concatenate(ys) if ys else []
    return Train_Data_X, Train_Data_Y
def trainAngleModelOnSets(model, epoch, trainingSubjects, set_gen, timesteps,
                          output_begin, num_outputs, batch_size, in_epochs=1,
                          record=False):
    # Trains `model` on each (inputMatrix, labels) set yielded by `set_gen`,
    # building one TimeseriesGenerator per set, and returns the model.
    # NOTE(review): the generator is built from `labels` only —
    # `inputMatrix` is never used.  That may be intentional for an
    # angle-on-angle model, but confirm it is not a copy-paste slip.
    c = 0
    for inputMatrix, labels in set_gen:
        printLog('%d. set (Dataset %d) being trained for epoch %d! by %s' %
                 (c + 1, trainingSubjects[c], epoch + 1, now()),
                 record=record)
        # Keep only the requested output columns (e.g. a subset of angles).
        labels = labels[:, output_begin:output_begin + num_outputs]
        data_gen = TimeseriesGenerator(labels, labels, length=timesteps,
                                       batch_size=batch_size)
        model.fit_generator(data_gen, steps_per_epoch=len(data_gen),
                            epochs=in_epochs, verbose=1)
        c += 1
    return model
def create_time_window_dataset(input_data, time_steps, batch_size):
    """Converts the pandas.DataFrame object in the right model input

    Arguments:
        input_data -- array containing the input data
        time_steps -- number of time steps to look back
        batch_size -- batch size used when materializing the windows

    Returns:
        data_X -- LSTM input with shape (samples, time_steps, features)
        data_Y -- labels for the data_X with shape (samples, features)
    """
    generator = TimeseriesGenerator(input_data, input_data,
                                    length=time_steps, batch_size=batch_size)
    # Collect all batches and concatenate once — concatenating inside the
    # loop (as before) copies the accumulated arrays every iteration,
    # making the conversion quadratic.
    xs, ys = [], []
    for i in range(len(generator)):
        batch_X, batch_Y = generator[i]
        xs.append(batch_X)
        ys.append(batch_Y)
    data_X = np.concatenate(xs) if xs else []
    data_Y = np.concatenate(ys) if ys else []
    return data_X, data_Y
def item_5(x_train, y_train, n_input):
    """Build a training window generator over (x_train, y_train), echoing
    the input shapes and window length for debugging."""
    print(x_train.shape)
    print(y_train.shape)
    print(n_input)
    return TimeseriesGenerator(x_train, y_train, length=n_input)
def trainData(request):
    """Django view: train an LSTM on the stored series and render a plot of
    actuals vs. a 12-step forecast.

    Reads `datasCSV` rows, holds out the last 12 points, min-max scales,
    trains for 300 epochs, forecasts 12 steps autoregressively and returns
    'home/hasil.html' with the plot embedded as a base64 data URI.
    """
    queryset = str(datasCSV.objects.all().query)
    df = pd.read_sql_query(queryset, connection)
    df.nama_tahun = pd.to_datetime(df.nama_tahun)
    df = df.set_index('nama_tahun')
    print(df[['data_content']])
    df2 = df[['data_content']]
    train, test = df2[:-12], df2[-12:]
    # Scale with statistics fitted on the training split only.
    scaler = MinMaxScaler()
    scaler.fit(train)
    train = scaler.transform(train)
    test = scaler.transform(test)
    n_input = 12
    n_features = 1
    generator = TimeseriesGenerator(train, train, length=n_input,
                                    batch_size=6)
    model = Sequential()
    model.add(LSTM(200, activation='relu', input_shape=(n_input, n_features)))
    model.add(Dropout(0.15))
    model.add(Dense(1))
    model.compile(optimizer='sgd', loss='mse')
    # fit() accepts generators directly; fit_generator() is deprecated.
    model.fit(generator, epochs=300)
    # Autoregressive 12-step forecast: feed each prediction back in.
    pred_list = []
    batch = train[-n_input:].reshape((1, n_input, n_features))
    for i in range(n_input):
        pred_list.append(model.predict(batch)[0])
        batch = np.append(batch[:, 1:, :], [[pred_list[i]]], axis=1)
    df_predict = pd.DataFrame(scaler.inverse_transform(pred_list),
                              index=df[-n_input:].index,
                              columns=['Prediksi'])
    # (Removed: unused `df_predict2` and two no-op `df_test.tail(13)` calls.)
    df_test = pd.concat([df, df_predict], axis=1)
    plt.figure(figsize=(20, 5))
    plt.plot(df_test.index, df_test['data_content'])
    plt.plot(df_test.index, df_test['Prediksi'], color='r')
    fig = plt.gcf()
    buf = io.BytesIO()
    fig.savefig(buf, format='png')
    buf.seek(0)
    string = base64.b64encode(buf.read())
    uri = urllib.parse.quote(string)
    print(uri)
    context = {
        'matplot': uri,
        'title': 'Training',
    }
    return render(request, 'home/hasil.html', context)
def __init__(self, data, timesteps):
    """Store the series and prepare a per-sample window generator.

    batch_size=1 keeps the generator independent of the neural network's
    own batch size.
    """
    self.data = data
    self.generator = TimeseriesGenerator(data, data,
                                         length=timesteps, batch_size=1)
def _create_generator(self):
    """Split Sales 80/20 into train/test and build window generators.

    Sets date_train/date_test, sales_train/sales_test, train/test arrays
    and train_generator/test_generator (window length 3).
    """
    sales = self.df['Sales'].values
    reshaped = sales.reshape((-1, 1))
    cut = int(0.8 * len(reshaped))
    self.date_train = self.df['Date'][:cut]
    self.date_test = self.df['Date'][cut:]
    self.sales_train = self.df['Sales'].values[:cut]
    self.sales_test = self.df['Sales'].values[cut:]
    self.train = reshaped[:cut]
    self.test = reshaped[cut:]
    self.train_generator = TimeseriesGenerator(self.train, self.train,
                                               length=3, batch_size=5)
    self.test_generator = TimeseriesGenerator(self.test, self.test,
                                              length=3, batch_size=1)
    print("INFO: Dataset Generated.")
def create_gens(self, X, y, seq_length=30, batch_size=15):
    """Wrap (X, y) into a sliding-window generator.

    Args:
        X: input feature array.
        y: target array aligned with X.
        seq_length: window length in timesteps.
        batch_size: windows per batch.
    """
    return TimeseriesGenerator(X, y, length=seq_length, sampling_rate=1,
                               stride=1, batch_size=batch_size)
def FullTSGeneratorDirect(df, X, Y, SEQ_LEN, batch_size=64):
    """Build an unshuffled generator over the whole series plus the
    timestamps aligned with its targets (the first SEQ_LEN rows have no
    complete window and are skipped)."""
    gen = TimeseriesGenerator(X, Y, length=SEQ_LEN,
                              batch_size=batch_size, shuffle=False)
    aligned_timestamps = df.index[SEQ_LEN:]
    return df, aligned_timestamps, gen
def backTest(model, data):
    """Train `model` on the first 80% of `data` and evaluate on the rest.

    Args:
        model: compiled Keras model accepting (n_input, 1) windows.
        data: 1-D sequence of observations.

    Returns:
        The model's evaluation metric(s) on the held-out windows.
    """
    data = array(data)
    train_size = int(len(data) * 0.8)
    train, test = data[:train_size], data[train_size:]
    n_input = 3
    train = train.reshape((len(train), 1))
    generator = TimeseriesGenerator(train, train, length=n_input,
                                    batch_size=8)
    # fit()/evaluate() accept generators; the *_generator variants are
    # deprecated.  (Also removed a duplicate `n_input = 3` and an unused
    # `n_features` local.)
    model.fit(generator, steps_per_epoch=1, epochs=500, verbose=0)
    test = test.reshape((len(test), 1))
    generator = TimeseriesGenerator(test, test, length=n_input, batch_size=8)
    acc = model.evaluate(generator)
    return acc
def generate_time_series_sample(data, target, timestep):
    """Generate samples of a time series with a certain length."""
    # batch_size covers every possible window, so gen[0] is the whole set.
    gen = TimeseriesGenerator(data, target, length=timestep,
                              sampling_rate=1,
                              batch_size=(data.shape[0] - timestep))
    first_batch = gen[0]
    return first_batch[0], first_batch[1]