import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing import timeseries_dataset_from_array


def create_tf_dataset(
    data_array: np.ndarray,
    input_sequence_length: int,
    forecast_horizon: int,
    batch_size: int = 128,
    shuffle=True,
    multi_horizon=True,
):
    """Creates tensorflow dataset from numpy array.

    This function creates a dataset where each element is a tuple `(inputs, targets)`.
    `inputs` is a Tensor of shape `(batch_size, input_sequence_length, num_routes, 1)`
    containing the `input_sequence_length` past values of the timeseries for each node.
    `targets` is a Tensor of shape `(batch_size, forecast_horizon, num_routes)` containing
    the `forecast_horizon` future values of the timeseries for each node.

    Args:
        data_array: np.ndarray with shape `(num_time_steps, num_routes)`
        input_sequence_length: Length of the input sequence (in number of timesteps).
        forecast_horizon: If `multi_horizon=True`, the target will be the values of the
            timeseries for 1 to `forecast_horizon` timesteps ahead. If `multi_horizon=False`,
            the target will be the value of the timeseries `forecast_horizon` steps ahead
            (only one value).
        batch_size: Number of timeseries samples in each batch.
        shuffle: Whether to shuffle output samples, or instead draw them in chronological order.
        multi_horizon: See `forecast_horizon`.

    Returns:
        A tf.data.Dataset instance.
    """
    inputs = timeseries_dataset_from_array(
        np.expand_dims(data_array[:-forecast_horizon], axis=-1),
        None,
        sequence_length=input_sequence_length,
        shuffle=False,
        batch_size=batch_size,
    )

    target_offset = (
        input_sequence_length
        if multi_horizon
        else input_sequence_length + forecast_horizon - 1
    )
    target_seq_length = forecast_horizon if multi_horizon else 1
    targets = timeseries_dataset_from_array(
        data_array[target_offset:],
        None,
        sequence_length=target_seq_length,
        shuffle=False,
        batch_size=batch_size,
    )

    dataset = tf.data.Dataset.zip((inputs, targets))
    if shuffle:
        dataset = dataset.shuffle(100)

    return dataset.prefetch(16).cache()
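# A minimal usage sketch (not part of the original snippet): synthetic data with
# 1000 time steps and 5 routes, a 12-step input window and a 3-step multi-horizon
# target, matching the shapes described in the docstring above.
demo_array = np.random.rand(1000, 5).astype("float32")
demo_ds = create_tf_dataset(demo_array, input_sequence_length=12, forecast_horizon=3)
for demo_inputs, demo_targets in demo_ds.take(1):
    print(demo_inputs.shape)   # (128, 12, 5, 1)
    print(demo_targets.shape)  # (128, 3, 5)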
def _make_ds(self, data, norm_data):
    return timeseries_dataset_from_array(
        data=data["features"] if norm_data["features"] is None else norm_data["features"],
        targets=data["labels"] if norm_data["labels"] is None else norm_data["labels"],
        sequence_length=self._num_sessions,
        sequence_stride=1,
        shuffle=False,
        batch_size=self._batch_size,
    )
def make_dataset(self, data, stride=1, batch_size=32):
    data = np.array(data, dtype=np.float32)
    data_set = timeseries_dataset_from_array(
        data=data,
        targets=None,
        sequence_length=self.window_size,
        sequence_stride=stride,
        shuffle=True,
        batch_size=batch_size,
    )
    data_set = data_set.map(self.split_window)
    return data_set
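# `split_window` is referenced above but not defined in this snippet. A plausible
# sketch, assuming each window of length `window_size` is split into `input_width`
# input steps followed by `label_width` label steps (these attribute names are
# illustrative, not taken from the original class, and the method would live on it):
def split_window(self, window):
    inputs = window[:, : self.input_width, :]
    labels = window[:, self.input_width : self.input_width + self.label_width, :]
    # Re-assert the static shapes lost by slicing so downstream layers can infer them.
    inputs.set_shape([None, self.input_width, None])
    labels.set_shape([None, self.label_width, None])
    return inputs, labels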
window_size = 256
batchSize = 64 * 2
# data_generator = sequence.TimeseriesGenerator(x, y, length=256, batch_size=batchSize)
input_data = None
windows = [0] * len(all_songs)
for index, song in all_songs.items():  # dict.items() yields (key, value) tuples
    x = song[:-window_size, :]
    y = song[window_size:, :]
    window = preprocessing.timeseries_dataset_from_array(
        data=x,
        targets=y,
        sequence_length=window_size,
        sequence_stride=1,
        sampling_rate=1,
        shuffle=True,
        batch_size=batchSize)
    if input_data is None:
        input_data = window
    else:
        input_data = input_data.concatenate(window)

TwoWide(input_data)
print("finished")
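# Quick alignment check (illustrative, not part of the original script): with the
# x/y slices above, window i of x is paired with the sample immediately after it,
# i.e. the value window_size steps ahead of the window start.
import numpy as np
from tensorflow.keras import preprocessing

toy = np.arange(10, dtype=np.float32).reshape(-1, 1)
toy_ds = preprocessing.timeseries_dataset_from_array(
    data=toy[:-4], targets=toy[4:], sequence_length=4, batch_size=1)
toy_x, toy_y = next(iter(toy_ds))
print(toy_x.numpy().ravel(), toy_y.numpy().ravel())  # [0. 1. 2. 3.] [4.]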
import math

import numpy as np
from scipy import linalg
from tqdm import trange
from tensorflow.keras.preprocessing import timeseries_dataset_from_array


def _get_SSA_par(df, L=1440, n_max_tries=3):
    # 2 <= L <= N/2
    N = len(df)
    K = N - L + 1
    dataset = timeseries_dataset_from_array(
        data=df,
        targets=None,
        sequence_length=L,
        sequence_stride=1,
        sampling_rate=1,
        batch_size=len(df),
    )
    X = list(dataset.as_numpy_iterator())[0]
    print(X.shape)

    # Retry the SVD up to n_max_tries times; raise only if it never succeeds.
    for t in range(n_max_tries):
        try:
            U, s, V = linalg.svd(X, full_matrices=True, compute_uv=True,
                                 overwrite_a=False, check_finite=True,
                                 lapack_driver='gesvd')
            break
        except Exception:
            continue
    else:
        raise ValueError("SSA reached the max number of tries with error.")

    l = s ** 2  # partial variances
    r = len(s)  # np.linalg.matrix_rank(X)  # matrix rank and total number of components

    ### time-series components ###
    gkList = np.zeros(shape=(r, N))  # zero matrix in whose rows SSA components will be saved
    print('input:', X.shape)
    print('U:', U.shape)
    print('s:', s.shape)
    print('V:', V.shape)
    print('r:', r)
    print('gkList:', gkList.shape)
    for k in trange(r, position=0, leave=True):
        Uk = U[:, k]  # k-th order column singular vector
        Vk = V[k, :]  # k-th order row singular vector
        Xk = s[k] * np.outer(Uk, Vk)  # k-th order matrix component
        gk = []  # list in which to save successive k-th order component values
        for i in range(min(K - 1, L - 1), -max(K - 1, L - 1) - 1, -1):  # loop over diagonals
            gki = np.mean(np.diag(np.fliplr(Xk), i))  # successive time-series values
            gk.append(gki)
        gkList[k] = gk  # k-th order component

    ### w-corr matrix ###
    w = []  # list to which successive weights are added
    LL = min(L, K)
    KK = max(L, K)
    for ll in range(1, LL + 1):  # first part of the weights
        w.append(ll)
    for ll in range(LL + 1, KK + 1):  # middle part of the weights
        w.append(LL)
    for ll in range(KK + 1, N + 1):  # last part of the weights
        w.append(N - ll)
    kMin = kkMin = 0  # show w-corr matrix for first 20 index values
    kMax = kkMax = 20
    wMatrix = [
        [
            sum(w * gkList[k] * gkList[kk])
            / (math.sqrt(sum(w * gkList[k] * gkList[k])) * math.sqrt(sum(w * gkList[kk] * gkList[kk])))
            for k in range(kMin, kMax)
        ]
        for kk in range(kkMin, kkMax)
    ]
    wMatrix = np.array(wMatrix)

    return (r, l, gkList, wMatrix)
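# A small usage sketch (illustrative values, not from the original code): decompose a
# short noisy sine wave with a window of L=50 instead of the 1440-sample default.
demo_series = np.sin(np.linspace(0, 20 * np.pi, 500)) + 0.1 * np.random.randn(500)
r, l, gkList, wMatrix = _get_SSA_par(demo_series, L=50)
print(r, gkList.shape, wMatrix.shape)  # 50 components, (50, 500), (20, 20)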
# from sciann.functionals.rnn_field import RNNField
# from sciann import SciModel
# from sciann.utils import diff, set_random_seed
# from sciann.constraints import Data, Tie
import numpy as np
from tensorflow.keras.preprocessing import timeseries_dataset_from_array

# set_random_seed(1234)

tunits = 3

# Synthetic data generated from sin function over [0, 2pi]
x_true = np.linspace(0, np.pi * 2, 100)
y_true = np.sin(x_true)

dataset = timeseries_dataset_from_array(
    y_true[:-3], y_true[3:], sequence_length=tunits, batch_size=10)

for batch in dataset:
    inputs, targets = batch
    print('ins: {} outs: {}'.format(inputs, targets))
    raise ValueError  # stop after inspecting the first batch

# The network inputs should be defined with Variable.
t = RNNVariable(tunits, name='t', dtype='float64')

# Each network is defined by Functional.
y = RNNFunctional(
    'y', t, [5],
    activation='tanh',
    recurrent_activation='linear',
    return means


if __name__ == "__main__":
    SIZE = 10000
    # create an artificial time series
    time_series = np.random.random(SIZE)  # np.random.uniform(0, 1, SIZE) +
    WINDOW_SIZE = 25

    # for simplicity, use this tensorflow function to structure the time series dataset
    dataset = timeseries_dataset_from_array(
        time_series[:-WINDOW_SIZE],
        time_series[WINDOW_SIZE:],
        sequence_length=WINDOW_SIZE,
        batch_size=SIZE - WINDOW_SIZE,
        shuffle=False,
    )

    # transform Dataset object into array
    for (batch_of_sequences, batch_of_targets) in dataset:
        X = np.array(batch_of_sequences)
        y = np.array(batch_of_targets)

    # simulating a multi output task - two values to predict
    y = np.concatenate([
        np.expand_dims(y, axis=-1),
        np.expand_dims(np.roll(y, 1), axis=-1)
    ], axis=-1)
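    # Resulting shapes for the constants above (a quick sanity check, not part of
    # the original script): one row per window, and a two-column stacked target.
    print(X.shape, y.shape)  # (9951, 25) (9951, 2), i.e. SIZE - 2*WINDOW_SIZE + 1 rows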
import datetime as dt

import pandas as pd
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from tensorflow.keras.preprocessing import timeseries_dataset_from_array


def ts_offset_split(dataframe, steps, lookback, horizon, batch_size, scaler='standard'):
    '''
    This pipeline function returns 3 Keras Timeseries Dataset objects: train, validation,
    and test. The function first splits the data with the offset split method every 8th day.
    Afterwards the data is scaled using either the StandardScaler or MinMaxScaler from the
    scikit-learn library. Finally the dataframe is split using the lookback and horizon
    parameters.
    '''
    # Offset 8th Day Split
    start = 0
    end = 168
    offset = 24
    training = []
    validation = []
    for i in range(int((365 + 366) / 8)):
        train = dataframe.iloc[start:end]
        val = dataframe.iloc[end:end + offset]
        training.append(train)
        validation.append(val)
        start += 192
        end += 192

    # Decide Splits for sets
    train = pd.concat(training)
    val = pd.concat(validation)
    train = pd.concat([
        train,
        dataframe[(dataframe.index.date > val.index.max().date())
                  & (dataframe.index.date < dt.date(2021, 1, 1))]
    ])
    test = dataframe[dataframe.index.date >= dt.date(2021, 1, 1)]
    tmpdf = pd.concat([train, val, test])

    # Scaler
    if scaler == 'standard':
        X_scaler = StandardScaler()
        y_scaler = StandardScaler()
    elif scaler == 'minmax':
        X_scaler = MinMaxScaler()
        y_scaler = MinMaxScaler()
    elif scaler is None:
        print("Data has not been scaled.")
    else:
        print('Please specify scaler: standard, minmax, or None')

    # Training Split
    start = lookback + horizon
    end = start + train.shape[0]
    X_train = train.values
    y_train = tmpdf.iloc[start:end][['Value']]
    if scaler is not None:
        X_train = X_scaler.fit_transform(X_train)
        y_train = y_scaler.fit_transform(y_train)

    # Validation Split
    x_end = len(val) - lookback - horizon
    y_val_start = train.shape[0] + lookback + horizon
    X_val = val.iloc[:x_end]
    y_val = tmpdf.iloc[y_val_start:y_val_start + x_end][['Value']]
    if scaler is not None:
        X_val = X_scaler.transform(X_val)
        y_val = y_scaler.transform(y_val)

    # Test Split
    x_end = len(test) - lookback - horizon
    y_test_start = (train.shape[0] + val.shape[0]) + lookback + horizon
    X_test = test.iloc[:x_end]
    y_test = tmpdf.iloc[y_test_start:y_test_start + x_end][['Value']]
    if scaler is not None:
        X_test = X_scaler.transform(X_test)
        y_test = y_scaler.transform(y_test)

    # Batch Sequence Generators
    sequence_length = int(lookback / steps)
    dataset_train = timeseries_dataset_from_array(
        X_train, y_train,
        sequence_length=sequence_length,
        sampling_rate=steps,
        batch_size=batch_size,
        shuffle=True)
    dataset_val = timeseries_dataset_from_array(
        X_val, y_val,
        sequence_length=sequence_length,
        sampling_rate=steps,
        batch_size=batch_size,
        shuffle=True)
    dataset_test = timeseries_dataset_from_array(
        X_test, y_test,
        sequence_length=sequence_length,
        sampling_rate=steps,
        batch_size=batch_size,
        shuffle=False)

    return dataset_train, dataset_val, dataset_test
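# A usage sketch with synthetic hourly data (the single 'Value' column and the
# 2019-2021 date range are chosen here to satisfy the date logic above; they are
# not taken from the original code):
import numpy as np

rng = pd.date_range("2019-01-01", "2021-03-31 23:00", freq="H")
demo_df = pd.DataFrame({"Value": np.random.rand(len(rng))}, index=rng)
train_ds, val_ds, test_ds = ts_offset_split(
    demo_df, steps=1, lookback=24, horizon=1, batch_size=32)
for xb, yb in train_ds.take(1):
    print(xb.shape, yb.shape)  # (32, 24, 1) (32, 1)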
def create_datasets(dataframe, split, steps, lookback, horizon, batch_size, scaler='standard'):
    # Split method
    train_split = int(split * dataframe.shape[0])
    val_split = int((split + 0.1) * dataframe.shape[0])
    train = dataframe.iloc[:train_split]
    val = dataframe.iloc[train_split:val_split]
    test = dataframe.iloc[val_split:]

    # Scaler
    if scaler == 'standard':
        X_scaler = StandardScaler()
        y_scaler = StandardScaler()
    elif scaler == 'minmax':
        X_scaler = MinMaxScaler()
        y_scaler = MinMaxScaler()
    else:
        print("Please specify 'standard' or 'minmax' for the scaler parameter.")

    # Training
    start = lookback + horizon
    end = start + train_split
    X_train = train.values
    y_train = dataframe.iloc[start:end][['Value']]
    X_train = X_scaler.fit_transform(X_train)
    y_train = y_scaler.fit_transform(y_train)

    # Validation
    x_end = len(val) - lookback - horizon
    y_val_start = train_split + lookback + horizon
    X_val = val.iloc[:x_end]
    y_val = dataframe.iloc[y_val_start:y_val_start + x_end][['Value']]
    X_val = X_scaler.transform(X_val)
    y_val = y_scaler.transform(y_val)

    # Test
    x_end = len(test) - lookback - horizon
    y_test_start = val_split + lookback + horizon
    X_test = test.iloc[:x_end]
    y_test = dataframe.iloc[y_test_start:y_test_start + x_end][['Value']]
    X_test = X_scaler.transform(X_test)
    y_test = y_scaler.transform(y_test)

    # Batch Sequence Generators
    sequence_length = int(lookback / steps)
    dataset_train = timeseries_dataset_from_array(
        X_train, y_train,
        sequence_length=sequence_length,
        sampling_rate=steps,
        batch_size=batch_size,
        shuffle=True)
    dataset_val = timeseries_dataset_from_array(
        X_val, y_val,
        sequence_length=sequence_length,
        sampling_rate=steps,
        batch_size=batch_size,
        shuffle=True)
    dataset_test = timeseries_dataset_from_array(
        X_test, y_test,
        sequence_length=sequence_length,
        sampling_rate=steps,
        batch_size=batch_size,
        shuffle=False)

    return dataset_train, dataset_val, dataset_test
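# A minimal alignment check (illustrative, not from the original code): with
# lookback=6, horizon=2 and steps=2, each window samples every 2nd point over the
# lookback span, and its target sits lookback + horizon steps after the window start,
# which is exactly how y_train/y_val/y_test are offset above.
import numpy as np
from tensorflow.keras.preprocessing import timeseries_dataset_from_array

lookback, horizon, steps = 6, 2, 2
series = np.arange(20, dtype=np.float32).reshape(-1, 1)
demo_ds = timeseries_dataset_from_array(
    series, series[lookback + horizon:],
    sequence_length=lookback // steps, sampling_rate=steps, batch_size=1)
x0, y0 = next(iter(demo_ds))
print(x0.numpy().ravel(), y0.numpy().ravel())  # [0. 2. 4.] [8.]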