def create_tf_dataset(
    data_array: np.ndarray,
    input_sequence_length: int,
    forecast_horizon: int,
    batch_size: int = 128,
    shuffle: bool = True,
    multi_horizon: bool = True,
):
    """Creates tensorflow dataset from numpy array.

    This function creates a dataset where each element is a tuple `(inputs, targets)`.
    `inputs` is a Tensor
    of shape `(batch_size, input_sequence_length, num_routes, 1)` containing
    the `input_sequence_length` past values of the timeseries for each node.
    `targets` is a Tensor of shape `(batch_size, forecast_horizon, num_routes)`
    containing the `forecast_horizon`
    future values of the timeseries for each node.

    Args:
        data_array: np.ndarray with shape `(num_time_steps, num_routes)`
        input_sequence_length: Length of the input sequence (in number of timesteps).
        forecast_horizon: If `multi_horizon=True`, the target will be the values of the timeseries for 1 to
            `forecast_horizon` timesteps ahead. If `multi_horizon=False`, the target will be the value of the
            timeseries `forecast_horizon` steps ahead (only one value).
        batch_size: Number of timeseries samples in each batch.
        shuffle: Whether to shuffle output samples, or instead draw them in chronological order.
        multi_horizon: See `forecast_horizon`.

    Returns:
        A tf.data.Dataset instance.
    """

    inputs = timeseries_dataset_from_array(
        np.expand_dims(data_array[:-forecast_horizon], axis=-1),
        None,
        sequence_length=input_sequence_length,
        shuffle=False,
        batch_size=batch_size,
    )

    # For multi-horizon targets, the target window starts right after the input
    # window; otherwise skip ahead so the single target is exactly
    # `forecast_horizon` steps past the last input timestep.
    target_offset = (input_sequence_length if multi_horizon else
                     input_sequence_length + forecast_horizon - 1)
    target_seq_length = forecast_horizon if multi_horizon else 1
    targets = timeseries_dataset_from_array(
        data_array[target_offset:],
        None,
        sequence_length=target_seq_length,
        shuffle=False,
        batch_size=batch_size,
    )

    dataset = tf.data.Dataset.zip((inputs, targets))
    if shuffle:
        dataset = dataset.shuffle(100)

    return dataset.prefetch(16).cache()
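A minimal usage sketch of create_tf_dataset on synthetic data. The array shape, parameter values, and import path below are illustrative assumptions (the import mirrors the tf.keras.preprocessing path used in a later example):

import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing import timeseries_dataset_from_array

# Synthetic timeseries: 1000 timesteps for 5 routes.
data_array = np.random.rand(1000, 5).astype("float32")

dataset = create_tf_dataset(
    data_array,
    input_sequence_length=12,
    forecast_horizon=3,
    batch_size=64,
    shuffle=False,
    multi_horizon=True,
)

for inputs, targets in dataset.take(1):
    print(inputs.shape)   # (64, 12, 5, 1)
    print(targets.shape)  # (64, 3, 5)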
 def _make_ds(self, data, norm_data):
     # Use the pre-normalized features/labels when provided, otherwise the raw data.
     return timeseries_dataset_from_array(
         data=data["features"]
         if norm_data["features"] is None else norm_data["features"],
         targets=data["labels"]
         if norm_data["labels"] is None else norm_data["labels"],
         sequence_length=self._num_sessions,
         sequence_stride=1,
         shuffle=False,
         batch_size=self._batch_size)
Example 3
 def make_dataset(self, data, stride=1, batch_size=32):
     data = np.array(data, dtype=np.float32)
     data_set = timeseries_dataset_from_array(
         data=data,
         targets=None,
         sequence_length=self.window_size,
         sequence_stride=stride,
         shuffle=True,
         batch_size=batch_size)
     data_set = data_set.map(self.split_window)
     return data_set
Example 4
    window_size = 256

    batchSize = 64 * 2

    # data_generator = sequence.TimeseriesGenerator(x, y, length=256, batch_size=batchSize)

    input_data = None

    windows = [0] * len(all_songs)

    for index, song in all_songs.items():
        # Dictionary.items() yields items as (key, value) tuples
        x = song[:-window_size, :]
        y = song[window_size:, :]
        window = preprocessing.timeseries_dataset_from_array(
            data=x,
            targets=y,
            sequence_length=window_size,
            sequence_stride=1,
            sampling_rate=1,
            shuffle=True,
            batch_size=batchSize)
        if input_data is None:
            input_data = window
        else:
            input_data = input_data.concatenate(window)

    TwoWide(input_data)

    print("finished")
Example 5
    def _get_SSA_par(df, L=1440, n_max_tries=3):  # 2 <= L <= N/2
        N = len(df)
        K = N - L + 1

        dataset = timeseries_dataset_from_array(
            data=df,
            targets=None,
            sequence_length=L,
            sequence_stride=1,
            sampling_rate=1,
            batch_size=len(df)
        )

        X = list(dataset.as_numpy_iterator())[0]
        print(X.shape)

        # Retry the SVD a few times, stopping as soon as it succeeds.
        U = s = V = None
        for t in range(n_max_tries):
            try:
                U, s, V = linalg.svd(X,
                                     full_matrices=True,
                                     compute_uv=True,
                                     overwrite_a=False,
                                     check_finite=True,
                                     lapack_driver='gesvd')
                break
            except Exception:
                continue

        if s is None:
            raise ValueError("SSA reached the max number of tries with error.")

        l = s ** 2  # partial variances
        r = len(s)  # np.linalg.matrix_rank(X) # matrix rank and total number of components
        ### time-series components ###
        gkList = np.zeros(shape=(r, N))  # zero matrix in whose rows SSA components will be saved

        print('input:', X.shape)
        print('U:', U.shape)
        print('s:', s.shape)
        print('V:', V.shape)

        print('r:', r)
        print('gkList:', gkList.shape)

        for k in trange(r, position=0, leave=True):
            Uk = U[:, k]  # k-th order column singular vector
            Vk = V[k, :]  # k-th order row singular vector
            Xk = s[k] * np.outer(Uk, Vk)  # k-th order matrix component
            gk = []  # empty array in which to save successive k-th order component values
            for i in range(min(K - 1, L - 1), -max(K - 1, L - 1) - 1, -1):  # loop over diagonals
                gki = np.mean(np.diag(np.fliplr(Xk), i))  # successive time.series values
                gk.append(gki)
            gkList[k] = gk  # k-th order component

        ### w-corr matrix ###
        w = []  # empty array to which to add successive weights
        LL = min(L, K)
        KK = max(L, K)
        for ll in range(1, LL + 1):  # first 1/3 part of weights
            w.append(ll)
        for ll in range(LL + 1, KK + 1):  # second 1/3 part of weights
            w.append(LL)
        for ll in range(KK + 1, N + 1):  # third 1/3 part of weights
            w.append(N - ll)
        kMin = kkMin = 0  # show w-corr matrix for first 20 index values
        kMax = kkMax = 20

        wMatrix = [[sum(w * gkList[k] * gkList[kk]) / (
                    math.sqrt(sum(w * gkList[k] * gkList[k])) * math.sqrt(sum(w * gkList[kk] * gkList[kk]))) for k in
                    range(kMin, kMax)] for kk in range(kkMin, kkMax)]
        wMatrix = np.array(wMatrix)
        return (r, l, gkList, wMatrix)
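A hypothetical usage sketch for _get_SSA_par, assuming it is reachable as a plain function and that the names it relies on (np, math, linalg from scipy, trange from tqdm, and timeseries_dataset_from_array) are already imported. Summing all returned components should reconstruct the input series, because the rank-one terms add back to the trajectory matrix and the anti-diagonal averaging is linear:

import numpy as np

# Toy series: a noisy sine wave (N = 400 samples).
t = np.arange(400)
series = np.sin(2 * np.pi * t / 50) + 0.1 * np.random.randn(len(t))

# Window length must satisfy 2 <= L <= N/2; use a small L for the toy example.
r, variances, components, w_corr = _get_SSA_par(series, L=40)

reconstruction = components.sum(axis=0)      # shape (N,)
print(np.allclose(reconstruction, series))   # expected: True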
Example 6
# from sciann.functionals.rnn_field import RNNField
# from sciann import SciModel
# from sciann.utils import diff, set_random_seed
# from sciann.constraints import Data, Tie
from tensorflow.keras.preprocessing import timeseries_dataset_from_array

# set_random_seed(1234)

tunits = 3

# Synthetic data generated from sin function over [0, 2pi]
x_true = np.linspace(0, np.pi * 2, 100)
y_true = np.sin(x_true)

dataset = timeseries_dataset_from_array(y_true[:-3],
                                        y_true[3:],
                                        sequence_length=tunits,
                                        batch_size=10)
for batch in dataset:
    inputs, targets = batch
    print('ins: {}  outs: {}'.format(inputs, targets))

# Intentional early stop: the sciann imports above are commented out, so the
# model definition below would not run as-is.
raise ValueError

# The network inputs should be defined with Variable.
t = RNNVariable(tunits, name='t', dtype='float64')

# Each network is defined by Functional.
y = RNNFunctional('y',
                  t, [5],
                  activation='tanh',
                  recurrent_activation='linear',
Example 7
    return means


if __name__ == "__main__":

    SIZE = 10000

    # create an artificial time series
    time_series = np.random.random(SIZE)  #np.random.uniform(0, 1, SIZE) +
    WINDOW_SIZE = 25

    # for simplicity, use this tensorflow function to structure the time series dataset
    dataset = timeseries_dataset_from_array(
        time_series[:-WINDOW_SIZE],
        time_series[WINDOW_SIZE:],
        sequence_length=WINDOW_SIZE,
        batch_size=SIZE - WINDOW_SIZE,
        shuffle=False,
    )

    # transform Dataset object into array
    for (batch_of_sequences, batch_of_targets) in dataset:

        X = np.array(batch_of_sequences)
        y = np.array(batch_of_targets)

        # simulating a multi output task - two values to predict
        y = np.concatenate([
            np.expand_dims(y, axis=-1),
            np.expand_dims(np.roll(y, 1), axis=-1)
        ],
Example 8
def ts_offset_split(dataframe,
                    steps,
                    lookback,
                    horizon,
                    batch_size,
                    scaler='standard'):
    '''
    Pipeline function that returns three tf.data.Dataset objects built with
    timeseries_dataset_from_array: train, validation, and test.
    The data is first split with the offset-split method, holding out every 8th day
    for validation. It is then scaled with scikit-learn's StandardScaler or
    MinMaxScaler, and finally windowed using the lookback and horizon parameters.
    '''
    # Offset 8th Day Split
    start = 0
    end = 168
    offset = 24
    training = []
    validation = []

    for i in range(int((365 + 366) / 8)):

        train = dataframe.iloc[start:end]
        val = dataframe.iloc[end:end + offset]
        training.append(train)
        validation.append(val)

        start += 192
        end += 192

    # Decide Splits for sets
    train = pd.concat(training)

    val = pd.concat(validation)

    # DataFrame.append was removed in pandas 2.0; use pd.concat instead, and
    # compare dates to dates so the filter behaves consistently.
    train = pd.concat([
        train,
        dataframe[(dataframe.index.date > val.index.max().date())
                  & (dataframe.index.date < dt.date(2021, 1, 1))]
    ])

    test = dataframe[dataframe.index.date >= dt.date(2021, 1, 1)]

    tmpdf = pd.concat([train, val, test])

    # Scaler
    if scaler == 'standard':
        X_scaler = StandardScaler()
        y_scaler = StandardScaler()
    elif scaler == 'minmax':
        X_scaler = MinMaxScaler()
        y_scaler = MinMaxScaler()
    elif scaler is None:
        print("Data has not been scaled.")
    else:
        raise ValueError("Please specify scaler: 'standard', 'minmax', or None.")

    # Training Split
    start = lookback + horizon
    end = start + train.shape[0]

    X_train = train.values
    y_train = tmpdf.iloc[start:end][['Value']]

    if scaler is not None:
        X_train = X_scaler.fit_transform(X_train)
        y_train = y_scaler.fit_transform(y_train)

    # Validation Split
    x_end = len(val) - lookback - horizon
    y_val_start = train.shape[0] + lookback + horizon

    X_val = val.iloc[:x_end]
    y_val = tmpdf.iloc[y_val_start:y_val_start + x_end][['Value']]

    if scaler is not None:
        X_val = X_scaler.transform(X_val)
        y_val = y_scaler.transform(y_val)

    # Test Split
    x_end = len(test) - lookback - horizon
    y_test_start = (train.shape[0] + val.shape[0]) + lookback + horizon

    X_test = test.iloc[:x_end]
    y_test = tmpdf.iloc[y_test_start:y_test_start + x_end][['Value']]

    if scaler is not None:
        X_test = X_scaler.transform(X_test)
        y_test = y_scaler.transform(y_test)

    # Batch Sequence Generators
    sequence_length = int(lookback / steps)

    dataset_train = timeseries_dataset_from_array(
        X_train,
        y_train,
        sequence_length=sequence_length,
        sampling_rate=steps,
        batch_size=batch_size,
        shuffle=True)

    dataset_val = timeseries_dataset_from_array(
        X_val,
        y_val,
        sequence_length=sequence_length,
        sampling_rate=steps,
        batch_size=batch_size,
        shuffle=True)

    dataset_test = timeseries_dataset_from_array(
        X_test,
        y_test,
        sequence_length=sequence_length,
        sampling_rate=steps,
        batch_size=batch_size,
        shuffle=False)

    return dataset_train, dataset_val, dataset_test
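A hypothetical usage sketch for ts_offset_split on synthetic hourly data. The DataFrame, its column names, and the parameter values are illustrative assumptions; the function itself relies on pandas as pd, datetime as dt, the scikit-learn scalers, and timeseries_dataset_from_array being imported:

import numpy as np
import pandas as pd

# Hourly data covering 2019-2020 plus one month of 2021; 'Value' is the target
# column the function expects, plus one extra feature column.
index = pd.date_range("2019-01-01", "2021-01-31 23:00", freq="H")
df = pd.DataFrame(
    {
        "Value": np.sin(np.arange(len(index)) * 2 * np.pi / 24),
        "Temperature": np.random.randn(len(index)),
    },
    index=index,
)

train_ds, val_ds, test_ds = ts_offset_split(
    df, steps=1, lookback=24, horizon=1, batch_size=32, scaler="standard")

for x_batch, y_batch in train_ds.take(1):
    print(x_batch.shape, y_batch.shape)  # (32, 24, 2) (32, 1)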
Example 9
def create_datasets(dataframe,
                    split,
                    steps,
                    lookback,
                    horizon,
                    batch_size,
                    scaler='standard'):
    # Split method
    train_split = int(split * dataframe.shape[0])
    val_split = int((split + 0.1) * dataframe.shape[0])

    train = dataframe.iloc[:train_split]
    val = dataframe.iloc[train_split:val_split]
    test = dataframe.iloc[val_split:]

    # Scaler
    if scaler == 'standard':
        X_scaler = StandardScaler()
        y_scaler = StandardScaler()
    elif scaler == 'minmax':
        X_scaler = MinMaxScaler()
        y_scaler = MinMaxScaler()
    else:
        raise ValueError(
            "Please pass 'standard' or 'minmax' as the scaler parameter.")

    # Training
    start = lookback + horizon
    end = start + train_split

    X_train = train.values
    y_train = dataframe.iloc[start:end][['Value']]

    X_train = X_scaler.fit_transform(X_train)
    y_train = y_scaler.fit_transform(y_train)

    # Validation
    x_end = len(val) - lookback - horizon
    y_val_start = train_split + lookback + horizon

    X_val = val.iloc[:x_end]
    y_val = dataframe.iloc[y_val_start:y_val_start + x_end][['Value']]

    X_val = X_scaler.transform(X_val)
    y_val = y_scaler.transform(y_val)

    # Test
    x_end = len(test) - lookback - horizon
    y_test_start = val_split + lookback + horizon

    X_test = test.iloc[:x_end]
    y_test = dataframe.iloc[y_test_start:y_test_start + x_end][['Value']]

    X_test = X_scaler.transform(X_test)
    y_test = y_scaler.transform(y_test)

    # Batch Sequence Generators
    sequence_length = int(lookback / steps)

    dataset_train = timeseries_dataset_from_array(
        X_train,
        y_train,
        sequence_length=sequence_length,
        sampling_rate=steps,
        batch_size=batch_size,
        shuffle=True)

    dataset_val = timeseries_dataset_from_array(
        X_val,
        y_val,
        sequence_length=sequence_length,
        sampling_rate=steps,
        batch_size=batch_size,
        shuffle=True)

    dataset_test = timeseries_dataset_from_array(
        X_test,
        y_test,
        sequence_length=sequence_length,
        sampling_rate=steps,
        batch_size=batch_size,
        shuffle=False)

    return dataset_train, dataset_val, dataset_test