def test_TimeseriesGenerator_on_text(): txt = bytearray("Keras is simple.", 'utf-8') data_gen = TimeseriesGenerator(txt, txt, hlength=10, batch_size=1, gap=1) # for i in range(len(data_gen)): # print(data_gen[i][0].tostring(), "->'%s'" % data_gen[i][1].tostring()) assert data_gen[-1][0].shape == (1, 10) and data_gen[-1][1].shape == (1, ) assert data_gen[-1][0].tostring() == b" is simple" assert data_gen[-1][1].tostring() == b"." data_gen = TimeseriesGenerator(txt, txt, hlength=10, target_seq=True, batch_size=1, gap=1) assert data_gen[-1][0].shape == (1, 10) and data_gen[-1][1].shape == (1, 10, 1) # for i in range(len(data_gen)): # print(data_gen[i][0].tostring(), "->'%s'" % data_gen[i][1].tostring()) assert data_gen[0][1].tostring() == b"eras is si"
def test_TimeSeriesGenerator_doesnt_miss_any_sample1(): x = np.array([[i] for i in range(10)]) for gap in range(10): for length in range(1, 11 - gap): expected = len(x) - length + 1 - gap if expected > 0: g = TimeseriesGenerator(x, x, length=length, batch_size=1, gap=gap) actual = len(g) assert expected == actual x = np.array([i for i in range(7)]) g = TimeseriesGenerator(x, x, hlength=3, batch_size=2) expected_len = ceil((len(x) - g.hlength + 1.0) / g.batch_size) print('gap: %i, hlength: %i, expected-len:%i, len: %i' % (g.gap, g.hlength, expected_len, g.len)) # for i in range(len(g)): # print(i, g[i]) assert len(g) == expected_len
def test_TimeSeriesGenerator_doesnt_miss_any_sample2(): x = np.array([[i] for i in range(23)]) strides = (1, 1, 5, 7, 3, 5, 3) lengths = (3, 3, 4, 3, 1, 3, 7) batch_sizes = (6, 6, 6, 5, 6, 6, 6) shuffles = (False, True, True, False, False, False, False) for stride, length, batch_size, shuffle in zip(strides, lengths, batch_sizes, shuffles): g = TimeseriesGenerator(x, x, length=length, sampling_rate=1, stride=stride, start_index=0, end_index=None, shuffle=shuffle, reverse=False, batch_size=batch_size) # last batch will be different if `(samples - length) / stride` # is not a multiple of `batch_size`. expected_sequences = int(ceil((len(x) - length + 1.0) / stride)) expected_batches = ceil(expected_sequences / float(batch_size)) print('gap: %i, hlength: %i, expected-len:%i, len: %i' % (g.gap, g.hlength, expected_batches, g.len)) for i in range(len(g)): print(i, g[i]) y = [g[ix][1] for ix in range(len(g))] actual_sequences = sum(len(_y) for _y in y) actual_batches = len(y) assert expected_sequences == actual_sequences assert expected_batches == actual_batches
def test_TimeseriesGenerator_exceptions(): data = np.array([[i] for i in range(50)]) with assert_raises(ValueError) as context: TimeseriesGenerator(data, data, length=50, stride=0) error = str(context.exception) print(error) assert 'must be strictly positive.' in error with assert_raises(ValueError) as context: TimeseriesGenerator(data, data, length=50, sampling_rate=0) error = str(context.exception) print(error) assert 'must be strictly positive.' in error with assert_raises(ValueError) as context: TimeseriesGenerator(data, data, length=50, batch_size=0) error = str(context.exception) print(error) assert 'must be strictly positive.' in error with assert_raises(ValueError) as context: TimeseriesGenerator(data, data, length=50, start_index=50) error = str(context.exception) print(error) assert 'This configuration gives no output' in error with assert_raises(ValueError) as context: TimeseriesGenerator(data, data, length=50, sampling_rate=51) error = str(context.exception) print(error) assert "`length` has to be a multiple of `sampling_rate`." " For instance, `length=102` would do." in error with assert_raises(ValueError) as context: TimeseriesGenerator(data, data, length=10, sampling_rate=3) error = str(context.exception) print(error) assert "`length` has to be a multiple of `sampling_rate`." " For instance, `length=6` would do." in error
df = df[df['data_address_delta'].notnull()] # plot_data_frame(df) scaler = MinMaxScaler(feature_range=(0, 1)) df = scaler.fit_transform(df[['thread_id', 'pc', 'data_address_delta']]) train, test = train_test_split(df, test_size=0.15) look_back = 10 n_features = 2 train_data_gen = TimeseriesGenerator(train, train, length=look_back, sampling_rate=1, stride=1, batch_size=3) test_data_gen = TimeseriesGenerator(test, test, length=look_back, sampling_rate=1, stride=1, batch_size=1) model = Sequential() model.add(LSTM(25, input_shape=(look_back, n_features))) model.add(Dense(n_features, activation='softmax')) model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])
print('Number of samples:', len(df)) X = df.loc[:, df.columns != 'Class'].values y = to_categorical(df.loc[:, 'Class']) X = np.concatenate((X[1:], y[:-1]), axis=1) y = y[1:] X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=split, shuffle=False) train_generator = TimeseriesGenerator(X_train, y_train, length=window_size, batch_size=batch_size) test_generator = TimeseriesGenerator(X_test, y_test, length=window_size, batch_size=1) model = Sequential() model.add(CuDNNGRU(128, input_shape=( window_size, X_train.shape[1], ))) model.add(Dense(64, activation='relu')) model.add(Dense(64, activation='relu')) model.add(Dense(64, activation='relu')) model.add(Dense(y_train.shape[1], activation='sigmoid'))
def test_TimeseriesGenerator_previous_tests(): data = np.array([[i] for i in range(50)]) data_gen = TimeseriesGenerator(data, data, length=10, sampling_rate=2, reverse=True, batch_size=2, gap=2) assert len(data_gen) == 20 assert (np.allclose( data_gen[0][0], np.array([[[8], [6], [4], [2], [0]], [[9], [7], [5], [3], [1]]]))) assert (np.allclose(data_gen[0][1], np.array([[10], [11]]))) data_gen = TimeseriesGenerator(data, data, length=10, sampling_rate=2, shuffle=True, batch_size=1, gap=2) batch = data_gen[0] r = batch[1][0][0] assert (np.allclose( batch[0], np.array([[[r - 10], [r - 8], [r - 6], [r - 4], [r - 2]]]))) assert (np.allclose(batch[1], np.array([ [r], ]))) data_gen = TimeseriesGenerator(data, data, length=10, sampling_rate=2, stride=2, batch_size=2, gap=2) assert len(data_gen) == 10 assert (np.allclose( data_gen[1][0], np.array([[[4], [6], [8], [10], [12]], [[6], [8], [10], [12], [14]]]))) assert (np.allclose(data_gen[1][1], np.array([[14], [16]]))) data_gen = TimeseriesGenerator(data, data, length=10, sampling_rate=2, start_index=10, end_index=30, batch_size=2, gap=2) assert len(data_gen) == 5 assert (np.allclose( data_gen[0][0], np.array([[[10], [12], [14], [16], [18]], [[11], [13], [15], [17], [19]]]))) assert (np.allclose(data_gen[0][1], np.array([[20], [21]]))) data = np.array([np.random.random_sample((1, 2, 3, 4)) for i in range(50)]) targets = np.array([np.random.random_sample((3, 2, 1)) for i in range(50)]) data_gen = TimeseriesGenerator(data, targets, length=10, sampling_rate=2, start_index=10, end_index=30, batch_size=2, gap=2) assert len(data_gen) == 5 assert np.allclose( data_gen[0][0], np.array([np.array(data[10:19:2]), np.array(data[11:20:2])])) assert (np.allclose(data_gen[0][1], np.array([targets[20], targets[21]])))
def test_TimeseriesGenerator_types(): print("** test 0 (float types)") data = np.array([[i] for i in range(50)], dtype=np.float) targets = np.array([[float(i)] for i in range(50)]) data_gen = TimeseriesGenerator(data, targets, hlength=5, sampling_rate=2, gap=2, batch_size=2, shuffle=False) x, y = data_gen[0] assert np.allclose( x, np.array([[[0], [2], [4], [6], [8]], [[1], [3], [5], [7], [9]]])) assert np.allclose(y, np.array([[10], [11]])) print("** test 1 (auto types)") data = np.array([[i] for i in range(50)], dtype=np.float) targets = np.array([[i] for i in range(50)], dtype=np.float) data_gen = TimeseriesGenerator(data, targets, hlength=5, sampling_rate=2, gap=2, batch_size=2, shuffle=False) x, y = data_gen[0] assert len(data_gen) == 20 assert np.array_equal( x, np.array([[[0], [2], [4], [6], [8]], [[1], [3], [5], [7], [9]]])) assert np.array_equal(y, np.array([[10], [11]])) x, y = data_gen[-1] assert np.array_equal( x, np.array([[[38], [40], [42], [44], [46]], [[39], [41], [43], [45], [47]]])) assert np.array_equal(y, np.array([[48], [49]])) print("** test 2 (batch_size=4)") data_gen = TimeseriesGenerator(data, targets, hlength=10, batch_size=4, gap=1) assert len(data_gen) == 10 x, y = data_gen[0] assert np.array_equal( x[1], np.array([[1], [2], [3], [4], [5], [6], [7], [8], [9], [10]])) assert np.array_equal(y, np.array([[10], [11], [12], [13]])) data_gen = TimeseriesGenerator(data, targets, hlength=10, reverse=True, batch_size=2) x, y = data_gen[0] assert np.array_equal(x[1, 0], np.array([10])) print("** test 3 (when sampling_rate is not a multiple of hlength)") data_gen = TimeseriesGenerator(data, targets, hlength=10, sampling_rate=3, batch_size=2) # for i in range(len(data_gen)): # print(i,data_gen[i]) assert len(data_gen) == 12 print("** test 4 (stateful)") data_gen = TimeseriesGenerator(data, targets, hlength=10, sampling_rate=2, batch_size=5, stateful=True, gap=2, stride=4)
window_size = 7 batch_size = 256 epochs = 64 df = pd.read_csv('./data/raw/DJIA_table.csv') scaler = StandardScaler() data = scaler.fit_transform((df['Close'] - df['Open']).values.reshape(-1, 1)) X = data[:-1] y = data[1:] X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False) train_data_gen = TimeseriesGenerator(X_train, y_train, length=window_size, batch_size=batch_size, shuffle=False) test_data_gen = TimeseriesGenerator(X_test, y_test, length=window_size, batch_size=batch_size, shuffle=False) model = Sequential() model.add(CuDNNGRU(4, input_shape=(window_size, 1,))) model.add(Dense(1)) model.compile(loss='mean_squared_error', optimizer='adam') history = model.fit_generator(train_data_gen, epochs=epochs).history index = [df['Open'][0]] for i, d in enumerate(scaler.inverse_transform(data)): index.append(index[i] + d) index_train = [df['Open'][0]] for i, d in enumerate(scaler.inverse_transform(model.predict_generator(train_data_gen))): index_train.append(index_train[i] + d)