def test_TimeSeriesGenerator_doesnt_miss_any_sample():
    """Check batch counts and sample counts of ``TimeseriesGenerator``.

    Part one walks window lengths 3..9 over a 10-step series with
    ``batch_size=1`` and checks that the concatenated targets are exactly
    the steps ``length..9``.

    Part two runs several stride / length / batch-size / shuffle
    combinations over a 23-step series and compares the number of batches
    and the total number of emitted sequences with the expected values.
    """
    short_series = np.array([[step] for step in range(10)])

    for length in range(3, 10):
        gen = sequence.TimeseriesGenerator(short_series, short_series,
                                           length=length,
                                           batch_size=1)
        n_batches = len(gen)
        assert max(0, len(short_series) - length) == n_batches

        if n_batches > 0:
            # Every step in range(length, 10) must show up as a target.
            wanted_targets = np.arange(length, 10).reshape(-1, 1)
            seen_targets = np.concatenate(
                [gen[idx][1] for idx in range(n_batches)], axis=0)
            assert_allclose(seen_targets, wanted_targets)

    n_steps = 23
    long_series = np.array([[step] for step in range(n_steps)])

    # One row per scenario: (stride, length, batch_size, shuffle).
    scenarios = (
        (1, 3, 6, False),
        (1, 3, 6, True),
        (5, 4, 6, True),
        (7, 3, 5, False),
        (3, 1, 6, False),
        (5, 3, 6, False),
        (3, 7, 6, False),
    )

    for stride, length, batch_size, shuffle in scenarios:
        gen = sequence.TimeseriesGenerator(long_series, long_series,
                                           length=length,
                                           sampling_rate=1,
                                           stride=stride,
                                           start_index=0,
                                           end_index=None,
                                           shuffle=shuffle,
                                           reverse=False,
                                           batch_size=batch_size)
        usable_steps = n_steps - length
        if not shuffle:
            # The last batch differs in size when
            # `(samples - length) / stride` is not a multiple of
            # `batch_size`.
            expected_sequences = ceil(usable_steps / float(stride))
        else:
            # With shuffle enabled all batches have the same size.
            expected_sequences = ceil(
                usable_steps / float(batch_size * stride)) * batch_size

        expected_batches = ceil(expected_sequences / float(batch_size))

        batch_targets = [gen[idx][1] for idx in range(len(gen))]

        assert expected_sequences == sum(map(len, batch_targets))
        assert expected_batches == len(batch_targets)
def test_TimeseriesGenerator_serde():
    """Round-trip a ``TimeseriesGenerator`` through JSON and compare it.

    A generator is serialized with ``to_json`` and rebuilt with
    ``sequence.timeseries_generator_from_json``; every configuration
    attribute and both arrays of the rebuilt generator must equal those of
    the original.
    """
    data = np.array([[i] for i in range(50)])
    targets = np.array([[i] for i in range(50)])

    data_gen = sequence.TimeseriesGenerator(data, targets,
                                            length=10,
                                            sampling_rate=2,
                                            batch_size=2)

    json_gen = data_gen.to_json()
    recovered_gen = sequence.timeseries_generator_from_json(json_gen)

    # Each attribute is read from BOTH generators. Comparing `data_gen`
    # with itself would pass vacuously and hide a lossy round trip.
    # NOTE(review): if `start_index` fails here, check which value the
    # generator stores and serializes for it; the test itself is right.
    scalar_attributes = (
        'batch_size',
        'end_index',
        'length',
        'reverse',
        'sampling_rate',
        'shuffle',
        'start_index',
        'stride',
    )
    for name in scalar_attributes:
        assert getattr(data_gen, name) == getattr(recovered_gen, name), name

    assert (data_gen.data == recovered_gen.data).all()
    assert (data_gen.targets == recovered_gen.targets).all()
def test_TimeseriesGenerator():
    """Exercise the windowing options of ``TimeseriesGenerator``.

    Covers, in order: default settings, ``reverse=True``, ``shuffle=True``,
    ``stride=2``, an explicit ``start_index``/``end_index`` range, the same
    range on multi-dimensional random samples, and the ``ValueError`` raised
    when ``length`` leaves no room before the end of the data.
    """
    data = np.array([[step] for step in range(50)])
    targets = np.array([[step] for step in range(50)])

    # Keyword arguments shared by every generator built below.
    common = dict(length=10, sampling_rate=2)

    # Default settings.
    gen = sequence.TimeseriesGenerator(data, targets, batch_size=2, **common)
    assert len(gen) == 20
    first = gen[0]
    assert np.allclose(first[0],
                       np.array([[[0], [2], [4], [6], [8]],
                                 [[1], [3], [5], [7], [9]]]))
    assert np.allclose(first[1], np.array([[10], [11]]))
    second = gen[1]
    assert np.allclose(second[0],
                       np.array([[[2], [4], [6], [8], [10]],
                                 [[3], [5], [7], [9], [11]]]))
    assert np.allclose(second[1], np.array([[12], [13]]))

    # Reversed windows.
    gen = sequence.TimeseriesGenerator(data, targets,
                                       reverse=True,
                                       batch_size=2,
                                       **common)
    assert len(gen) == 20
    first = gen[0]
    assert np.allclose(first[0],
                       np.array([[[8], [6], [4], [2], [0]],
                                 [[9], [7], [5], [3], [1]]]))
    assert np.allclose(first[1], np.array([[10], [11]]))

    # Shuffled: the window is checked relative to the target value drawn.
    gen = sequence.TimeseriesGenerator(data, targets,
                                       shuffle=True,
                                       batch_size=1,
                                       **common)
    batch = gen[0]
    r = batch[1][0][0]
    expected_window = np.array(
        [[[r - 10], [r - 8], [r - 6], [r - 4], [r - 2]]])
    assert np.allclose(batch[0], expected_window)
    assert np.allclose(batch[1], np.array([[r]]))

    # Stride of two between consecutive windows.
    gen = sequence.TimeseriesGenerator(data, targets,
                                       stride=2,
                                       batch_size=2,
                                       **common)
    assert len(gen) == 10
    second = gen[1]
    assert np.allclose(second[0],
                       np.array([[[4], [6], [8], [10], [12]],
                                 [[6], [8], [10], [12], [14]]]))
    assert np.allclose(second[1], np.array([[14], [16]]))

    # Explicit start/end range.
    ranged = dict(common, start_index=10, end_index=30, batch_size=2)
    gen = sequence.TimeseriesGenerator(data, targets, **ranged)
    assert len(gen) == 6
    first = gen[0]
    assert np.allclose(first[0],
                       np.array([[[10], [12], [14], [16], [18]],
                                 [[11], [13], [15], [17], [19]]]))
    assert np.allclose(first[1], np.array([[20], [21]]))

    # Same range, with multi-dimensional random samples and targets.
    data = np.array(
        [np.random.random_sample((1, 2, 3, 4)) for _ in range(50)])
    targets = np.array(
        [np.random.random_sample((3, 2, 1)) for _ in range(50)])
    gen = sequence.TimeseriesGenerator(data, targets, **ranged)
    assert len(gen) == 6
    first = gen[0]
    assert np.allclose(first[0],
                       np.array([data[10:19:2], data[11:20:2]]))
    assert np.allclose(first[1], np.array([targets[20], targets[21]]))

    # A window as long as the data itself must be rejected.
    with assert_raises(ValueError) as context:
        sequence.TimeseriesGenerator(data, targets, length=50)
    assert ('`start_index+length=50 > end_index=49` is disallowed'
            in str(context.exception))