コード例 #1
0
    def test_TimeSeriesGenerator_doesnt_miss_any_sample(self):
        """Check that TimeseriesGenerator yields the expected sample counts.

        The first part sweeps `length` over a 10-step series with
        `batch_size=1` and checks both the number of batches and the targets
        produced. The second part checks the total number of sequences and
        batches for several stride / length / batch size / shuffle
        combinations over a 23-step series.
        """
        series = np.array([[i] for i in range(10)])

        for window in range(3, 10):
            gen = sequence.TimeseriesGenerator(
                series, series, length=window, batch_size=1)
            n_batches = len(gen)

            self.assertEqual(max(0, len(series) - window), n_batches)

            if n_batches <= 0:
                continue

            # All elements in range(window, 10) should be used as current
            # step
            wanted_targets = np.arange(window, 10).reshape(-1, 1)
            produced_targets = np.concatenate(
                [gen[ix][1] for ix in range(n_batches)], axis=0)
            self.assertAllClose(produced_targets, wanted_targets)

        series = np.array([[i] for i in range(23)])

        # Each case is (stride, length, batch_size, shuffle).
        cases = (
            (1, 3, 6, False),
            (1, 3, 6, True),
            (5, 4, 6, True),
            (7, 3, 5, False),
            (3, 1, 6, False),
            (5, 3, 6, False),
            (3, 7, 6, False),
        )

        for stride, length, batch_size, shuffle in cases:
            gen = sequence.TimeseriesGenerator(
                series,
                series,
                length=length,
                sampling_rate=1,
                stride=stride,
                start_index=0,
                end_index=None,
                shuffle=shuffle,
                reverse=False,
                batch_size=batch_size,
            )
            usable_steps = 23 - length
            if not shuffle:
                # last batch will be different if `(samples - length) / stride`
                # is not a multiple of `batch_size`.
                expected_sequences = math.ceil(usable_steps / float(stride))
            else:
                # all batches have the same size when shuffle is True.
                full_batches = math.ceil(
                    usable_steps / float(batch_size * stride))
                expected_sequences = full_batches * batch_size

            expected_batches = math.ceil(
                expected_sequences / float(batch_size))

            target_batches = [gen[ix][1] for ix in range(len(gen))]

            actual_sequences = sum(len(batch) for batch in target_batches)
            actual_batches = len(target_batches)

            self.assertEqual(expected_sequences, actual_sequences)
            self.assertEqual(expected_batches, actual_batches)
コード例 #2
0
def get_time_series(data, labels, TIME_STEPS=10, START_INDEX=0):
    """Cut `data` into windows of `TIME_STEPS` samples with one label each.

    `sequence.TimeseriesGenerator` is tried first, with batch size 1,
    sampling rate 1 and a start index of `int(TIME_STEPS / 2)`. If that
    attempt raises any ordinary exception, a plain sliding window starting
    at `START_INDEX` is built instead.

    Args:
        data: Sequence of samples; must support `len()` and slicing.
        labels: Sequence of labels indexed in step with `data`.
        TIME_STEPS: Number of consecutive samples in every window.
        START_INDEX: Index of the first window. Only the fallback path
            reads it; the generator path ignores it.

    Returns:
        A tuple `(X, Y)` of NumPy arrays holding the windows and the label
        chosen for each window.
    """
    try:
        data_gen = sequence.TimeseriesGenerator(data,
                                                labels,
                                                length=TIME_STEPS,
                                                sampling_rate=1,
                                                batch_size=1,
                                                start_index=int(TIME_STEPS /
                                                                2))

        # batch_size is 1, so element 0 of every batch is its only sample.
        X = np.array([data_gen[i][0][0] for i in range(len(data_gen))])
        Y = np.array([data_gen[i][1][0] for i in range(len(data_gen))])
    except Exception:
        # Catch `Exception` rather than using a bare `except:` so that
        # KeyboardInterrupt and SystemExit are no longer swallowed here.
        #
        # Fallback: window i covers data[i:i + TIME_STEPS] and is labelled
        # with labels[i], i.e. the label at the window's first index.
        # NOTE(review): the generator path appears to label a window with
        # the step that follows it, so the two paths may not be aligned --
        # confirm which convention callers expect.
        window_starts = range(START_INDEX, len(data) - TIME_STEPS)
        X = np.array([data[i:i + TIME_STEPS] for i in window_starts])
        Y = np.array([labels[i] for i in window_starts])
    return X, Y
コード例 #3
0
ファイル: sequence_test.py プロジェクト: z-a-f/keras-1
    def test_TimeseriesGenerator(self):
        """Exercise TimeseriesGenerator options on a 50-step series.

        Covers the default ordering, `reverse`, `shuffle`, `stride`, an
        explicit `start_index`/`end_index` range, multi-dimensional samples,
        and the ValueError raised for `length=50`.
        """
        series = np.array([[step] for step in range(50)])
        labels = np.array([[step] for step in range(50)])

        def build(inputs, outputs, **options):
            # Every generator built here uses length=10 and sampling_rate=2.
            return preprocessing_sequence.TimeseriesGenerator(
                inputs, outputs, length=10, sampling_rate=2, **options)

        def windows(*rows):
            # Wrap every scalar of every row in its own one-element list.
            return np.array([[[value] for value in row] for row in rows])

        # Default ordering.
        gen = build(series, labels, batch_size=2)
        self.assertEqual(len(gen), 20)
        self.assertAllClose(
            gen[0][0], windows(range(0, 10, 2), range(1, 10, 2)))
        self.assertAllClose(gen[0][1], np.array([[10], [11]]))
        self.assertAllClose(
            gen[1][0], windows(range(2, 11, 2), range(3, 12, 2)))
        self.assertAllClose(gen[1][1], np.array([[12], [13]]))

        # reverse=True flips each window; the targets stay the same.
        gen = build(series, labels, reverse=True, batch_size=2)
        self.assertEqual(len(gen), 20)
        self.assertAllClose(
            gen[0][0], windows(range(8, -1, -2), range(9, 0, -2)))
        self.assertAllClose(gen[0][1], np.array([[10], [11]]))

        # shuffle=True: the window is checked relative to the drawn target.
        shuffled_batch = build(series, labels, shuffle=True, batch_size=1)[0]
        r = shuffled_batch[1][0][0]
        self.assertAllClose(
            shuffled_batch[0],
            np.array([[[r - back] for back in (10, 8, 6, 4, 2)]]))
        self.assertAllClose(shuffled_batch[1], np.array([[r]]))

        # stride=2.
        gen = build(series, labels, stride=2, batch_size=2)
        self.assertEqual(len(gen), 10)
        self.assertAllClose(
            gen[1][0], windows(range(4, 13, 2), range(6, 15, 2)))
        self.assertAllClose(gen[1][1], np.array([[14], [16]]))

        # Explicit start_index / end_index.
        gen = build(series, labels, start_index=10, end_index=30,
                    batch_size=2)
        self.assertEqual(len(gen), 6)
        self.assertAllClose(
            gen[0][0], windows(range(10, 19, 2), range(11, 20, 2)))
        self.assertAllClose(gen[0][1], np.array([[20], [21]]))

        # Samples and targets with extra trailing dimensions.
        series = np.array(
            [np.random.random_sample((1, 2, 3, 4)) for _ in range(50)])
        labels = np.array(
            [np.random.random_sample((3, 2, 1)) for _ in range(50)])
        gen = build(series, labels, start_index=10, end_index=30,
                    batch_size=2)

        self.assertEqual(len(gen), 6)
        self.assertAllClose(
            gen[0][0], np.array([series[10:19:2], series[11:20:2]]))
        self.assertAllClose(gen[0][1], labels[20:22])

        # length=50 must be rejected.
        with self.assertRaises(ValueError) as raised:
            preprocessing_sequence.TimeseriesGenerator(series,
                                                       labels,
                                                       length=50)
        self.assertIn('`start_index+length=50 > end_index=49` is disallowed',
                      str(raised.exception))
コード例 #4
0
    def test_TimeseriesGenerator(self):
        """Exercise TimeseriesGenerator options on a 50-step series.

        Covers the default ordering, `reverse`, `shuffle`, `stride`, an
        explicit `start_index`/`end_index` range, multi-dimensional samples,
        and the ValueError raised for `length=50`.
        """
        steps = np.array([[i] for i in range(50)])
        step_targets = np.array([[i] for i in range(50)])

        # Keyword arguments shared by every generator except the last one.
        common = dict(length=10, sampling_rate=2)
        # Same as `common`, restricted to the index range 10..30.
        windowed = dict(common, start_index=10, end_index=30, batch_size=2)

        # Default ordering.
        gen = sequence.TimeseriesGenerator(
            steps, step_targets, batch_size=2, **common)
        self.assertLen(gen, 20)
        first_inputs = np.array([[0, 2, 4, 6, 8], [1, 3, 5, 7, 9]])
        self.assertAllClose(gen[0][0], first_inputs[..., np.newaxis])
        self.assertAllClose(gen[0][1], np.array([[10], [11]]))
        second_inputs = np.array([[2, 4, 6, 8, 10], [3, 5, 7, 9, 11]])
        self.assertAllClose(gen[1][0], second_inputs[..., np.newaxis])
        self.assertAllClose(gen[1][1], np.array([[12], [13]]))

        # reverse=True flips each window; the targets stay the same.
        gen = sequence.TimeseriesGenerator(
            steps, step_targets, reverse=True, batch_size=2, **common)
        self.assertLen(gen, 20)
        reversed_inputs = np.array([[8, 6, 4, 2, 0], [9, 7, 5, 3, 1]])
        self.assertAllClose(gen[0][0], reversed_inputs[..., np.newaxis])
        self.assertAllClose(gen[0][1], np.array([[10], [11]]))

        # shuffle=True: the window is checked relative to the drawn target.
        gen = sequence.TimeseriesGenerator(
            steps, step_targets, shuffle=True, batch_size=1, **common)
        inputs, outputs = gen[0]
        r = outputs[0][0]
        relative_inputs = np.array([[r - 10, r - 8, r - 6, r - 4, r - 2]])
        self.assertAllClose(inputs, relative_inputs[..., np.newaxis])
        self.assertAllClose(outputs, np.array([[r]]))

        # stride=2.
        gen = sequence.TimeseriesGenerator(
            steps, step_targets, stride=2, batch_size=2, **common)
        self.assertLen(gen, 10)
        strided_inputs = np.array([[4, 6, 8, 10, 12], [6, 8, 10, 12, 14]])
        self.assertAllClose(gen[1][0], strided_inputs[..., np.newaxis])
        self.assertAllClose(gen[1][1], np.array([[14], [16]]))

        # Explicit start_index / end_index.
        gen = sequence.TimeseriesGenerator(steps, step_targets, **windowed)
        self.assertLen(gen, 6)
        ranged_inputs = np.array([[10, 12, 14, 16, 18],
                                  [11, 13, 15, 17, 19]])
        self.assertAllClose(gen[0][0], ranged_inputs[..., np.newaxis])
        self.assertAllClose(gen[0][1], np.array([[20], [21]]))

        # Samples and targets with extra trailing dimensions.
        steps = np.array(
            [np.random.random_sample((1, 2, 3, 4)) for _ in range(50)])
        step_targets = np.array(
            [np.random.random_sample((3, 2, 1)) for _ in range(50)])
        gen = sequence.TimeseriesGenerator(steps, step_targets, **windowed)
        self.assertLen(gen, 6)
        self.assertAllClose(
            gen[0][0], np.array([steps[10:19:2], steps[11:20:2]]))
        self.assertAllClose(gen[0][1], step_targets[20:22])

        # length=50 must be rejected.
        expected_error = (
            r"`start_index\+length=50 > end_index=49` is disallowed")
        with self.assertRaisesRegex(ValueError, expected_error):
            sequence.TimeseriesGenerator(steps, step_targets, length=50)