Example #1
0
 def test_shuffle(self):
     # Shuffled datasets must reshuffle between iterations while remaining
     # reproducible when rebuilt with the same seed.

     def make_shuffled_dataset(values, labels):
         # Both datasets under test are built with identical arguments.
         return timeseries.timeseries_dataset_from_array(
             values,
             labels,
             sequence_length=5,
             batch_size=1,
             shuffle=True,
             seed=123)

     data = np.arange(10)
     targets = data * 2
     dataset = make_shuffled_dataset(data, targets)

     reference_seq = None
     for seq, label in dataset.take(1):
         # The first shuffled window must differ from the unshuffled one.
         self.assertNotAllClose(seq, np.arange(0, 5))
         # Targets stay aligned with the first element of their window.
         self.assertAllClose(seq[:, 0] * 2, label)
         reference_seq = seq

     # A second iteration over the same dataset yields a different window.
     for seq, _ in dataset.take(1):
         self.assertNotAllClose(seq, reference_seq)

     # Check determinism: a fresh dataset with the same seed reproduces
     # the first window of the first iteration.
     dataset = make_shuffled_dataset(data, targets)
     for seq, _ in dataset.take(1):
         self.assertAllClose(seq, reference_seq)
Example #2
0
    def predict(frame: DataFrame, split_fraction: float = PredictorUtils.SPLIT_FRACTION,
                past: int = PredictorUtils.PAST, batch_size: int = PredictorUtils.BATCH_SIZE,
                step: int = PredictorUtils.STEP, future: int = PredictorUtils.FUTURE,
                show_visualization: bool = False) -> Optional[PredictionDTO]:
        """
        Predict from the most recent ``past`` feature rows using the saved model.

        The last ``past`` rows of the feature frame are windowed with
        ``timeseries_dataset_from_array``; only the first sample of the first
        batch contributes to the returned result.

        :param frame: raw input data; must contain at least ``past`` rows.
        :param split_fraction: fraction of ``frame`` used to derive the split index
            handed to ``PredictorUtils.create_features``.
        :param past: number of trailing feature rows used as model input.
        :param batch_size: batch size of the windowed dataset.
        :param step: sampling rate inside a window; the window length is
            ``int(past / step)``.
        :param future: only used to position the prediction in the optional plot.
        :param show_visualization: plot the input window and the prediction when true.
        :return: a ``PredictionDTO`` built from the difference between the predicted
            value and the ``CLOSE_COLUMN`` entry of the last input row, or ``None``
            when ``frame`` is too short, the model file is missing, or the windowed
            dataset yields no batch.
        """
        if len(frame) < past or not isfile(PredictorUtils.PATH_MODEL_FILE):
            return None

        train_split: int = int(split_fraction * int(frame.shape[0]))
        features: DataFrame = PredictorUtils.create_features(train_split, frame, False)

        # Feature columns are addressed by their integer labels 0..len(SELECTED)-1.
        feature_columns: list = list(range(len(PredictorUtils.SELECTED)))
        x_val: ndarray = features.iloc[-past:][feature_columns].values
        # Targets are irrelevant for inference; zeros only satisfy the dataset API.
        y_val: MaskedArray = zeros(past)

        sequence_length: int = int(past / step)
        dataset_val: BatchDataset = timeseries_dataset_from_array(
            x_val,
            y_val,
            sequence_length=sequence_length,
            sampling_rate=step,
            batch_size=batch_size,
        )
        model: Functional = PredictorUtils.load_model()

        # Only the first batch is ever used: the loop body returns unconditionally,
        # so request exactly one batch instead of pretending to iterate over five.
        for x, _ in dataset_val.take(1):
            predictions: ndarray = model.predict(x)[0]
            print(predictions)
            if show_visualization:
                PredictorUtils.show_plot(
                    [x[0][:, 1].numpy(), predictions],
                    int(future / step),
                    'Single Step Prediction',
                )
            return PredictionDTO(predictions[0] - x_val[-1][PredictorUtils.CLOSE_COLUMN])

        # The windowed dataset produced no batch at all.
        return None
Example #3
0
    def plot_prediction(frame: DataFrame, split_fraction: float = PredictorUtils.SPLIT_FRACTION,
                        past: int = PredictorUtils.PAST, batch_size: int = PredictorUtils.BATCH_SIZE,
                        step: int = PredictorUtils.STEP, future: int = PredictorUtils.FUTURE) -> None:
        """
        Plot model predictions against the trailing ``past + future`` close values.

        Does nothing when ``frame`` has fewer than ``past`` rows or when no saved
        model file exists. Otherwise the ``past`` rows preceding the final
        ``future`` rows are windowed, and for each of up to five batches the
        first prediction is printed and plotted next to the sub-sampled close
        values of the whole trailing range.
        """
        if len(frame) < past or not isfile(PredictorUtils.PATH_MODEL_FILE):
            return

        train_split: int = int(split_fraction * int(frame.shape[0]))
        features: DataFrame = PredictorUtils.create_features(train_split, frame)

        # Trailing region: `past` input rows followed by `future` rows to compare with.
        window: int = past + future
        feature_columns = list(range(len(PredictorUtils.SELECTED)))
        x_val: ndarray = features.iloc[-window:-future][feature_columns].values
        # Placeholder targets; only the inputs matter for plotting predictions.
        y_val: MaskedArray = zeros(past)

        # Close values over the whole trailing region, sampled every `step` rows.
        close_values: DataFrame = features.iloc[-window:][[PredictorUtils.CLOSE_COLUMN]]
        full_range: ndarray = close_values.values[::step]

        dataset_val: BatchDataset = timeseries_dataset_from_array(
            x_val,
            y_val,
            sequence_length=int(past / step),
            sampling_rate=step,
            batch_size=batch_size,
        )
        model: Functional = PredictorUtils.load_model()
        prediction_offset: int = int(future / step)

        for batch_inputs, _ in dataset_val.take(5):
            predictions: ndarray = model.predict(batch_inputs)[0]
            print(predictions)
            PredictorUtils.show_plot(
                [full_range, predictions],
                prediction_offset,
                'Single Step Prediction',
                True
            )
Example #4
0
 def test_errors(self):
     # Each invalid argument must raise a ValueError whose message matches
     # the pattern naming the offending parameter.

     def assert_rejected(message_pattern, **bad_kwargs):
         with self.assertRaisesRegex(ValueError, message_pattern):
             _ = timeseries.timeseries_dataset_from_array(
                 np.arange(10), None, 3, **bad_kwargs)

     # bad start index: negative, then beyond the ten data points
     assert_rejected('start_index must be ', start_index=-1)
     assert_rejected('start_index must be ', start_index=11)
     # bad end index: negative, then beyond the ten data points
     assert_rejected('end_index must be ', end_index=-1)
     assert_rejected('end_index must be ', end_index=11)
     # bad sampling_rate: zero
     assert_rejected('sampling_rate must be ', sampling_rate=0)
     # bad sequence stride: zero
     assert_rejected('sequence_stride must be ', sequence_stride=0)
Example #5
0
 def test_start_and_end_index(self):
     # Sequences must only contain values taken from the index range
     # [start_index, end_index) of the data, i.e. 10..89 here.
     data = np.arange(100)
     dataset = timeseries.timeseries_dataset_from_array(
         data,
         None,
         sequence_length=9,
         batch_size=5,
         sequence_stride=3,
         sampling_rate=2,
         start_index=10,
         end_index=90)
     num_batches = 0
     for batch in dataset:
         # Without targets every element is the batch tensor itself, so the
         # whole batch is checked; indexing batch[0] would only look at the
         # first sequence of each batch.
         self.assertAllLess(batch, 90)
         self.assertAllGreater(batch, 9)
         num_batches += 1
     # Guard against passing vacuously on an empty dataset.
     self.assertGreater(num_batches, 0)
Example #6
0
 def test_timeseries_regression(self):
     # Simple regression use case: every window of `offset` consecutive
     # values is labelled with the value that directly follows it.
     data = np.arange(10)
     offset = 3
     dataset = timeseries.timeseries_dataset_from_array(
         data, data[offset:], sequence_length=offset, batch_size=1)
     batches_seen = 0
     for position, batch in enumerate(dataset):
         self.assertLen(batch, 2)
         window, label = batch
         self.assertEqual(window.shape, (1, 3))
         # Check values
         self.assertAllClose(label[0], data[offset + position])
         self.assertAllClose(window[0], data[position:position + offset])
         batches_seen = position + 1
     self.assertEqual(batches_seen, 7)  # Expect 7 batches
Example #7
0
 def test_no_targets(self):
     # With targets=None the dataset yields bare batches of sequences.
     data = np.arange(50)
     dataset = timeseries.timeseries_dataset_from_array(
         data, None, sequence_length=10, batch_size=5)
     # Expect 9 batches: eight full ones followed by a single-sequence batch.
     last_index = None
     for batch_index, sequences in enumerate(dataset):
         if batch_index < 8:
             self.assertEqual(sequences.shape, (5, 10))
         elif batch_index == 8:
             self.assertEqual(sequences.shape, (1, 10))
         # Consecutive sequences start one data point apart.
         first_start = batch_index * 5
         for row in range(min(5, len(sequences))):
             start = first_start + row
             self.assertAllClose(sequences[row],
                                 np.arange(start, start + 10))
         last_index = batch_index
     self.assertEqual(last_index, 8)
Example #8
0
 def test_basics(self):
     # Test ordering, targets, sequence length, batch size
     data = np.arange(100)
     targets = data * 2
     dataset = timeseries.timeseries_dataset_from_array(
         data,
         targets,
         sequence_length=9,
         batch_size=5)
     # Expect 19 batches: 92 sequences -> 18 full batches + 1 of size 2
     i = None
     for i, batch in enumerate(dataset):
         self.assertLen(batch, 2)
         # Distinct name so the full `targets` array above is not shadowed.
         inputs, batch_targets = batch
         if i < 18:
             self.assertEqual(inputs.shape, (5, 9))
         if i == 18:
             # Last batch: size 2
             self.assertEqual(inputs.shape, (2, 9))
         # Check target values
         self.assertAllClose(batch_targets, inputs[:, 0] * 2)
         for j in range(min(5, len(inputs))):
             # Check each sample in the batch
             self.assertAllClose(inputs[j],
                                 np.arange(i * 5 + j, i * 5 + j + 9))
     # Actually enforce the expected batch count; without this the test
     # would pass on a dataset that yields too few (or no) batches.
     self.assertEqual(i, 18)
Example #9
0
 def test_sequence_stride(self):
     # Consecutive sequences must start `sequence_stride` data points apart.
     data = np.arange(100)
     targets = data * 2
     dataset = timeseries.timeseries_dataset_from_array(
         data,
         targets,
         sequence_length=9,
         batch_size=5,
         sequence_stride=3)
     # Expect 7 batches: 31 sequences -> 6 full batches + 1 of size 1
     i = None
     for i, batch in enumerate(dataset):
         self.assertLen(batch, 2)
         # Distinct name so the full `targets` array above is not shadowed.
         inputs, batch_targets = batch
         if i < 6:
             self.assertEqual(inputs.shape, (5, 9))
         if i == 6:
             # Last batch: size 1
             self.assertEqual(inputs.shape, (1, 9))
         # Check target values
         self.assertAllClose(inputs[:, 0] * 2, batch_targets)
         for j in range(min(5, len(inputs))):
             # Check each sample in the batch
             start_index = i * 5 * 3 + j * 3
             end_index = start_index + 9
             self.assertAllClose(inputs[j],
                                 np.arange(start_index, end_index))
     # Enforce the expected batch count so a short or empty dataset cannot
     # make the test pass vacuously.
     self.assertEqual(i, 6)
Example #10
0
    def fit(cls, frame: DataFrame, split_fraction: float = PredictorUtils.SPLIT_FRACTION,
            step: int = PredictorUtils.STEP, past: int = PredictorUtils.PAST, future: int = PredictorUtils.FUTURE,
            batch_size: int = PredictorUtils.BATCH_SIZE, sufficient_data: int = PredictorUtils.SUFFICIENT_DATA,
            show_visualization: bool = False) -> None:
        """
        Train an LSTM regressor on ``frame`` and save the resulting model.

        The feature frame is split at ``int(split_fraction * len(frame))`` into a
        training and a validation part. Both parts are turned into windowed
        datasets whose labels are the ``CLOSE_COLUMN`` values ``past + future``
        rows after the start of each window. A model consisting of ``LSTM(32)``
        followed by ``Dense(1)`` is trained with early stopping and best-weights
        checkpointing, then written to ``PredictorUtils.PATH_MODEL_FILE``.

        Note from the original author: there are between 29 and 32 data records
        per day.

        :param frame: raw input data; nothing happens when it has fewer than
            ``sufficient_data`` rows.
        :param split_fraction: fraction of rows used for training.
        :param step: sampling rate inside a window; the window length is
            ``int(past / step)``.
        :param past: number of rows a window looks back over.
        :param future: additional offset between a window and its label.
        :param batch_size: batch size of both datasets.
        :param sufficient_data: minimum number of rows required to train.
        :param show_visualization: show raw-data plots before and the loss curve
            after training.
        :raises ValueError: if the training dataset does not yield a single batch.
        """

        if len(frame) < sufficient_data:
            return

        # Raw Data Visualization

        if show_visualization:
            PredictorUtils.show_raw_visualization(frame)
            PredictorUtils.show_heatmap(frame)

        # Data Preprocessing

        print(
            'The selected parameters are:',
            ', '.join([PredictorUtils.TITLES[i] for i in PredictorUtils.SELECTED]),
        )

        train_split: int = int(split_fraction * int(frame.shape[0]))
        features: DataFrame = PredictorUtils.create_features(train_split, frame)

        # Label-based slices (inclusive on both ends); assumes `features` carries a
        # default 0..n-1 integer index -- TODO confirm against create_features.
        train_data: DataFrame = features.loc[0: train_split - 1]
        val_data: DataFrame = features.loc[train_split:]

        # Feature columns are addressed by their integer labels 0..len(SELECTED)-1.
        feature_columns: list = list(range(len(PredictorUtils.SELECTED)))

        # Training dataset

        # Labels start `past + future` rows after the inputs, so the window that
        # starts at row i is paired with the close value at row i + past + future.
        start: int = past + future
        end: int = start + train_split

        x_train: ndarray = train_data[feature_columns].values
        y_train: DataFrame = features.iloc[start:end][[PredictorUtils.CLOSE_COLUMN]]

        sequence_length: int = int(past / step)

        dataset_train: BatchDataset = timeseries_dataset_from_array(
            x_train,
            y_train,
            sequence_length=sequence_length,
            sampling_rate=step,
            batch_size=batch_size,
        )

        # Validation dataset

        # Drop the trailing rows that have no label `past + future` rows ahead.
        x_end: int = len(val_data) - past - future

        label_start: int = train_split + past + future

        x_val: ndarray = val_data.iloc[:x_end][feature_columns].values
        y_val: DataFrame = features.iloc[label_start:][[PredictorUtils.CLOSE_COLUMN]]

        dataset_val: BatchDataset = timeseries_dataset_from_array(
            x_val,
            y_val,
            sequence_length=sequence_length,
            sampling_rate=step,
            batch_size=batch_size,
        )

        # Peek at one batch to learn the input shape. Fail with a clear message
        # instead of dereferencing None when the dataset yields nothing.
        first_batch = next(iter(dataset_train.take(1)), None)
        if first_batch is None:
            raise ValueError('Training dataset is empty: not enough rows for the requested window.')
        sample_inputs, sample_targets = first_batch

        print('Input shape:', sample_inputs.numpy().shape)
        print('Target shape:', sample_targets.numpy().shape)

        # Training

        # Separate name for the symbolic model input so the sample batch tensor is
        # not rebound to a value of a different type.
        model_inputs: KerasTensor = Input(shape=(sample_inputs.shape[1], sample_inputs.shape[2]))
        lstm_out: KerasTensor = LSTM(32)(model_inputs)
        outputs: KerasTensor = Dense(1)(lstm_out)

        model: Model = Model(inputs=model_inputs, outputs=outputs)
        model.compile(optimizer=Adam(learning_rate=cls.LEARNING_RATE), loss='mse')
        model.summary()

        es_callback: EarlyStopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=5)

        checkpoint_callback: ModelCheckpoint = ModelCheckpoint(
            monitor='val_loss',
            filepath=PredictorUtils.PATH_CHECKPOINT_FILE,
            verbose=1,
            save_weights_only=True,
            save_best_only=True,
        )

        # Make sure the output directory exists before anything is written to it.
        Utils.create_dir(PredictorUtils.PATH_MODEL_DIR)

        history: History = model.fit(
            dataset_train,
            epochs=cls.EPOCHS,
            validation_data=dataset_val,
            callbacks=[es_callback, checkpoint_callback],
        )
        model.save(PredictorUtils.PATH_MODEL_FILE)

        if show_visualization:
            PredictorUtils.visualize_loss(history, 'Training and Validation Loss')