Esempio n. 1
0
 def test_ignore_incomplete(self):
     """Verify that trailing incomplete batches are dropped.

     Every case uses a window size of 5 and a batch size of 3 with
     ``ignore_incomplete_batch=True``, over a pair of arrays: an ascending
     ``0..size-1`` ramp and a descending ``-1..-size`` ramp.  Only full
     batches of three windows are expected in the output.
     """
     def batches_for(array_size):
         # Build the iterator over the ascending/descending array pair.
         window = BatchSlidingWindow(
             array_size, 5, 3, ignore_incomplete_batch=True)
         ascending = np.arange(array_size)
         descending = np.arange(-1, -array_size - 1, -1)
         return window.get_iterator([ascending, descending])

     # Windows starting at offsets 0, 1, 2.
     first_batch = (
         [[0, 1, 2, 3, 4], [1, 2, 3, 4, 5], [2, 3, 4, 5, 6]],
         [[-1, -2, -3, -4, -5], [-2, -3, -4, -5, -6],
          [-3, -4, -5, -6, -7]],
     )
     # Windows starting at offsets 3, 4, 5.
     second_batch = (
         [[3, 4, 5, 6, 7], [4, 5, 6, 7, 8], [5, 6, 7, 8, 9]],
         [[-4, -5, -6, -7, -8], [-5, -6, -7, -8, -9],
          [-6, -7, -8, -9, -10]],
     )

     # 5 points -> a single window, never a full batch.
     self._check_output(batches_for(5), [])
     # 7 points -> exactly three windows, one full batch.
     self._check_output(batches_for(7), [first_batch])
     # 9 points -> five windows; the trailing two are discarded.
     self._check_output(batches_for(9), [first_batch])
     # 10 points -> six windows, two full batches.
     self._check_output(batches_for(10), [first_batch, second_batch])
Esempio n. 2
0
 def test_validate_arrays(self):
     """Check that ``get_iterator`` rejects invalid ``arrays`` arguments.

     Two failures are exercised, each by requesting the first batch from
     the iterator: an empty list of arrays, and a second array of shape
     ``(10, 1)`` where ``(10,)`` is expected.
     """
     # Case 1: no arrays supplied at all.
     with pytest.raises(ValueError, match='`arrays` must not be empty'):
         empty_iterator = BatchSlidingWindow(10, 5, 3).get_iterator([])
         _ = next(empty_iterator)

     # Case 2: the second array is a column vector instead of 1-D.
     shape_pattern = (r'The shape of `arrays\[1\]` is expected '
                      r'to be \(10,\), but got \(10, 1\)')
     with pytest.raises(ValueError, match=shape_pattern):
         flat = np.arange(10)
         column = np.arange(10).reshape([-1, 1])
         mismatched_iterator = BatchSlidingWindow(10, 5, 3).get_iterator(
             [flat, column])
         _ = next(mismatched_iterator)
Esempio n. 3
0
 def test_construction(self):
     """Check that the constructor rejects invalid arguments.

     Each case lists the positional arguments, the keyword arguments and
     the regular expression the resulting ``ValueError`` message must
     match.
     """
     invalid_cases = [
         # A window size of zero.
         ((10, 0, 3), {},
          '`window_size` must be at least 1'),
         # An array shorter than the window.
         ((4, 5, 3), {},
          '`array_size` must be at least as large as '
          '`window_size`'),
         # An `excludes` mask whose length differs from the array size.
         ((10, 5, 3), {'excludes': np.arange(9)},
          r'The shape of `excludes` is expected to '
          r'be \(10,\), but got \(9,\)'),
     ]
     for args, kwargs, pattern in invalid_cases:
         with pytest.raises(ValueError, match=pattern):
             _ = BatchSlidingWindow(*args, **kwargs)
Esempio n. 4
0
 def test_excludes(self):
     """Verify that windows touching an excluded point are skipped.

     Points 0, 4 and 9 are excluded.  With a window size of 3 over 10
     points, the only windows free of excluded points start at offsets
     1, 5 and 6; with a batch size of 2 they arrive as one full batch
     followed by one batch holding a single window.
     """
     # Use the builtin `bool`: the `np.bool` alias was deprecated in
     # NumPy 1.20 and removed in NumPy 1.24.
     excludes = np.array([1, 0, 0, 0, 1, 0, 0, 0, 0, 1], dtype=bool)
     self._check_output(
         BatchSlidingWindow(10, 3, 2, excludes=excludes).get_iterator(
             [np.arange(10), np.arange(-1, -11, -1)]),
         [([[1, 2, 3], [5, 6, 7]], [[-2, -3, -4], [-6, -7, -8]]),
          ([[6, 7, 8]], [[-7, -8, -9]])])
Esempio n. 5
0
 def test_shuffle(self):
     """Verify that shuffling reorders windows without losing any.

     All windows from every batch are gathered, sorted by the first
     element of the ascending array's window, and compared with the full
     set of six windows.  The second array is reordered with the same
     permutation, which also checks that the two arrays stay aligned.
     """
     iterator = BatchSlidingWindow(10, 5, 3, shuffle=True).get_iterator(
         [np.arange(10), np.arange(-1, -11, -1)])

     # Flatten the batches into plain lists of windows.
     rows_a, rows_b = [], []
     for batch_a, batch_b in iterator:
         rows_a.extend(batch_a)
         rows_b.extend(batch_b)
     rows_a = np.asarray(rows_a)
     rows_b = np.asarray(rows_b)

     # Undo the shuffle: order windows by their starting value.
     order = np.argsort(rows_a[:, 0])
     sorted_a = rows_a[order, :]
     sorted_b = rows_b[order, :]

     expected_a = [[0, 1, 2, 3, 4], [1, 2, 3, 4, 5], [2, 3, 4, 5, 6],
                   [3, 4, 5, 6, 7], [4, 5, 6, 7, 8], [5, 6, 7, 8, 9]]
     expected_b = [[-1, -2, -3, -4, -5], [-2, -3, -4, -5, -6],
                   [-3, -4, -5, -6, -7], [-4, -5, -6, -7, -8],
                   [-5, -6, -7, -8, -9], [-6, -7, -8, -9, -10]]
     np.testing.assert_equal(sorted_a, expected_a)
     np.testing.assert_equal(sorted_b, expected_b)
Esempio n. 6
0
    def fit(self, values, labels, missing, mean, std, excludes=None,
            valid_portion=0.3, summary_dir=None):
        """
        Train the :class:`Donut` model with given data.
        From https://github.com/haowen-xu/donut/blob/master/donut/training.py but without prints.

        The trailing ``valid_portion`` of the data is held out for
        validation; the leading remainder is used for training.  Training
        data is re-augmented with :class:`MissingDataInjection` at the
        start of every epoch.

        Args:
            values (np.ndarray): 1-D `float32` array, the standardized
                KPI observations.
            labels (np.ndarray): 1-D `int32` array, the anomaly labels.
            missing (np.ndarray): 1-D `int32` array, the indicator of
                missing points.
            mean (float): The mean of KPI observations before standardization.
            std (float): The standard deviation of KPI observations before
                standardization.
            excludes (np.ndarray): 1-D `bool` array, indicators of whether
                or not to totally exclude a point.  If a point is excluded,
                any window which contains that point is excluded.
                (default :obj:`None`, no point is totally excluded)
            valid_portion (float): Ratio of validation data out of all the
                specified training data. (default 0.3)
            summary_dir (str): Optional summary directory for
                :class:`tf.summary.FileWriter`. (default :obj:`None`,
                summary is disabled)

        Raises:
            ValueError: If `values` is not 1-D, if the shape of `labels`,
                `missing` or `excludes` differs from that of `values`, or
                if `valid_portion` would leave the training set or the
                validation set empty.
        """
        sess = get_default_session_or_error()

        # normalize and validate the input arrays
        values = np.asarray(values, dtype=np.float32)
        labels = np.asarray(labels, dtype=np.int32)
        missing = np.asarray(missing, dtype=np.int32)
        if len(values.shape) != 1:
            raise ValueError('`values` must be a 1-D array')
        if labels.shape != values.shape:
            raise ValueError('The shape of `labels` does not agree with '
                             'the shape of `values` ({} vs {})'.
                             format(labels.shape, values.shape))
        if missing.shape != values.shape:
            raise ValueError('The shape of `missing` does not agree with '
                             'the shape of `values` ({} vs {})'.
                             format(missing.shape, values.shape))
        if excludes is not None:
            excludes = np.asarray(excludes)
            if excludes.shape != values.shape:
                raise ValueError('The shape of `excludes` does not agree '
                                 'with the shape of `values` ({} vs {})'.
                                 format(excludes.shape, values.shape))

        # split the training & validation set
        #
        # An explicit split index is used instead of `[:-n]` / `[-n:]`:
        # with `n == 0` those slices yield an empty training set and put
        # every point into the validation set, i.e. the two are swapped.
        n_valid = int(len(values) * valid_portion)
        if not 0 < n_valid < len(values):
            raise ValueError(
                '`valid_portion` must leave both the training set and the '
                'validation set non-empty (valid_portion={} assigns {} of '
                '{} points to validation)'.
                format(valid_portion, n_valid, len(values)))
        split = len(values) - n_valid
        train_values, v_x = values[:split], values[split:]
        train_labels, valid_labels = labels[:split], labels[split:]
        train_missing, valid_missing = missing[:split], missing[split:]
        # a validation point is "abnormal" if it is labeled or missing
        v_y = np.logical_or(valid_labels, valid_missing).astype(np.int32)
        if excludes is None:
            train_excludes, valid_excludes = None, None
        else:
            train_excludes = excludes[:split]
            valid_excludes = excludes[split:]

        # data augmentation object and the sliding window iterator
        # NOTE(review): `std` is forwarded unchanged; a zero `std` is NOT
        # replaced by a small positive number here.
        aug = MissingDataInjection(mean, std, self._missing_data_injection_rate)
        train_sliding_window = BatchSlidingWindow(
            array_size=len(train_values),
            window_size=self.model.x_dims,
            batch_size=self._batch_size,
            excludes=train_excludes,
            shuffle=True,
            ignore_incomplete_batch=True,
        )
        valid_sliding_window = BatchSlidingWindow(
            array_size=len(v_x),
            window_size=self.model.x_dims,
            batch_size=self._valid_batch_size,
            excludes=valid_excludes,
        )

        # initialize the variables of the trainer, and the model
        sess.run(self._trainer_initializer)
        ensure_variables_initialized(self._train_params)

        # training loop
        lr = self._initial_lr
        # Side effect (per the original author's note): early stopping
        # stores variables temporarily in a temp dir.
        with TrainLoop(
                param_vars=self._train_params,
                early_stopping=True,
                summary_dir=summary_dir,
                max_epoch=self._max_epoch,
                max_step=self._max_step) as loop:  # type: TrainLoop

            for epoch in loop.iter_epochs():
                # re-augment the training data for this epoch
                x, y1, y2 = aug.augment(
                    train_values, train_labels, train_missing)
                y = np.logical_or(y1, y2).astype(np.int32)

                train_iterator = train_sliding_window.get_iterator([x, y])
                for step, (batch_x, batch_y) in loop.iter_steps(train_iterator):
                    # run a training step
                    feed_dict = dict(six.iteritems(self._feed_dict))
                    feed_dict[self._learning_rate] = lr
                    feed_dict[self._input_x] = batch_x
                    feed_dict[self._input_y] = batch_y
                    loss, _ = sess.run(
                        [self._loss, self._train_op], feed_dict=feed_dict)
                    loop.collect_metrics({'loss': loss})

                    if step % self._valid_step_freq == 0:
                        # collect variable summaries
                        if summary_dir is not None:
                            loop.add_summary(sess.run(self._summary_op))

                        # do validation in batches
                        with loop.timeit('valid_time'), loop.metric_collector('valid_loss') as mc:
                            v_it = valid_sliding_window.get_iterator([v_x, v_y])
                            for b_v_x, b_v_y in v_it:
                                valid_feed_dict = dict(
                                    six.iteritems(self._valid_feed_dict))
                                valid_feed_dict[self._input_x] = b_v_x
                                valid_feed_dict[self._input_y] = b_v_y
                                valid_loss = sess.run(
                                    self._loss, feed_dict=valid_feed_dict)
                                # weight each batch by its number of windows
                                mc.collect(valid_loss, weight=len(b_v_x))

                # anneal the learning rate
                if self._lr_anneal_epochs and epoch % self._lr_anneal_epochs == 0:
                    lr *= self._lr_anneal_factor