def test_ignore_incomplete(self):
    """Check the batches yielded when ``ignore_incomplete_batch=True``.

    Every case uses windows of length 5 grouped into batches of 3 over
    two parallel arrays (``0, 1, 2, ...`` and ``-1, -2, -3, ...``).
    The expected outputs are:

    * array size 5: nothing is yielded;
    * array sizes 7 and 9: one batch of three windows;
    * array size 10: two batches of three windows each.
    """
    def full_batch(first):
        """Expected ``(x, y)`` batch whose first window starts at `first`."""
        xs = [[first + row + col for col in range(5)] for row in range(3)]
        # The second array is always ``-(x + 1)`` element-wise.
        ys = [[-(value + 1) for value in window] for window in xs]
        return xs, ys

    cases = (
        (5, []),
        (7, [full_batch(0)]),
        (9, [full_batch(0)]),
        (10, [full_batch(0), full_batch(3)]),
    )
    for size, expected in cases:
        window = BatchSlidingWindow(
            size, 5, 3, ignore_incomplete_batch=True)
        arrays = [np.arange(size), np.arange(-1, -size - 1, -1)]
        self._check_output(window.get_iterator(arrays), expected)
def test_validate_arrays(self):
    """Check that ``get_iterator`` rejects invalid `arrays` arguments.

    Two failures are exercised: an empty list of arrays, and a second
    array whose shape ``(10, 1)`` differs from the expected ``(10,)``.
    Both must raise :class:`ValueError` once the iterator is advanced.
    """
    empty_pattern = '`arrays` must not be empty'
    shape_pattern = (r'The shape of `arrays\[1\]` is expected '
                     r'to be \(10,\), but got \(10, 1\)')

    # Case 1: no arrays at all.
    with pytest.raises(ValueError, match=empty_pattern):
        iterator = BatchSlidingWindow(10, 5, 3).get_iterator([])
        next(iterator)

    # Case 2: the second array is a column vector instead of 1-D.
    flat = np.arange(10)
    column = np.arange(10).reshape([-1, 1])
    with pytest.raises(ValueError, match=shape_pattern):
        iterator = BatchSlidingWindow(10, 5, 3).get_iterator([flat, column])
        next(iterator)
def test_construction(self):
    """Check that the ``BatchSlidingWindow`` constructor validates inputs.

    Each entry of the table below holds the expected error pattern and
    the positional / keyword arguments that must trigger it.
    """
    bad_constructions = (
        # window_size of 0
        ('`window_size` must be at least 1',
         (10, 0, 3), {}),
        # array shorter than one window
        ('`array_size` must be at least as large as '
         '`window_size`',
         (4, 5, 3), {}),
        # excludes has 9 entries for an array of size 10
        (r'The shape of `excludes` is expected to '
         r'be \(10,\), but got \(9,\)',
         (10, 5, 3), {'excludes': np.arange(9)}),
    )
    for pattern, args, kwargs in bad_constructions:
        with pytest.raises(ValueError, match=pattern):
            BatchSlidingWindow(*args, **kwargs)
def test_excludes(self):
    """Check that windows containing an excluded point are not yielded.

    Points 0, 4 and 9 of a size-10 array are marked as excluded.  With
    windows of length 3 and batches of 2, the expected output contains
    only the windows starting at 1, 5 and 6: one batch of two windows
    followed by a final batch holding the single remaining window.
    """
    # Use the builtin ``bool`` as dtype: the ``np.bool`` alias was
    # deprecated in NumPy 1.20 and removed in 1.24, where accessing it
    # raises ``AttributeError``.
    excludes = np.array([1, 0, 0, 0, 1, 0, 0, 0, 0, 1], dtype=bool)
    self._check_output(
        BatchSlidingWindow(10, 3, 2, excludes=excludes).get_iterator(
            [np.arange(10), np.arange(-1, -11, -1)]),
        [([[1, 2, 3], [5, 6, 7]],
          [[-2, -3, -4], [-6, -7, -8]]),
         ([[6, 7, 8]],
          [[-7, -8, -9]])])
def test_shuffle(self):
    """Check that shuffling reorders windows without losing or altering any.

    All windows yielded with ``shuffle=True`` are gathered, sorted by
    the first element of the first array's window, and compared with
    the full ordered set of six windows.  The second array is reordered
    with the same permutation, so the pairing between the two arrays is
    checked as well.
    """
    window = BatchSlidingWindow(10, 5, 3, shuffle=True)
    arrays = [np.arange(10), np.arange(-1, -11, -1)]

    x_rows, y_rows = [], []
    for x_batch, y_batch in window.get_iterator(arrays):
        x_rows.extend(x_batch)
        y_rows.extend(y_batch)
    x_rows = np.asarray(x_rows)
    y_rows = np.asarray(y_rows)

    # Undo the shuffle: order the windows by their starting value.
    order = np.argsort(x_rows[:, 0])
    sorted_x = x_rows[order]
    sorted_y = y_rows[order]

    # Row r of the expected matrix is the window [r, r + 1, ..., r + 4];
    # the second array is -(x + 1) element-wise.
    expected_x = np.arange(6)[:, None] + np.arange(5)
    expected_y = -(expected_x + 1)
    np.testing.assert_equal(sorted_x, expected_x)
    np.testing.assert_equal(sorted_y, expected_y)
def fit(self, values, labels, missing, mean, std, excludes=None,
        valid_portion=0.3, summary_dir=None):
    """
    Train the :class:`Donut` model with given data.

    From https://github.com/haowen-xu/donut/blob/master/donut/training.py
    but without prints.

    The last ``int(len(values) * valid_portion)`` points are held out as
    the validation set and the remaining leading points are used for
    training.  If that count is zero, the validation set is empty and
    all points are used for training.

    Args:
        values (np.ndarray): 1-D `float32` array, the standardized
            KPI observations.
        labels (np.ndarray): 1-D `int32` array, the anomaly labels.
        missing (np.ndarray): 1-D `int32` array, the indicator of
            missing points.
        mean (float): The mean of KPI observations before
            standardization.
        std (float): The standard deviation of KPI observations
            before standardization.
        excludes (np.ndarray): 1-D `bool` array, indicators of whether
            or not to totally exclude a point.  If a point is excluded,
            any window which contains that point is excluded.
            (default :obj:`None`, no point is totally excluded)
        valid_portion (float): Ratio of validation data out of all the
            specified training data. (default 0.3)
        summary_dir (str): Optional summary directory for
            :class:`tf.summary.FileWriter`. (default :obj:`None`,
            summary is disabled)

    Raises:
        ValueError: If `values` is not 1-D, or if the shape of `labels`
            or `missing` does not agree with the shape of `values`.
    """
    sess = get_default_session_or_error()

    # coerce the inputs and validate their shapes
    values = np.asarray(values, dtype=np.float32)
    labels = np.asarray(labels, dtype=np.int32)
    missing = np.asarray(missing, dtype=np.int32)
    if len(values.shape) != 1:
        raise ValueError('`values` must be a 1-D array')
    if labels.shape != values.shape:
        raise ValueError('The shape of `labels` does not agree with '
                         'the shape of `values` ({} vs {})'.
                         format(labels.shape, values.shape))
    if missing.shape != values.shape:
        raise ValueError('The shape of `missing` does not agree with '
                         'the shape of `values` ({} vs {})'.
                         format(missing.shape, values.shape))

    # split the training & validation set
    n = int(len(values) * valid_portion)
    # ``-0 == 0``, so slicing with ``[:-n]`` / ``[-n:]`` when ``n == 0``
    # would hand an EMPTY array to training and the WHOLE array to
    # validation.  Use the array length as the split point in that case;
    # for every non-zero ``n`` this is the same slicing as ``-n``.
    split = -n if n != 0 else len(values)
    train_values, v_x = values[:split], values[split:]
    train_labels, valid_labels = labels[:split], labels[split:]
    train_missing, valid_missing = missing[:split], missing[split:]
    # a validation point counts as abnormal if it is labeled or missing
    v_y = np.logical_or(valid_labels, valid_missing).astype(np.int32)
    if excludes is None:
        train_excludes, valid_excludes = None, None
    else:
        train_excludes, valid_excludes = excludes[:split], excludes[split:]

    # data augmentation object and the sliding window iterator
    # NOTE(review): `std` is forwarded unchanged; a zero `std` is not
    # guarded against in this method.
    aug = MissingDataInjection(mean, std, self._missing_data_injection_rate)
    train_sliding_window = BatchSlidingWindow(
        array_size=len(train_values),
        window_size=self.model.x_dims,
        batch_size=self._batch_size,
        excludes=train_excludes,
        shuffle=True,
        ignore_incomplete_batch=True,
    )
    valid_sliding_window = BatchSlidingWindow(
        array_size=len(v_x),
        window_size=self.model.x_dims,
        batch_size=self._valid_batch_size,
        excludes=valid_excludes,
    )

    # initialize the variables of the trainer, and the model
    sess.run(self._trainer_initializer)
    ensure_variables_initialized(self._train_params)

    # training loop
    lr = self._initial_lr
    # Side effect (per the original author's note): early stopping
    # stores variables temporarily in a temp dir.
    with TrainLoop(
            param_vars=self._train_params,
            early_stopping=True,
            summary_dir=summary_dir,
            max_epoch=self._max_epoch,
            max_step=self._max_step) as loop:  # type: TrainLoop
        for epoch in loop.iter_epochs():
            # re-augment the training data at the start of every epoch
            x, y1, y2 = aug.augment(
                train_values, train_labels, train_missing)
            y = np.logical_or(y1, y2).astype(np.int32)
            train_iterator = train_sliding_window.get_iterator([x, y])
            for step, (batch_x, batch_y) in loop.iter_steps(train_iterator):
                # run a training step
                feed_dict = dict(six.iteritems(self._feed_dict))
                feed_dict[self._learning_rate] = lr
                feed_dict[self._input_x] = batch_x
                feed_dict[self._input_y] = batch_y
                loss, _ = sess.run(
                    [self._loss, self._train_op], feed_dict=feed_dict)
                loop.collect_metrics({'loss': loss})

                if step % self._valid_step_freq == 0:
                    # collect variable summaries
                    if summary_dir is not None:
                        loop.add_summary(sess.run(self._summary_op))

                    # do validation in batches
                    with loop.timeit('valid_time'), \
                            loop.metric_collector('valid_loss') as mc:
                        v_it = valid_sliding_window.get_iterator(
                            [v_x, v_y])
                        for b_v_x, b_v_y in v_it:
                            feed_dict = dict(
                                six.iteritems(self._valid_feed_dict))
                            feed_dict[self._input_x] = b_v_x
                            feed_dict[self._input_y] = b_v_y
                            loss = sess.run(
                                self._loss, feed_dict=feed_dict)
                            # weight each batch loss by its number of windows
                            mc.collect(loss, weight=len(b_v_x))

            # anneal the learning rate
            if self._lr_anneal_epochs and \
                    epoch % self._lr_anneal_epochs == 0:
                lr *= self._lr_anneal_factor