Example #1
    def test_get_default_session_or_error(self):
        with pytest.raises(RuntimeError, match='No session is active'):
            get_default_session_or_error()
        with self.test_session(use_gpu=False) as sess:
            self.assertIs(sess, get_default_session_or_error())
        with pytest.raises(RuntimeError, match='No session is active'):
            get_default_session_or_error()
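Judging from this test (and Example #16 below), the helper presumably just wraps TensorFlow's default-session lookup and raises when none is active. A minimal sketch, assuming the TensorFlow 1.x API:

import tensorflow as tf

def get_default_session_or_error():
    """Return the active default session, or raise if no session is active."""
    session = tf.get_default_session()
    if session is None:
        raise RuntimeError('No session is active')
    return session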
Example #2
    def set(self, value):
        """
        Set the value of the variable.

        Args:
            value: The value to be assigned to the variable.
        """
        get_default_session_or_error().run(
            self._self_assign_op, feed_dict={self._self_assign_ph: value})
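The assign op and placeholder used above are presumably built once when the variable wrapper is constructed. A minimal sketch of that setup, assuming `self._self_var` holds the underlying tf.Variable (only `_self_assign_ph` and `_self_assign_op` come from the snippet; everything else is an assumption):

        # hypothetical __init__ fragment (TensorFlow 1.x)
        self._self_assign_ph = tf.placeholder(
            dtype=self._self_var.dtype.base_dtype,
            shape=self._self_var.get_shape(),
            name='assign_ph')
        self._self_assign_op = tf.assign(self._self_var, self._self_assign_ph)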
Example #3
def collect_outputs(outputs, inputs, data_flow, feed_dict=None, session=None):
    """
    Run TensorFlow graph by mini-batch and concat outputs from each batch.

    Args:
        outputs (Iterable[tf.Tensor]): Output tensors to be computed.
        inputs (Iterable[tf.Tensor]): Input placeholders.
        data_flow (DataFlow): Data flow to feed the input placeholders.
        feed_dict: Optional, additional feed dict.
        session: The TensorFlow session.  If not specified, use the
            default session.

    Returns:
        tuple[np.ndarray]: The concatenated outputs.
    """
    outputs = list(outputs)
    inputs = list(inputs)
    session = session or get_default_session_or_error()

    collected = [[] for _ in range(len(outputs))]
    for batch in data_flow:
        batch_feed_dict = merge_feed_dict(
            feed_dict, {k: v
                        for (k, v) in zip(inputs, batch)})
        for i, o in enumerate(session.run(outputs, feed_dict=batch_feed_dict)):
            collected[i].append(o)

    for i, batches in enumerate(collected):
        collected[i] = np.concatenate(batches, axis=0)
    return tuple(collected)
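A hypothetical usage of collect_outputs, assuming tfsnippet's DataFlow.arrays constructor and a toy output tensor (all names below are placeholders for illustration):

x_ph = tf.placeholder(tf.float32, shape=[None, 8])
y_out = tf.reduce_sum(x_ph ** 2, axis=-1)         # stand-in for a model output
flow = DataFlow.arrays([x_data], batch_size=64)   # x_data: np.ndarray, shape (N, 8)

with tf.Session().as_default():
    # concatenates the per-batch results into one array of shape (N,)
    (y_values,) = collect_outputs([y_out], [x_ph], flow)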
Example #4
    def save(self, global_step=None, session=None):
        """
        Save the session to a checkpoint file.

        Args:
            global_step (int or tf.Tensor): The global step counter.
            session (tf.Session): The session to save.
                If not specified, select the default session.

        Returns:
            str: The path of the saved checkpoint file.
        """
        session = session or get_default_session_or_error()

        # save the states of savable objects into serial var
        if self._objects:
            object_states = {}
            for key, obj in six.iteritems(self._objects):
                object_states[key] = obj.get_state()

            serialized_states = pkl.dumps(
                object_states, protocol=pkl.HIGHEST_PROTOCOL)
            self._serial_var.set(serialized_states)

        # now save the variables to checkpoint file
        if not os.path.isdir(self.save_dir):
            makedirs(self.save_dir, exist_ok=True)
        return self._saver.save(
            session,
            os.path.join(self.save_dir, self.filename),
            global_step=global_step,
            write_meta_graph=self.save_meta
        )
Example #5
    def run(self):
        """Run training loop."""
        if self._is_fitting:
            raise RuntimeError('`run()` is not re-entrant.')
        self._is_fitting = True
        try:
            # initialize global training status
            session = get_default_session_or_error()
            ensure_variables_initialized()
            self.loop.print_training_summary()

            # initialize internal status
            for hook_list in self.hook_lists:
                hook_list.reset()

            for epoch in self.loop.iter_epochs():
                # run before epoch hook
                self.before_epochs.call_hooks()

                # run steps of this epoch
                for payload in self._iter_steps():
                    # run before step hook
                    self.before_steps.call_hooks()

                    # run the step
                    self._run_step(session, payload)

                    # run after step hook
                    self.after_steps.call_hooks()

                # run after epoch hook
                self.after_epochs.call_hooks()
        finally:
            self._is_fitting = False
Example #6
    def add_metrics(self, global_step=None, metrics=None, **kwargs):
        """Add a scalar metric as summary.

        Parameters
        ----------
        global_step : int | tf.Tensor | tf.Variable
            The global step counter. (optional)

        metrics, **kwargs
            Dict of metric values.
        """
        if metrics is not None and not isinstance(metrics, (dict, OrderedDict)):
            raise TypeError('%r should be a dict.' % (metrics,))

        values = []
        if metrics:
            for k, v in six.iteritems(metrics):
                values.append(tf.summary.Summary.Value(tag=k, simple_value=v))
        for k, v in six.iteritems(kwargs):
            values.append(tf.summary.Summary.Value(tag=k, simple_value=v))

        if values:
            if isinstance(global_step, (tf.Tensor, tf.Variable)):
                global_step = get_default_session_or_error().run(global_step)
            summary = tf.summary.Summary(value=values)
            self._writer.add_summary(summary, global_step=global_step)
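A hypothetical call, assuming `writer` is an instance of the class this method belongs to; a tensor-valued `global_step` is only resolvable while a default session is active:

with tf.Session().as_default() as sess:
    sess.run(tf.global_variables_initializer())
    writer.add_metrics(global_step=global_step_var,   # e.g. a tf.Variable counter
                       metrics={'train_loss': 0.32},
                       valid_acc=0.91)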
Example #7
    def run(self, feed_dict=None):
        """
        Run validation.

        Args:
            feed_dict (dict[tf.Tensor, any]): The extra feed dict to be
                merged with the already configured dict.  (default :obj:`None`)
        """
        session = get_default_session_or_error()

        with self.loop.timeit(self._time_metric_name), \
                self.loop.metric_collector(self._loss_metric_name) as mc:
            for batch_data in self.data_flow:
                # prepare for the batch feed dict
                feed_dict = resolve_feed_dict(
                    merge_feed_dict(self.feed_dict, feed_dict,
                                    zip(self.inputs, batch_data)))

                # run the mini-batch
                loss = self._run_batch(session, feed_dict)
                if self._loss_weight_func is not None:
                    loss_weight = self._loss_weight_func(*batch_data)
                else:
                    loss_weight = 1.
                mc.collect(loss, weight=loss_weight)
Example #8
    def test_sampling_for_fully_dynamic_shape(self):
        with self.get_session(use_gpu=True):
            params = {
                k: np.tile(v, [10] + [1] * len(v.shape))
                for k, v in six.iteritems(self.simple_params)
            }

            # sample `x` from distribution with dynamic batch shape
            tf.set_random_seed(1234)
            params_ph = {
                k: tf.placeholder(tf.float32)
                for k, v in six.iteritems(self.simple_params)
            }
            feed_dict = {
                params_ph[k]:
                np.tile(self.simple_params[k],
                        [10] + [1] * len(self.simple_params[k].shape))
                for k in six.iterkeys(self.simple_params)
            }
            dist = self.dist_class(**params_ph)
            x = dist.sample()
            prob, log_prob = dist.prob(x), dist.log_prob(x)
            x, prob, log_prob = get_default_session_or_error().run(
                [x, prob, log_prob], feed_dict=feed_dict)

            value_shape, batch_shape = \
                self.get_shapes_for_param(**self.simple_params)
            np.testing.assert_equal(x.shape,
                                    [10] + list(batch_shape + value_shape))
            self.assert_allclose(prob, self.prob(x, **params))
            self.assert_allclose(log_prob, self.log_prob(x, **params))
Example #9
    def get_samples_and_prob(self, sample_shape=(), feed_dict=None, **params):
        tf.set_random_seed(1234)
        dist = self.dist_class(**params)
        x = dist.sample(sample_shape)
        prob, log_prob = dist.prob(x), dist.log_prob(x)
        return get_default_session_or_error().run([x, prob, log_prob],
                                                  feed_dict=feed_dict)
Example #10
    def run(self, feed_dict=None):
        """
        Run evaluation.

        Args:
            feed_dict: The extra feed dict to be merged with the already
                configured dict.  (default :obj:`None`)
        """
        @contextmanager
        def timeit():
            if self.time_metric_name is not None:
                with self.loop.timeit(self.time_metric_name):
                    yield
            else:
                yield

        session = get_default_session_or_error()
        metric_tensors = list(six.itervalues(self.metrics))
        metric_names = list(six.iterkeys(self.metrics))
        metric_values = []
        metric_weights = []

        with timeit():
            for batch_data in self.data_flow:
                # prepare for the batch feed dict
                feed_dict = resolve_feed_dict(
                    merge_feed_dict(self.feed_dict, feed_dict,
                                    zip(self.inputs, batch_data)))

                # inspect the batch weight
                if self._batch_weight_func is not None:
                    batch_weight = self._batch_weight_func(*batch_data)
                else:
                    batch_weight = 1.
                metric_weights.append(batch_weight)

                # run the mini-batch
                batch_values = self._run_batch(session, feed_dict)
                for i, v in enumerate(batch_values):
                    if len(np.asarray(v).shape) != 0:  # pragma: no cover
                        raise ValueError(
                            'Metric is not a scalar: tensor {!r}, value {!r}.'.
                            format(metric_tensors[i], v))

                # accumulate the metrics
                metric_values.append(np.asarray(batch_values))

        # now merge all batch metrics and do logging
        metric_values = np.average(
            np.stack(metric_values, axis=0),
            axis=0,
            weights=np.asarray(metric_weights),
        )
        assert (len(metric_names) == len(metric_values))
        self._last_metrics_dict = metrics_dict = {
            k: v
            for k, v in zip(metric_names, metric_values)
        }
        self.loop.collect_metrics(metrics_dict)
Example #11
    def test_analytic_kld(self):
        with self.get_session(use_gpu=True):
            dist1 = self.dist_class(**self.simple_params)
            dist2 = self.dist_class(**self.kld_simple_params)
            kld = get_default_session_or_error().run(dist1.analytic_kld(dist2))
            self.assert_allclose(
                kld,
                self.analytic_kld(self.simple_params, self.kld_simple_params))
Example #12
    def plot_samples(loop):
        with loop.timeit('plot_time'):
            session = get_default_session_or_error()
            images = session.run(x_plots, feed_dict={is_training: False})
            save_images_collection(images=images,
                                   filename=results.prepare_parent(
                                       'plotting/{}.png'.format(loop.epoch)),
                                   grid_size=(10, 10))
Example #13
    def get_score(self, values, missing=None):
        """
        Get the `reconstruction probability` of specified KPI observations.

        The larger the `reconstruction probability`, the less likely a point
        is an anomaly.  You may take the negative of the score if you want
        something that directly indicates the severity of the anomaly.

        Args:
            values (np.ndarray): 1-D float32 array, the KPI observations.
            missing (np.ndarray): 1-D int32 array, the indicator of missing
                points.  If :obj:`None`, the MCMC missing data imputation
                will be disabled. (default :obj:`None`)

        Returns:
            np.ndarray: The `reconstruction probability`,
                1-D array if `last_point_only` is :obj:`True`,
                or 2-D array if `last_point_only` is :obj:`False`.
        """
        with tf.name_scope('DonutPredictor.get_score'):
            sess = get_default_session_or_error()
            collector = []

            # validate the arguments
            values = np.asarray(values, dtype=np.float32)
            if len(values.shape) != 1:
                raise ValueError('`values` must be a 1-D array')

            # run the prediction in mini-batches
            sliding_window = BatchSlidingWindow(
                array_size=len(values),
                window_size=self.model.x_dims,
                batch_size=self._batch_size,
            )
            if missing is not None:
                missing = np.asarray(missing, dtype=np.int32)
                if missing.shape != values.shape:
                    raise ValueError(
                        'The shape of `missing` does not agree '
                        'with the shape of `values` ({} vs {})'.format(
                            missing.shape, values.shape))
                for b_x, b_y in sliding_window.get_iterator([values, missing]):
                    feed_dict = dict(six.iteritems(self._feed_dict))
                    feed_dict[self._input_x] = b_x
                    feed_dict[self._input_y] = b_y
                    b_r = sess.run(self._get_score(), feed_dict=feed_dict)
                    collector.append(b_r)
            else:
                for b_x, in sliding_window.get_iterator([values]):
                    feed_dict = dict(six.iteritems(self._feed_dict))
                    feed_dict[self._input_x] = b_x
                    b_r = sess.run(self._get_score_without_y(),
                                   feed_dict=feed_dict)
                    collector.append(b_r)

            # merge the results of mini-batches
            result = np.concatenate(collector, axis=0)
            return result
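A hypothetical call pattern for this predictor, assuming a default session is active and `kpi_values` / `kpi_missing` are the 1-D arrays described in the docstring (`predictor` stands for an instance of the class this method belongs to):

with tf.Session().as_default():
    scores = predictor.get_score(values=kpi_values, missing=kpi_missing)
    severity = -scores   # larger score means a point is less likely an anomaly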
Example #14
    def fit(self, train_iterator, summary_dir=None):
        """
        Train the :class:`OmniAnomaly` model with given data.

        Args:
            train_iterator: Indexable collection of training mini-batches
                (`float32` arrays).
            summary_dir (str): Optional summary directory for
                :class:`tf.summary.FileWriter`. (default :obj:`None`,
                summary is disabled)
        """
        sess = get_default_session_or_error()

        # initialize the variables of the trainer, and the model
        sess.run(self._trainer_initializer)
        ensure_variables_initialized(self._train_params)

        # training loop
        lr = self._initial_lr
        with TrainLoop(
                param_vars=self._train_params,
                summary_dir=summary_dir,
                max_epoch=self._max_epoch,
                max_step=self._max_step,
        ) as loop:  # type: TrainLoop
            # loop.print_training_summary()

            train_batch_time = []
            valid_batch_time = []

            time_train_start = time.time()
            for epoch in loop.iter_epochs():
                start_time = time.time()
                for step, idx in loop.iter_steps(range(len(train_iterator))):
                    # run a training step
                    batch_x = train_iterator[idx]
                    start_batch_time = time.time()
                    feed_dict = dict(six.iteritems(self._feed_dict))
                    feed_dict[self._learning_rate] = lr
                    feed_dict[self._input_x] = batch_x
                    loss, _ = sess.run([self._loss, self._train_op],
                                       feed_dict=feed_dict)
                    loop.collect_metrics({"loss": loss})
                    train_batch_time.append(time.time() - start_batch_time)

                # anneal the learning rate
                if self._lr_anneal_epochs and epoch % self._lr_anneal_epochs == 0:
                    lr *= self._lr_anneal_factor
                    loop.println("Learning rate decreased to {}".format(lr),
                                 with_tag=True)

            time_train_end = time.time()
            return {
                # "best_valid_loss": float(loop.best_valid_metric),
                "train_time": np.sum(train_batch_time),
                "total_train_time": time_train_end - time_train_start,
            }
Example #15
    def get_score(self, values):
        """
        Get the `reconstruction probability` of specified KPI observations.

        The larger the `reconstruction probability`, the less likely a point
        is an anomaly.  You may take the negative of the score if you want
        something that directly indicates the severity of the anomaly.

        Args:
            values (np.ndarray): 2-D float32 array, the KPI observations.

        Returns:
            np.ndarray: The `reconstruction probability`,
                1-D array if `last_point_only` is :obj:`True`,
                or 2-D array if `last_point_only` is :obj:`False`.
        """
        with tf.name_scope('Predictor.get_score'):
            sess = get_default_session_or_error()
            collector = []
            collector_z = []

            # validate the arguments
            values = np.asarray(values, dtype=np.float32)
            if len(values.shape) != 2:
                raise ValueError('`values` must be a 2-D array')

            # run the prediction in mini-batches
            sliding_window = BatchSlidingWindow(
                array_size=len(values),
                window_size=self.model.window_length,
                batch_size=self._batch_size,
            )

            pred_time = []

            for b_x, in sliding_window.get_iterator([values]):
                start_iter_time = time.time()

                input_adj = get_adj(b_x[..., :self._model.x_dims], self._model.config.gcn_type)
                feed_dict = dict(six.iteritems(self._feed_dict))

                feed_dict[self._input_x] = b_x[..., :self._model.x_dims]
                feed_dict[self._input_feature] = b_x[..., self._model.x_dims:]
                feed_dict[self._input_adj] = input_adj

                # b_r: (50,), the scores for one batch
                b_r, q_net_z = sess.run(self._get_score_without_y(),
                                        feed_dict=feed_dict)
                collector.append(b_r)
                pred_time.append(time.time() - start_iter_time)
                collector_z.append(q_net_z)

            # merge the results of mini-batches
            result = np.concatenate(collector, axis=0)
            result_z = np.concatenate(collector_z, axis=0)
            return result, result_z, np.mean(pred_time)
Example #16
    def test_get_default_session_or_error(self):
        def do_raise():
            with self.assertRaises(RuntimeError) as cm:
                get_default_session_or_error()
            self.assertIn('No session is active.', str(cm.exception))

        do_raise()
        with self.get_session() as sess:
            self.assertIs(sess, get_default_session_or_error())
        do_raise()
Example #17
    def get_refactor_probability(self, values, missing=None):
        """
        Get the `reconstruction probability` of the specified KPI observations.

        The larger the `reconstruction probability`, the less likely a point
        is an anomaly.  Take the negative of this score if you want something
        that directly indicates the severity of the anomaly.

        Args:
            values (np.ndarray): 1-D float32 array, the KPI observations.
            missing (np.ndarray): 1-D int32 array, the indicator of missing
                points.  (default :obj:`None`; if :obj:`None`, missing point
                injection is disabled)

        Returns:
            np.ndarray: The `reconstruction probability`,
                a 1-D array if `last_point_only` is :obj:`True`,
                or a 2-D array if `last_point_only` is :obj:`False`.
        """
        tc = TimeCounter()
        tc.start()
        with tf.name_scope('DonutPredictor.get_refactor_probability'):
            sess = get_default_session_or_error()
            collector = []
            # validate the arguments
            values = np.asarray(values, dtype=np.float32)
            if len(values.shape) != 1:
                raise ValueError('`values` must be a 1-D array')
            # run the prediction in mini-batches over a sliding window
            sliding_window = BatchSlidingWindow(array_size=len(values),
                                                window_size=self.model.x_dims,
                                                batch_size=self._batch_size)
            # missing points are given
            if missing is not None:
                missing = np.asarray(missing, dtype=np.int32)
                # `missing` must have the same shape as `values`
                if missing.shape != values.shape:
                    raise ValueError(
                        'The shape of `missing` does not agree '
                        'with the shape of `values` ({} vs {})'.format(
                            missing.shape, values.shape))
                for b_x, b_y in sliding_window.get_iterator([values, missing]):
                    feed_dict = dict(six.iteritems(self._feed_dict))
                    feed_dict[self._input_x] = b_x
                    feed_dict[self._input_y] = b_y
                    b_r = sess.run(self._get_refactor_probability(),
                                   feed_dict=feed_dict)
                    collector.append(b_r)
            else:
                for b_x, in sliding_window.get_iterator([values]):
                    feed_dict = dict(six.iteritems(self._feed_dict))
                    feed_dict[self._input_x] = b_x
                    b_r = sess.run(self._get_refactor_probability_without_y(),
                                   feed_dict=feed_dict)
                    collector.append(b_r)
            # merge the results of the mini-batches
            tc.end()
            test_probability_time = tc.get_s() + " seconds"
            return np.concatenate(collector, axis=0), test_probability_time
Example #18
    def test_prob_with_higher_dimensional_params(self):
        with self.get_session(use_gpu=True):
            x, _, _ = self.get_samples_and_prob(
                **self.extended_dimensional_params)
            x = x[0, ...]
            dist = self.dist_class(**self.extended_dimensional_params)
            prob, log_prob = get_default_session_or_error().run(
                [dist.prob(x), dist.log_prob(x)])
            self.assert_allclose(
                prob, self.prob(x, **self.extended_dimensional_params))
            self.assert_allclose(
                log_prob, self.log_prob(x, **self.extended_dimensional_params))
Example #19
    def add_summary(self, summary, global_step=None):
        """Add a summary object.

        Parameters
        ----------
        summary : bytes | tf.summary.Summary
            The summary object.

        global_step : int | tf.Tensor | tf.Variable
            The global step counter. (optional)
        """
        if isinstance(global_step, (tf.Tensor, tf.Variable)):
            global_step = get_default_session_or_error().run(global_step)
        self._writer.add_summary(summary, global_step=global_step)
Example #20
    def save(self, global_step=None):
        """
        Save the checkpoint to file.

        Args:
            global_step (int or tf.Tensor): The global step counter.
        """
        sess = get_default_session_or_error()
        makedirs(self.save_dir, exist_ok=True)
        self._saver.save(sess,
                         os.path.join(self.save_dir, self.filename),
                         global_step=global_step,
                         latest_filename=self.latest_file,
                         write_meta_graph=self.save_meta)
Example #21
    def collect_metrics(self, metrics, global_step=None):
        """
        Collect the statistics of metrics.

        Args:
            metrics (dict[str, float or np.ndarray or ScheduledVariable]):
                Dict from metrics names to their values.
                For :meth:`format_logs`, there is no difference between
                calling :meth:`collect_metrics` only once, with an array
                of metric values; or calling :meth:`collect_metrics` multiple
                times, with one value at each time.
                However, for the TensorFlow summary writer, only the mean of
                the metric values would be recorded, if calling
                :meth:`collect_metrics` with an array.
            global_step (int or tf.Variable or tf.Tensor): The global step
                counter. (optional)
        """
        from tfsnippet.trainer import ScheduledVariable
        tf_summary_values = []
        for k, v in six.iteritems(metrics):
            if isinstance(v, ScheduledVariable):
                v = v.get()
            v = np.asarray(v)
            self._metrics[k].collect(v)

            if self._summary_writer is not None and \
                    (self._summary_skip_pattern is None or
                     not self._summary_skip_pattern.match(k)):
                skip_count = self._metrics_skip_counter.get(k, 0)
                freq_limit = self._summary_commit_freqs.get(k, 1)
                if skip_count + 1 >= freq_limit:
                    self._metrics_skip_counter[k] = 0
                    tag = self._summary_metric_prefix + k
                    tf_summary_values.append(
                        tf.summary.Summary.Value(tag=tag,
                                                 simple_value=v.mean()))
                else:
                    self._metrics_skip_counter[k] = skip_count + 1

        if tf_summary_values:
            summary = tf.summary.Summary(value=tf_summary_values)
            if global_step is not None and \
                    isinstance(global_step, (tf.Variable, tf.Tensor)):
                global_step = get_default_session_or_error().run(global_step)
            self._summary_writer.add_summary(summary, global_step=global_step)
Example #22
    def add_graph(self, graph=None, global_step=None):
        """Add graph to the summary.

        Parameters
        ----------
        graph : tf.Graph
            The graph to be added.  If not specified, will add the
            current active graph.

        global_step : int | tf.Tensor | tf.Variable
            The global step counter. (optional)
        """
        if isinstance(global_step, (tf.Tensor, tf.Variable)):
            global_step = get_default_session_or_error().run(global_step)
        self._writer.add_graph(
            graph or tf.get_default_graph(),
            global_step=global_step
        )
Example #23
    def run(self):
        """Run training loop."""
        if self._is_fitting:
            raise RuntimeError('`run()` is not re-entrant.')
        self._is_fitting = True
        try:
            # trigger the before execution event
            self.events.fire(EventKeys.BEFORE_EXECUTION, self)

            # initialize global training status
            session = get_default_session_or_error()
            if self._ensure_variables_initialized:
                ensure_variables_initialized()
            self.loop.print_training_summary()

            for _ in self.loop.iter_epochs():
                # trigger before epoch event
                self.events.fire(EventKeys.BEFORE_EPOCH, self)

                # run steps of this epoch
                for payload in self._iter_steps():
                    # trigger before step event
                    self.events.fire(EventKeys.BEFORE_STEP, self)

                    # run the step
                    self._run_step(session, payload)

                    # trigger after step events
                    self.events.fire(EventKeys.STEP_EVALUATION, self)
                    self.events.fire(EventKeys.STEP_ANNEALING, self)
                    self.events.fire(EventKeys.STEP_LOGGING, self)
                    self.events.reverse_fire(EventKeys.AFTER_STEP, self)

                # trigger after epoch events
                self.events.fire(EventKeys.EPOCH_EVALUATION, self)
                self.events.fire(EventKeys.EPOCH_ANNEALING, self)
                self.events.fire(EventKeys.EPOCH_LOGGING, self)
                self.events.reverse_fire(EventKeys.AFTER_EPOCH, self)

            # trigger the after execution event
            self.events.reverse_fire(EventKeys.AFTER_EXECUTION, self)
        finally:
            self._is_fitting = False
Example #24
    def _run(self, images, output):
        sess = get_default_session_or_error()
        err_msg = ('`images` must be a list of bytes, or a numpy array of '
                   'shape (?, ?, ?, 3).')
        if isinstance(images, list):
            for im in images:
                if not isinstance(im, six.binary_type):
                    raise TypeError(err_msg)
            input_tensor = self._jpeg_input
            get_image = lambda i: images[i]
        elif isinstance(images, np.ndarray):
            if len(images.shape) != 4 or images.shape[3] != 3:
                raise TypeError(err_msg)
            input_tensor = self._array_input
            get_image = lambda i: images[i:i + 1].astype(np.float32)
        else:
            raise TypeError(err_msg)

        ret = []
        for i in range(len(images)):
            ret.append(sess.run(output, {input_tensor: get_image(i)}))
        return np.concatenate(ret, axis=0)
Example #25
    def restore(self, ignore_non_exist=False):
        """
        Restore the checkpoint from file if it exists.

        Args:
            ignore_non_exist (bool): Whether or not to ignore the error if
                the checkpoint file does not exist. (default :obj:`False`)

        Raises:
            IOError: If the checkpoint files do not exist, and
                `ignore_non_exist` is not :obj:`True`.
        """
        file_path = self.get_latest_file()
        if file_path:
            sess = get_default_session_or_error()
            self._saver.restore(sess, file_path)
            getLogger(__name__).debug('Restored from checkpoint file %r.',
                                      file_path)
        elif not ignore_non_exist:
            raise IOError(
                'Checkpoint file does not exist in directory {}'.format(
                    self.save_dir))
Example #26
    def get_score(self, test_iterator):
        """
        Get the `reconstruction probability` of specified KPI observations.

        The larger the `reconstruction probability`, the less likely a point
        is an anomaly.  You may take the negative of the score if you want
        something that directly indicates the severity of the anomaly.

        Args:
            test_iterator: Indexable collection of test mini-batches
                (`float32` arrays).

        Returns:
            np.ndarray: The `reconstruction probability`,
                1-D array if `last_point_only` is :obj:`True`,
                or 2-D array if `last_point_only` is :obj:`False`.
        """
        with tf.name_scope("Predictor.get_score"):
            sess = get_default_session_or_error()
            collector = []
            collector_z = []
            pred_time = []

            for idx in range(len(test_iterator)):
                b_x = test_iterator[idx]
                start_iter_time = time.time()
                feed_dict = dict(six.iteritems(self._feed_dict))
                feed_dict[self._input_x] = b_x
                b_r, q_net_z = sess.run(
                    self._get_score_without_y(), feed_dict=feed_dict
                )
                collector.append(b_r)
                pred_time.append(time.time() - start_iter_time)
                collector_z.append(q_net_z)

            # merge the results of mini-batches
            result = np.concatenate(collector, axis=0)
            result_z = np.concatenate(collector_z, axis=0)
            return result, result_z, np.sum(pred_time)
Example #27
    def restore(self, save_path, session=None):
        """
        Restore from a checkpoint file.

        Args:
            save_path (str): Restore from this checkpoint file.
            session (tf.Session): Restore the variables into this session.
                If not specified, restore into the default session.
        """
        session = session or get_default_session_or_error()

        # restore the variables
        self._saver.restore(session, save_path)

        # restore the states of savable objects
        if self._objects:
            object_states = pkl.loads(self._serial_var.get(session))
            assert(isinstance(object_states, dict))

            for key, obj in six.iteritems(self._objects):
                if key not in object_states:
                    raise KeyError('Object `{}` not found in the checkpoint: '
                                   '{}'.format(key, save_path))
                obj.set_state(object_states[key])
Example #28
    def collect_metrics(self, metrics, global_step=None):
        """
        Collect the statistics of metrics.

        Args:
            metrics (dict[str, float or np.ndarray or DynamicValue]):
                Dict from metrics names to their values.
                For :meth:`format_logs`, there is no difference between
                calling :meth:`collect_metrics` only once, with an array
                of metric values; or calling :meth:`collect_metrics` multiple
                times, with one value at each time.
                However, for the TensorFlow summary writer, only the mean of
                the metric values would be recorded, if calling
                :meth:`collect_metrics` with an array.
            global_step (int or tf.Variable or tf.Tensor): The global step
                counter. (optional)
        """
        from tfsnippet.trainer import DynamicValue
        tf_summary_values = []
        for k, v in six.iteritems(metrics):
            if isinstance(v, DynamicValue):
                v = v.get()
            v = np.asarray(v)
            self._metrics[k].collect(v)

            if self._summary_writer is not None:
                mean_value = v.mean()
                tf_summary_values.append(
                    tf.summary.Summary.Value(tag=k, simple_value=mean_value))

        if tf_summary_values:
            summary = tf.summary.Summary(value=tf_summary_values)
            if global_step is not None and \
                    isinstance(global_step, (tf.Variable, tf.Tensor)):
                global_step = get_default_session_or_error().run(global_step)
            self._summary_writer.add_summary(summary, global_step=global_step)
Example #29
    def fit(self, values, valid_portion=0.01, summary_dir=None):
        """
        Train the :class:`OmniAnomaly` model with given data.

        Args:
            values (np.ndarray): 2-D `float32` array, the standardized
                KPI observations.
            valid_portion (float): Ratio of validation data out of all the
                specified training data. (default 0.01)
            summary_dir (str): Optional summary directory for
                :class:`tf.summary.FileWriter`. (default :obj:`None`,
                summary is disabled)
        """
        sess = get_default_session_or_error()

        # split the training & validation set
        values = np.asarray(values, dtype=np.float32)
        if len(values.shape) != 2:
            raise ValueError("`values` must be a 2-D array")

        n = int(len(values) * valid_portion)
        train_values, v_x = values[:-n], values[-n:]

        train_sliding_window = BatchSlidingWindow(
            array_size=len(train_values),
            window_size=self.model.window_length,
            batch_size=self._batch_size,
            shuffle=True,
            ignore_incomplete_batch=True,
        )
        valid_sliding_window = BatchSlidingWindow(
            array_size=len(v_x),
            window_size=self.model.window_length,
            batch_size=self._valid_batch_size,
        )

        # initialize the variables of the trainer, and the model
        sess.run(self._trainer_initializer)
        ensure_variables_initialized(self._train_params)

        # training loop
        lr = self._initial_lr
        with TrainLoop(
            param_vars=self._train_params,
            early_stopping=True,
            summary_dir=summary_dir,
            max_epoch=self._max_epoch,
            max_step=self._max_step,
        ) as loop:  # type: TrainLoop
            loop.print_training_summary()

            train_batch_time = []
            valid_batch_time = []

            time_train_start = time.time()
            for epoch in loop.iter_epochs():
                print("train_values:", train_values.shape)
                train_iterator = train_sliding_window.get_iterator([train_values])
                start_time = time.time()
                for step, (batch_x,) in loop.iter_steps(train_iterator):
                    # run a training step
                    start_batch_time = time.time()
                    feed_dict = dict(six.iteritems(self._feed_dict))
                    feed_dict[self._learning_rate] = lr
                    feed_dict[self._input_x] = batch_x
                    loss, _ = sess.run(
                        [self._loss, self._train_op], feed_dict=feed_dict
                    )
                    loop.collect_metrics({"loss": loss})
                    train_batch_time.append(time.time() - start_batch_time)

                    # if step % self._valid_step_freq == 0:
                    #     train_duration = time.time() - start_time
                    #     loop.collect_metrics({"train_time": train_duration})
                    #     # collect variable summaries
                    #     if summary_dir is not None:
                    #         loop.add_summary(sess.run(self._summary_op))

                    #     # do validation in batches
                    #     with loop.timeit("valid_time"), loop.metric_collector(
                    #         "valid_loss"
                    #     ) as mc:
                    #         v_it = valid_sliding_window.get_iterator([v_x])
                    #         for (b_v_x,) in v_it:
                    #             start_batch_time = time.time()
                    #             feed_dict = dict(six.iteritems(self._valid_feed_dict))
                    #             feed_dict[self._input_x] = b_v_x
                    #             loss = sess.run(self._loss, feed_dict=feed_dict)
                    #             valid_batch_time.append(time.time() - start_batch_time)
                    #             mc.collect(loss, weight=len(b_v_x))

                    #     # print the logs of recent steps
                    #     loop.print_logs()
                    #     start_time = time.time()

                # anneal the learning rate
                if self._lr_anneal_epochs and epoch % self._lr_anneal_epochs == 0:
                    lr *= self._lr_anneal_factor
                    loop.println(
                        "Learning rate decreased to {}".format(lr), with_tag=True
                    )

            time_train_end = time.time()
            return {
                # "best_valid_loss": float(loop.best_valid_metric),
                "train_time": np.sum(train_batch_time),
                "valid_time": 0,
                "total_train_time": time_train_end - time_train_start,
            }
Example #30
def collect_outputs(outputs, inputs, data_flow, mode='concat', axis=0,
                    feed_dict=None, session=None):
    """
    Run TensorFlow nodes by mini-batch and collect outputs from each batch.

    Args:
        outputs (Iterable[tf.Tensor] or dict[str, tf.Tensor]): The output
            tensors to be computed.
        inputs (Iterable[tf.Tensor]): Input placeholders.
        data_flow (DataFlow): Data flow to feed the input placeholders.
        mode ({'concat', 'average'}): If "concat", will concatenate the outputs
            from each mini-batch.  If "average", the output from each batch
            must be a scalar, and if so, this method will take average of the
            outputs from each mini-batch, weighted according to the batch size.
        axis (int): The axis for concatenation.
        feed_dict: Optional, additional feed dict.
        session: The TensorFlow session.  If not specified, use the
            default session.

    Returns:
        tuple[np.ndarray] or dict[str, np.ndarray]: The collected outputs.
            Returns a dict if `outputs` is a dict, or a tuple otherwise.
    """
    mode = validate_enum_arg('mode', mode, ['concat', 'average'])
    session = session or get_default_session_or_error()

    if isinstance(outputs, (dict, OrderedDict)):
        output_keys = list(outputs)
        outputs = [tf.convert_to_tensor(outputs[k]) for k in output_keys]
    else:
        output_keys = None
        outputs = [tf.convert_to_tensor(o) for o in outputs]
    inputs = [tf.convert_to_tensor(i) for i in inputs]

    # check the shape of output tensors
    for i, o in enumerate(outputs):
        o_shape = o.get_shape()
        if mode == 'concat':
            if o_shape.ndims is not None and o_shape.ndims < 1:
                raise ValueError('`mode` is "concat", but the {}-th output '
                                 'is a scalar: {!r}'.format(i, o))
        else:
            if o_shape.ndims is not None and o_shape.ndims > 0:
                raise ValueError('`mode` is "average", but the {}-th output '
                                 'is not a scalar: {!r}'.format(i, o))

    collected = [[] for _ in range(len(outputs))]
    weights = []

    for batch in data_flow:
        weights.append(len(batch[0]))
        batch_feed_dict = merge_feed_dict(
            feed_dict,
            {k: v for (k, v) in zip(inputs, batch)}
        )
        batch_feed_dict = resolve_feed_dict(batch_feed_dict)
        for i, o in enumerate(session.run(outputs, feed_dict=batch_feed_dict)):
            collected[i].append(o)

    weights = np.asarray(weights, dtype=np.float32)
    for i, batches in enumerate(collected):
        if mode == 'average':
            stacked = np.stack(batches, axis=0)
            assert(len(stacked.shape) == 1)
            collected[i] = np.average(stacked, axis=0, weights=weights)
        else:
            collected[i] = np.concatenate(batches, axis=axis)

    if output_keys is not None:
        collected = dict(zip(output_keys, collected))
    else:
        collected = tuple(collected)
    return collected
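A hypothetical use of the 'average' mode, assuming a scalar `loss_tensor` and the same kind of placeholder/data-flow pair as in Example #3 (`loss_tensor`, `x_ph`, `flow` and `is_training` are placeholders for illustration):

with tf.Session().as_default():
    results = collect_outputs(
        {'loss': loss_tensor}, [x_ph], flow,
        mode='average', feed_dict={is_training: False})
    print(results['loss'])   # scalar: batch-size weighted average over the data flow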