Example #1
0
    def after_run(self, run_context, run_values):
        """Write per-feature importance scalars once enough steps have passed.

        Args:
            run_context: Unused.
            run_values: Object whose ``results`` mapping provides
                "global_step", "feature_names", "feature_usage_counts" and
                "feature_gains".

        Raises:
            RuntimeError: If the names, usage counts and gains differ in length.
        """
        del run_context  # Not needed by this hook.

        results = run_values.results
        step = results["global_step"]
        names = results["feature_names"]
        usage_counts = results["feature_usage_counts"]
        gains = results["feature_gains"]

        # Skip until `_every_n_steps` steps have elapsed since the last write.
        last_step = self._last_triggered_step
        if last_step is not None and step < last_step + self._every_n_steps:
            return

        # The three sequences must line up one-to-one.
        num_features = len(names)
        if not (num_features == len(usage_counts)
                and num_features == len(gains)):
            raise RuntimeError(
                "Feature names and importance measures have inconsistent lengths."
            )

        # Normalisers turn raw values into fractions; fall back to 1.0 when
        # the total is zero so the multiplication below stays well-defined.
        usage_total = sum(usage_counts, 0.0)
        usage_scale = 1.0 / usage_total if usage_total else 1.0
        gain_total = sum(gains, 0.0)
        gain_scale = 1.0 / gain_total if gain_total else 1.0

        self._last_triggered_step = step
        for name, usage_count, gain in zip(names, usage_counts, gains):
            # Each feature gets its own writer, keyed by a sub-directory
            # named after the (bytes) feature name.
            writer = SummaryWriterCache.get(
                os.path.join(self._model_dir, name.decode("utf-8")))
            scalars = (
                ("feature_importance/usage_counts", usage_count),
                ("feature_importance/usage_fraction",
                 usage_count * usage_scale),
                ("feature_importance/gains", gain),
                ("feature_importance/gains_fraction", gain * gain_scale),
            )
            for tag, value in scalars:
                writer.add_summary(
                    Summary(value=[Summary.Value(tag=tag, simple_value=value)]),
                    step)
Example #2
0
def tf_scalar_summary(vals):
    """Pack a mapping of tag -> scalar into one TensorFlow ``Summary`` proto.

    Each item of ``vals`` becomes a ``Summary.Value`` with the key as its tag
    and the value as its ``simple_value``.
    """
    # Imported lazily so TensorFlow is only required when this is called.
    # pylint: disable=import-error,no-name-in-module
    from tensorflow.core.framework.summary_pb2 import Summary

    scalar_values = []
    for tag, number in vals.items():
        scalar_values.append(Summary.Value(tag=tag, simple_value=number))
    return Summary(value=scalar_values)
Example #3
0
    def log_image(self, step, tag, val):
        '''
        Encode an RGB array as PNG and record it as an image event.

        :param int step: Time step (x-axis in TensorBoard graphs)
        :param str tag: Label for this value
        :param numpy.ndarray val: Image in RGB format with values from
            0 to 255; a 3-D array with index order (row, column, channel).
            `val.shape[-1] == 3`
        :raises ValueError: if `val` is not 3-D or its last dimension is not 3
        '''
        # TODO: support floating-point tensors, 4-D tensors, grayscale
        shape = val.shape
        if len(shape) != 3:
            raise ValueError(
                '`log_image` value should be a 3-D tensor, instead got shape %s'
                % (shape, ))
        rows, cols, channels = shape[0], shape[1], shape[2]
        if channels != 3:
            raise ValueError(
                'Last dimension of `log_image` value should be 3 (RGB), '
                'instead got shape %s' % (shape, ))

        # The PNG writer receives one flat row of interleaved channel values
        # per image row.
        buffer = StringIO()
        flat_rows = val.reshape(rows, cols * channels)
        png.Writer(size=(cols, rows)).write(buffer, flat_rows)

        # https://tensorflow.googlesource.com/tensorflow/+/master/tensorflow/core/framework/summary.proto
        RGB = 3
        image = Summary.Image(height=rows,
                              width=cols,
                              colorspace=RGB,
                              encoded_image_string=buffer.getvalue())
        self._add_event(step, Summary(value=[Summary.Value(tag=tag, image=image)]))
Example #4
0
def image(tag, tensor):
    """Build a `Summary` protocol buffer holding a single image.

    Args:
      tag: Name for the image series. It is passed through `_clean_tag`
        before being used as the summary tag.
      tensor: A 3-D array-like indexed as `[height, width, channels]`. Input
        that is not already a `numpy.ndarray` is converted to a `float32`
        array.
        NOTE(review): the previous docstring listed 1 (grayscale), 3 (RGB)
        and 4 (RGBA) as the channel counts; what is actually accepted is
        decided by `make_image` — confirm there.
    Returns:
      A `Summary` with one `Summary.Value` carrying the image under `tag`.
    """
    tag = _clean_tag(tag)
    if not isinstance(tensor, np.ndarray):
        # np.ndarray(x, ...) interprets x as a *shape* and returns an
        # uninitialised array; np.asarray converts the data itself. If the
        # conversion fails, the caller has to supply an array.
        tensor = np.asarray(tensor, dtype=np.float32)
    shape = tensor.shape
    height, width, channel = shape[0], shape[1], shape[2]
    if channel == 1:
        # Workaround: drop the singleton channel axis so a 2-D array is
        # passed on (original note: PIL's handling of dimensions).
        tensor = np.reshape(tensor, (height, width))
    image = make_image(tensor, height, width, channel)
    return Summary(value=[Summary.Value(tag=tag, image=image)])
Example #5
0
 def _log_and_record(self, elapsed_steps, elapsed_time, global_step):
   steps_per_sec = elapsed_steps / elapsed_time
   if self._summary_writer is not None:
     summary = Summary(value=[Summary.Value(
         tag=self._summary_tag, simple_value=steps_per_sec)])
     self._summary_writer.add_summary(summary, global_step)
   logging.info("%s: %g", self._summary_tag, steps_per_sec)
Example #6
0
 def _log_and_record(self, elapsed_steps, elapsed_time, global_step):
   """Record and log the aggregate images-per-second throughput.

   The rate is steps per second scaled by `FLAGS.batch_size` and
   `hvd.size()`.
   """
   tag = 'images/sec'
   rate = elapsed_steps / elapsed_time * FLAGS.batch_size * hvd.size()
   writer = self._summary_writer
   if writer is not None:
     rate_value = Summary.Value(tag=tag, simple_value=rate)
     writer.add_summary(Summary(value=[rate_value]), global_step)
   logging.info("%s: %g", tag, rate)
def hyper_log(rmse, job_dir):
    """Write `rmse` as a hyperparameter-tuning metric summary.

    The value is recorded under the tag 'training/hptuning/metric' in an
    event file located in the 'eval' sub-directory of `job_dir`.

    Args:
        rmse: Scalar metric value to record.
        job_dir: Directory under which the 'eval' log directory is created.
    """
    log = Summary(value=[
        Summary.Value(tag='training/hptuning/metric', simple_value=rmse)
    ])
    logpath = os.path.join(job_dir, 'eval')
    writer = tf.summary.FileWriter(logpath)
    try:
        writer.add_summary(log)
        writer.flush()
    finally:
        # Close the writer so the event file is released even when the
        # write fails; the original left it open.
        writer.close()
Example #8
0
 def after_run(self, run_context, run_values):
     """Count the step and, when the timer triggers, report throughput.

     `run_values.results` is used as the current episode number. When the
     timer triggers and reports an elapsed time, steps/sec and the number of
     steps counted since the last report are written and logged, and the
     step counter is reset.
     """
     episode = run_values.results
     self._num_steps += 1

     if not self._timer.should_trigger_for_episode(episode):
         return
     elapsed_time, elapsed_steps = self._timer.update_last_triggered_episode(episode)
     if elapsed_time is None:
         # No elapsed time reported by the timer: nothing to report yet.
         return

     rate = elapsed_steps / elapsed_time
     writer = self._summary_writer
     if writer is not None:
         values = [
             Summary.Value(tag=self._summary_sec_tag, simple_value=rate),
             Summary.Value(tag=self._summary_steps_tag, simple_value=self._num_steps),
         ]
         writer.add_summary(Summary(value=values), episode)
     logging.info("%s: %g, %s: %d",
                  self._summary_sec_tag, rate,
                  self._summary_steps_tag, self._num_steps)
     self._num_steps = 0
Example #9
0
 def _log_and_record(self, step):
     if self._summary_writer is not None:
         if self._total_batch_size:
             img_per_sec_tag = 'eval/img_per_sec'
             img_per_sec_tag_value = self._total_batch_size / (
                 self._run_end - self._run_begin)
             sec_per_img_tag = 'eval/sec_per_img'
             sec_per_img_tag_value = 1 / img_per_sec_tag_value * 1000
             summary = Summary(value=[
                 Summary.Value(tag=img_per_sec_tag,
                               simple_value=img_per_sec_tag_value),
                 Summary.Value(tag=sec_per_img_tag,
                               simple_value=sec_per_img_tag_value)
             ])
             logging.info("%s: %g, %s: %g ms, step: %g", img_per_sec_tag,
                          img_per_sec_tag_value, sec_per_img_tag,
                          sec_per_img_tag_value, step)
             self._summary_writer.add_summary(summary, step)
 def _log_and_record(self, elapsed_steps, elapsed_time, global_step):
     examples_per_sec = self.batch_size * elapsed_steps / elapsed_time
     example_summary_tag = 'examples/sec'
     if self._summary_writer is not None:
         summary = Summary(value=[
             Summary.Value(tag=example_summary_tag,
                           simple_value=examples_per_sec)
         ])
         self._summary_writer.add_summary(summary, global_step)
     logging.info("%s: %g", example_summary_tag, examples_per_sec)
Example #11
0
    def log_scalar(self, step, tag, val):
        '''
        Record a single scalar as an event.

        :param int step: Time step (x-axis in TensorBoard graphs)
        :param str tag: Label for this value
        :param float val: Scalar to graph at this time step (y-axis)
        '''
        # Round the value to float32 precision, then hand over a plain
        # Python float.
        scalar = float(np.float32(val))
        point = Summary.Value(tag=tag, simple_value=scalar)
        self._add_event(step, Summary(value=[point]))
Example #12
0
def write_hptuning_metric(args, metric):
    """Write `metric` as a hyperparameter-tuning metric summary.

    The value is recorded under the tag 'training/hptuning/metric' in an
    event file located in the 'eval' sub-directory of `args.output_dir`.

    Args:
        args: Object exposing an `output_dir` attribute.
        metric: Scalar metric value to record.
    """
    summary = Summary(value=[
        Summary.Value(tag='training/hptuning/metric', simple_value=metric)
    ])

    eval_path = os.path.join(args.output_dir, 'eval')
    summary_writer = tf.summary.FileWriter(eval_path)
    try:
        summary_writer.add_summary(summary)
        summary_writer.flush()
    finally:
        # Close the writer so the event file is released even when the
        # write fails; the original left it open.
        summary_writer.close()
Example #13
0
 def every_n_step_end(self, current_step, outputs):
   """Record steps/sec since the previous report, then remember this one.

   Nothing is written on the first call (no previous report time) or when
   no summary writer is set; the step and time are stored either way.
   """
   now = time.time()
   previous_time = self._last_reported_time
   if previous_time is not None and self._summary_writer:
     step_delta = current_step - self._last_reported_step
     time_delta = now - previous_time
     rate = step_delta / time_delta
     rate_value = Summary.Value(tag=self._summary_tag, simple_value=rate)
     self._summary_writer.add_summary(Summary(value=[rate_value]),
                                      current_step)
   self._last_reported_step = current_step
   self._last_reported_time = now
    def add_summary(self, summary_tag, summary_value, global_step):
        """ Writes one scalar summary for the given step and flushes it.

        Args:
            summary_tag: A string, the name of the summary.
            summary_value: The value of the summary at current step.
            global_step: The step.
        """
        scalar = Summary.Value(tag=summary_tag, simple_value=summary_value)
        record = Summary(value=[scalar])
        writer = self._summary_writer
        writer.add_summary(record, global_step)
        writer.flush()
Example #15
0
    def log_histogram(self, step, tag, val):
        '''
        Aggregate `val` into a histogram and record it as an event.

        :param int step: Time step (x-axis in TensorBoard graphs)
        :param str tag: Label for this value
        :param numpy.ndarray val: Arbitrary-dimensional array containing
            values to be aggregated in the resulting histogram.
        '''
        histogram = Histogram()
        histogram.add(val)
        entry = Summary.Value(tag=tag, histo=histogram.encode_to_proto())
        self._add_event(step, Summary(value=[entry]))
Example #16
0
    def _log_and_record(self, global_step, learning_rate, total_loss, mlm_loss,
                        nsp_loss):
        """Log one line of training statistics and record summaries.

        Throughput is `global_batch_size` divided by the average time per
        step (`elapsed_secs / count`). All arguments appear in the log line;
        only throughput and total loss are written to the summary writer.

        Args:
            global_step: Step used in the log line and for the summaries.
            learning_rate: Learning rate, logged only.
            total_loss: Total loss, logged and written as 'total_loss'.
            mlm_loss: Logged only.
            nsp_loss: Logged only.
        """
        time_per_step = self.elapsed_secs / self.count
        throughput = self.global_batch_size / time_per_step
        # The label for `mlm_loss` was previously misspelled as 'mlm_oss'.
        log_string = (
            '  Step = %6i, throughput = %6.1f, total_loss = %6.3f'
            ', mlm_loss = %6.4e, nsp_loss = %6.4e, learning_rate = %6.4e'
            % (global_step, throughput, total_loss, mlm_loss, nsp_loss,
               learning_rate))
        tf.compat.v1.logging.info(log_string)

        if self.summary_writer is not None:
            throughput_summary = Summary(value=[
                Summary.Value(tag='throughput', simple_value=throughput)
            ])
            self.summary_writer.add_summary(throughput_summary, global_step)
            total_loss_summary = Summary(value=[
                Summary.Value(tag='total_loss', simple_value=total_loss)
            ])
            self.summary_writer.add_summary(total_loss_summary, global_step)
  def after_run(self, run_context, run_values):
    """Report steps per second whenever the timer triggers.

    `run_values.results` is used as the global step. Nothing happens unless
    the timer triggers for that step and reports an elapsed time.
    """
    _ = run_context

    step = run_values.results
    if not self._timer.should_trigger_for_step(step):
      return
    elapsed_time, elapsed_steps = self._timer.update_last_triggered_step(step)
    if elapsed_time is None:
      return

    rate = elapsed_steps / elapsed_time
    writer = self._summary_writer
    if writer is not None:
      rate_value = Summary.Value(tag=self._summary_tag, simple_value=rate)
      writer.add_summary(Summary(value=[rate_value]), step)
    logging.info("%s: %g", self._summary_tag, rate)
Example #18
0
    def _log_and_record(self, elapsed_steps, elapsed_time, global_step):
        steps_per_sec = elapsed_steps / elapsed_time
        if self._summary_writer is not None:
            if self._total_batch_size:
                image_tag = 'images_sec'
                image_count = float(steps_per_sec) * self._total_batch_size
                summary = Summary(value=[
                    Summary.Value(tag=self._summary_tag,
                                  simple_value=steps_per_sec),
                    Summary.Value(tag=image_tag, simple_value=image_count)
                ])
                logging.info("%s: %g, %s: %g, step: %g", self._summary_tag,
                             steps_per_sec, image_tag, image_count,
                             global_step)
            else:
                summary = Summary(value=[
                    Summary.Value(tag=self._summary_tag,
                                  simple_value=steps_per_sec)
                ])
                logging.info("%s: %g, step: %g", self._summary_tag,
                             steps_per_sec, global_step)

            self._summary_writer.add_summary(summary, global_step)
Example #19
0
def write_hptuning_metric(args, metric):
  """
  Write a summary containing the tuning loss metric, as required by hyperparam tuning.

  Args:
    args: Mapping with an 'output_dir' entry naming the job directory.
    metric: Scalar value recorded under the tag 'training/hptuning/metric'.
  """
  summary = Summary(value=[Summary.Value(tag='training/hptuning/metric', simple_value=metric)])

  # for hyperparam tuning, we write a summary log to a directory 'eval' below the job directory
  eval_path = os.path.join(args['output_dir'], 'eval')
  summary_writer = tf.summary.FileWriter(eval_path)

  # Note: adding the summary to the writer is enough for hyperparam tuning.
  # The ml engine system is looking for any summary added with the hyperparam metric tag.
  try:
    summary_writer.add_summary(summary)
    summary_writer.flush()
  finally:
    # Close the writer so the event file is released even when the write
    # fails; the original left it open.
    summary_writer.close()
Example #20
0
    def image(self, tag, image, step):
        """Encode `image` as PNG and write it as an image event for `step`.

        A 2-D input gains a trailing channel axis, and a single-channel
        input is repeated to three channels before encoding.
        """
        pixels = np.asarray(image)
        if pixels.ndim == 2:
            pixels = pixels[..., np.newaxis]
        if pixels.shape[-1] == 1:
            pixels = np.repeat(pixels, 3, axis=-1)

        buffer = io.BytesIO()
        PIL.Image.fromarray(pixels).save(buffer, 'PNG')
        height, width = pixels.shape[0], pixels.shape[1]
        encoded = Summary.Image(encoded_image_string=buffer.getvalue(),
                                colorspace=3,
                                height=height,
                                width=width)
        self._write_event(Summary.Value(tag=tag, image=encoded), step)
Example #21
0
    def _log_statistics(self, elapsed_steps, elapsed_time, global_step):
        """
        Collect and store all summary values.

        Arguments:
            elapsed_steps (int):
                The number of steps between the current trigger event and the last one.
            elapsed_time (float):
                The number of seconds between the current trigger event and the last one.
            global_step (tf.Tensor):
                Global step tensor.
        """

        # Write summary for tensorboard.
        if self._summary_writer is not None:
            summary_list = list()
            # Add only summaries.
            for gpu_id in self._gpu_statistics.keys():
                for statistic in self._gpu_statistics[gpu_id].keys():
                    # only add them if they are requested for logging.
                    if statistic in self._statistics_to_log:
                        values = self._gpu_statistics[gpu_id][statistic]
                        # Only Calculate and write average if there is data available.
                        if values:
                            avg_value = sum(values) / len(values)
                            avg_summary = Summary.Value(tag='{}/{}:{}'.format(
                                self._group_tag, gpu_id, statistic),
                                                        simple_value=avg_value)
                            summary_list.append(avg_summary)

            # Write all statistics as simple scalar summaries.
            summary = Summary(value=summary_list)
            self._summary_writer.add_summary(summary, global_step)

        # Log summaries to the logging stream.
        if not self._suppress_stdout:
            for gpu_id in self._gpu_statistics.keys():
                # Acquire a GPU device handle.
                handle = nvml.nvmlDeviceGetHandleByIndex(gpu_id)

                # Query the device name.
                name = nvml.nvmlDeviceGetName(handle).decode('utf-8')

                for statistic in self._gpu_statistics[gpu_id].keys():
                    # Log utilization information with INFO level.
                    logging.debug(
                        "%s: %s", name, '{}: {}'.format(
                            statistic,
                            self._gpu_statistics[gpu_id][statistic]))
Example #22
0
    def log_event(self, event, phase):
        """Writes a marker summary named '<event>_<phase>' and flushes it.

        When no summary writer is set, a warning is logged and nothing is
        written.
        """
        event_name = event + '_' + phase
        writer = self._summary_writer
        if writer is None:
            logging.warning(
                'profile_logger: cannot log event "%s" '
                ' because of no summary directory', event_name)
            return

        # Only the event timestamp matters for now, so the value is a
        # placeholder.
        marker = Summary.Value(tag=event_name, simple_value=0.0)
        writer.add_summary(Summary(value=[marker]))
        writer.flush()
        logging.info('profile_logger: log event "%s"', event_name)
    def add_entry(self, index, tag, value, **kwargs):
        """Write one scalar or image entry to the training summary writer.

        Args:
            index: Step passed to the writer's `add_summary`.
            tag: Tag attached to the summary value.
            value: The scalar to record or, when an ``image`` keyword is
                supplied and `value` is not None, an image array indexed as
                (height, width, channels).
            **kwargs: Only the presence of an ``image`` key is checked.
        """
        if "image" in kwargs and value is not None:
            # NOTE(review): tf.image.encode_jpeg returns a tensor; confirm it
            # is evaluated to bytes before being stored in the proto.
            image_string = tf.image.encode_jpeg(value,
                                                optimize_size=True,
                                                quality=80)
            image = Summary.Image(width=value.shape[1],
                                  height=value.shape[0],
                                  colorspace=value.shape[2],
                                  encoded_image_string=image_string)
            # `Summary.value` holds `Summary.Value` messages, so the image
            # must be wrapped (and tagged) rather than passed in directly.
            summary_value = Summary.Value(tag=tag, image=image)
        else:
            summary_value = Summary.Value(tag=tag, simple_value=value)

        entry = Summary(value=[summary_value])
        self._train_writer.add_summary(entry, index)
Example #24
0
 def run_loop(self):
   """Report steps per second since the previous invocation.

   The rate is written to the supervisor's summary writer when one is set
   and is logged via `logging.log_first_n`.
   """
   # Steps completed since the previous invocation.
   step_now = training_util.global_step(self._sess, self._sv.global_step)
   step_delta = step_now - self._last_step
   self._last_step = step_now
   # Wall-clock time since the previous invocation.
   time_now = time.time()
   time_delta = time_now - self._last_time
   self._last_time = time_now
   # Steps per second over that interval.
   rate = step_delta / time_delta
   rate_value = Summary.Value(tag=self._summary_tag, simple_value=rate)
   rate_summary = Summary(value=[rate_value])
   if self._sv.summary_writer:
     self._sv.summary_writer.add_summary(rate_summary, step_now)
   logging.log_first_n(logging.INFO, "%s: %g", 10,
                       self._summary_tag, rate)
Example #25
0
def confusion_matrix_summary(tag,
                             cm,
                             classes,
                             normalize=False,
                             recall=True,
                             title='Confusion matrix',
                             cmap=plt.cm.Blues):
    """
    Plot a confusion matrix and return it as an image `Summary`.

    All open matplotlib figures are closed before the new one is drawn.

    Args:
        tag: Tag of the returned summary value.
        cm: 2-D array; rows are plotted as true labels and columns as
            predicted labels.
        classes: Class names used as tick labels on both axes.
        normalize: If True, divide each cell by a row or column total.
        recall: Only used when `normalize` is True. True divides each row by
            its total; False divides each column by its total.
        title: Figure title.
        cmap: Colormap passed to `plt.imshow`.

    Returns:
        A `Summary` with one image value built by `plt_to_image_summary`.
    """
    if normalize:
        eps = np.finfo(np.float32).eps  # avoids division by zero
        if recall:
            # Row totals as a column vector: each row is divided by its sum.
            s = cm.sum(axis=1)[:, np.newaxis] + eps
        else:
            # Column totals must broadcast as a row vector so each column is
            # divided by its own sum; a column vector would divide row i by
            # the total of column i.
            s = cm.sum(axis=0)[np.newaxis, :] + eps

        cm = cm.astype('float') / s

    plt.close('all')

    f_size = max(5, int(0.6 * len(classes)))
    plt.figure(figsize=(f_size, f_size))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    # Cells above half the maximum get white text, the rest black.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j,
                 i,
                 format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()

    image = plt_to_image_summary(plt)
    return Summary(value=[Summary.Value(tag=tag, image=image)])
Example #26
0
    def after_run(self, run_context, run_values):
        """Report steps per second and watch for a stuck global step.

        `run_values.results` holds the global step fetched before the train
        op ran (hence "stale"). When the timer may trigger, the current step
        is re-read from the session and used for reporting. Independently,
        a warning is logged every 20th consecutive call in which the stale
        step did not change.
        """
        _ = run_context

        stale_step = run_values.results
        if self._timer.should_trigger_for_step(stale_step + 1):
            # Re-read the step so it reflects the train op that just ran.
            global_step = run_context.session.run(self._global_step_tensor)
            if self._timer.should_trigger_for_step(global_step):
                elapsed_time, elapsed_steps = (
                    self._timer.update_last_triggered_step(global_step))
                if elapsed_time is not None:
                    rate = elapsed_steps / elapsed_time
                    writer = self._summary_writer
                    if writer is not None:
                        rate_value = Summary.Value(tag=self._summary_tag,
                                                   simple_value=rate)
                        writer.add_summary(Summary(value=[rate_value]),
                                           global_step)
                    logging.info("%s: %g", self._summary_tag, rate)

        # Compare against the previously seen stale step rather than the
        # timer's last triggered step, which may record a different value.
        if stale_step != self._last_global_step:
            # The step advanced: start counting from scratch.
            self._global_step_check_count = 0
        else:
            # Some optimizers do not advance the global step on every run by
            # design (e.g. SyncReplicaOptimizer in the worker's main train
            # step), so only warn after 20 unchanged observations.
            self._global_step_check_count += 1
            if self._global_step_check_count % 20 == 0:
                self._global_step_check_count = 0
                logging.warning(
                    "It seems that global step (tf.train.get_global_step) has not "
                    "been increased. Current value (could be stable): %s vs previous "
                    "value: %s. You could increase the global step by passing "
                    "tf.train.get_global_step() to Optimizer.apply_gradients or "
                    "Optimizer.minimize.", stale_step,
                    self._last_global_step)

        self._last_global_step = stale_step
Example #27
0
    def after_run(self, run_context, run_values):
        """Report training throughput and periodically run evaluation.

        `run_values.results` is used as the local step fetched with the run
        (treated as stale). Nothing happens while it is not positive. When
        the timer may trigger, the global and local steps are re-read from
        the session; if the timer triggers for the local step and reports an
        elapsed time, the rate is logged and, when a summary writer is set,
        the train summary op and a scalar rate summary are written. Every
        `_test_every_n_steps`-th such write additionally runs the evaluator
        and writes the test summary op.
        """
        _ = run_context

        stale_local_step = run_values.results
        if stale_local_step > 0:
            if self._timer.should_trigger_for_step(stale_local_step + 1):
                # Re-read both steps so they reflect the train op that just ran.
                global_step, local_step = run_context.session.run(
                    [self._global_step_tensor, self._local_step_tensor])
                if self._timer.should_trigger_for_step(local_step):
                    elapsed_time, _ = self._timer.update_last_triggered_step(
                        local_step)
                    if elapsed_time is not None:
                        # Rate is based on global-step progress since the
                        # last report, multiplied by `_scale`.
                        steps_per_sec = (global_step - self._last_global_step
                                         ) * self._scale / elapsed_time
                        logging.info("Speech %s: %g", self._summary_tag,
                                     steps_per_sec)
                        if self._summary_writer is not None:
                            # Evaluate and write the train summary op first,
                            # then the scalar rate.
                            aggregated_summary = run_context.session.run(
                                self._summary_train_op)
                            self._summary_writer.add_summary(
                                aggregated_summary, global_step)
                            summary = Summary(value=[
                                Summary.Value(tag=self._summary_tag,
                                              simple_value=steps_per_sec)
                            ])
                            self._summary_writer.add_summary(
                                summary, global_step)
                            # Counts summary writes; every
                            # `_test_every_n_steps`-th one also evaluates.
                            self._exec_count += 1
                            if (self._test_every_n_steps is not None) and (
                                    self._exec_count %
                                    self._test_every_n_steps) == 0:
                                logging.info("Evaluate model start")
                                self._summary_evaluator(run_context.session)
                                aggregated_summary = run_context.session.run(
                                    self._summary_test_op)
                                self._summary_writer.add_summary(
                                    aggregated_summary, global_step)
                                logging.info("Evaluate model end")
                    # NOTE(review): the timer is updated a second time here,
                    # after the update above — confirm this is intended.
                    self._timer.update_last_triggered_step(local_step)
                    self._last_global_step = global_step

            self._last_local_step = stale_local_step
Example #28
0
def scalar(name, scalar):
    """Builds a `Summary` protocol buffer containing a single scalar value.
    Args:
      name: Name for the value; it is passed through `_clean_tag` and used
        as the summary tag.
      scalar: The value to record. Anything that is not already a `float`
        is converted with `float()`.
    Returns:
      A `Summary` with one `Summary.Value` whose `simple_value` is the
      scalar.
    """
    tag = _clean_tag(name)
    # Conversion is attempted here; if it fails the caller has to handle it.
    value = scalar if isinstance(scalar, float) else float(scalar)
    return Summary(value=[Summary.Value(tag=tag, simple_value=value)])
Example #29
0
def histogram(name, values):
    # pylint: disable=line-too-long
    """Builds a `Summary` protocol buffer with a histogram.
    The returned
    [`Summary`](https://www.tensorflow.org/code/tensorflow/core/framework/summary.proto)
    has one summary value containing a histogram for `values`.
    Args:
      name: Name for the value; it is passed through `_clean_tag` and used
        as the summary tag.
      values: Array of values to build the histogram from. It is cast with
        `astype(float)` before being handed to `make_histogram`.
    Returns:
      A `Summary` with one `Summary.Value` whose `histo` is the histogram.
    """
    tag = _clean_tag(name)
    float_values = values.astype(float)
    histo_proto = make_histogram(float_values)
    return Summary(value=[Summary.Value(tag=tag, histo=histo_proto)])
Example #30
0
def write_hptuning_metric(args, metric):
    """
    Output a metric measuring the success of the model.

    This metric will be used by hypertuning to find the best performing model.

    Args:
        args: Object exposing an `output_dir` attribute (the job directory).
        metric: The metric (e.g., test rmse) to be recorded under the tag
            'training/hptuning/metric'.
    """
    summary = Summary(value=[Summary.Value(tag='training/hptuning/metric', simple_value=metric)])

    # for hyperparam tuning, we write a summary log to a directory 'eval' below the job directory
    eval_path = os.path.join(args.output_dir, 'eval')
    summary_writer = tf.summary.FileWriter(eval_path)

    # Note: adding the summary to the writer is enough for hyperparam tuning.
    # The ml engine system is looking for any summary added with the hyperparam metric tag.
    try:
        summary_writer.add_summary(summary)
        summary_writer.flush()
    finally:
        # Close the writer so the event file is released even when the
        # write fails; the original left it open.
        summary_writer.close()