def _host_call_fn(gs, loss, lr):
    """Training host call.

  Creates scalar summaries for training metrics.

  This function is executed on the CPU and should not directly reference
  any Tensors in the rest of the `model_fn`. To pass Tensors from the
  model to the `metric_fn`, provide as part of the `host_call`. See
  https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
  for more information.

  Arguments should match the list of `Tensor` objects passed as the second
  element in the tuple passed to `host_call`.

  Args:
    gs: `Tensor` with shape `[batch]` for the global_step.
    loss: `Tensor` with shape `[batch]` for the training loss.
    lr: `Tensor` with shape `[batch]` for the learning_rate.

  Returns:
    List of summary ops to run on the CPU host.
  """
    # Host call fns are executed FLAGS.iterations_per_loop times after one
    # TPU loop is finished, setting max_queue value to the same as number of
    # iterations will make the summary writer only flush the data to storage
    # once per loop.
    gs = gs[0]
    with summary.create_file_writer(
            FLAGS.model_dir, max_queue=FLAGS.iterations_per_loop).as_default():
        with summary.always_record_summaries():
            summary.scalar("loss", loss[0], step=gs)
            summary.scalar("learning_rate", lr[0], step=gs)
            return summary.all_summary_ops()
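For context, a minimal sketch of how a host call like this is typically wired into a `tf.contrib.tpu.TPUEstimatorSpec` inside a TPU `model_fn` (tensor names are illustrative; outfeed expects each value to carry a leading batch dimension, hence the reshapes):

# Sketch only: assumes `loss`, `learning_rate`, and `train_op` are already
# defined earlier in the model_fn; the names are illustrative.
gs_t = tf.reshape(tf.train.get_global_step(), [1])
loss_t = tf.reshape(loss, [1])
lr_t = tf.reshape(learning_rate, [1])

host_call = (_host_call_fn, [gs_t, loss_t, lr_t])

return tf.contrib.tpu.TPUEstimatorSpec(
    mode=tf.estimator.ModeKeys.TRAIN,
    loss=loss,
    train_op=train_op,
    host_call=host_call)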
Example #2
def train(model, optimizer, dataset, step_counter, log_interval=None):
    """Trains model on `dataset` using `optimizer`."""
    from tensorflow.contrib import summary as contrib_summary  # pylint: disable=g-import-not-at-top

    start = time.time()
    for (batch, (images, labels)) in enumerate(dataset):
        with contrib_summary.record_summaries_every_n_global_steps(
                10, global_step=step_counter):
            # Record the operations used to compute the loss given the input,
            # so that the gradient of the loss with respect to the variables
            # can be computed.
            with tf.GradientTape() as tape:
                logits = model(images, training=True)
                loss_value = loss(logits, labels)
                contrib_summary.scalar('loss', loss_value)
                contrib_summary.scalar('accuracy',
                                       compute_accuracy(logits, labels))
            grads = tape.gradient(loss_value, model.variables)
            optimizer.apply_gradients(list(zip(grads, model.variables)),
                                      global_step=step_counter)
            if log_interval and batch % log_interval == 0:
                rate = log_interval / (time.time() - start)
                print('Step #%d\tLoss: %.6f (%d steps/sec)' %
                      (batch, loss_value, rate))
                start = time.time()
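As a usage note, `record_summaries_every_n_global_steps` only writes while a default summary writer is installed, so a driver along these lines (log directory and epoch count are illustrative) would typically surround the call:

# Sketch only: `model`, `optimizer`, and `dataset` are assumed to be built as in
# the snippet above.
from tensorflow.contrib import summary as contrib_summary

step_counter = tf.train.get_or_create_global_step()
writer = contrib_summary.create_file_writer('/tmp/mnist_train', flush_millis=10000)
with writer.as_default():
    for _ in range(10):  # number of epochs, illustrative
        train(model, optimizer, dataset, step_counter, log_interval=10)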
Example #3
    def _update_critic_ddpg(self, obs, action, next_obs, reward, mask):
        """Updates parameters of ddpg critic given samples from the batch.

    Args:
       obs: A tfe.Variable with a batch of observations.
       action: A tfe.Variable with a batch of actions.
       next_obs: A tfe.Variable with a batch of next observations.
       reward: A tfe.Variable with a batch of rewards.
       mask: A tfe.Variable with a batch of masks.
    """
        if self.use_absorbing_state:
            # Starting from the goal state we can execute only non-actions.
            a_mask = tf.maximum(0, mask)
            q_next = self.critic_target(next_obs,
                                        self.actor_target(next_obs) * a_mask)
            q_target = reward + self.discount * q_next
        else:
            # Without an absorbing state we assign rewards of 0.
            q_next = self.critic_target(next_obs, self.actor_target(next_obs))
            q_target = reward + self.discount * mask * q_next

        with tf.GradientTape() as tape:
            q_pred = self.critic(obs, action)
            critic_loss = tf.losses.mean_squared_error(q_target, q_pred)

        grads = tape.gradient(critic_loss, self.critic.variables)
        self.critic_optimizer.apply_gradients(zip(grads,
                                                  self.critic.variables),
                                              global_step=self.critic_step)

        with contrib_summary.record_summaries_every_n_global_steps(
                100, self.critic_step):
            contrib_summary.scalar('critic/loss',
                                   critic_loss,
                                   step=self.critic_step)
Example #4
 def _compute_gradients(self,
                        actions,
                        discounted_rewards,
                        weights=None,
                        sequence_length=None,
                        loss_str='train',
                        use_entropy_regularization=True,
                        **kwargs):
   """Implement the policy gradient in TF."""
   if sequence_length is not None:
     seq_mask = tf.sequence_mask(sequence_length, dtype=tf.float32)
   else:
     seq_mask = None
   with tf.GradientTape(watch_accessed_variables=False) as tape:
     tape.watch(self.trainable_variables)
     # Returns 0.0 if critic is not being used
     value_loss = self._compute_value_loss(
         discounted_rewards, seq_mask=seq_mask, **kwargs)
     policy_loss = self._compute_policy_loss(
         discounted_rewards,
         actions,
         seq_mask=seq_mask,
         weights=weights,
         use_entropy_regularization=use_entropy_regularization,
         **kwargs)
     loss = tf.reduce_mean(policy_loss + value_loss)
   if self.log_summaries and (self._counter % self.log_every == 0):
     contrib_summary.scalar('{}_loss'.format(loss_str), loss)
   return tape.gradient(loss, self.trainable_variables)
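The gradients returned here are applied by the caller; a rough usage sketch (the `agent` instance and its `optimizer` attribute are assumptions, not shown in this snippet):

# Sketch only: `agent` is an instance of the policy class this method belongs to,
# and `actions`/`discounted_rewards` come from a sampled batch.
grads = agent._compute_gradients(actions, discounted_rewards,
                                 weights=weights,
                                 sequence_length=sequence_length)
agent.optimizer.apply_gradients(
    zip(grads, agent.trainable_variables),
    global_step=tf.train.get_or_create_global_step())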
Example #5
 def host_call_fn(gs, g_loss, d_loss, real_audio, generated_audio):
   """Training host call. Creates scalar summaries for training metrics.
   This function is executed on the CPU and should not directly reference
   any Tensors in the rest of the `model_fn`. To pass Tensors from the
   model to the `metric_fn`, provide as part of the `host_call`. See
   https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
   for more information.
   Arguments should match the list of `Tensor` objects passed as the second
   element in the tuple passed to `host_call`.
   Args:
     gs: `Tensor` with shape `[batch]` for the global_step.
     g_loss: `Tensor` with shape `[batch]` for the generator loss.
     d_loss: `Tensor` with shape `[batch]` for the discriminator loss.
     real_audio: `Tensor` with shape `[batch, 8192, 1]`
     generated_audio: `Tensor` with shape `[batch, 8192, 1]`
   Returns:
     List of summary ops to run on the CPU host.
   """
   gs = gs[0]
   with summary.create_file_writer(FLAGS.model_dir).as_default():
       with summary.always_record_summaries():
           summary.scalar('g_loss', g_loss, step=gs)
           summary.scalar('d_loss', d_loss, step=gs)
           summary.audio('real_audio', real_audio, sample_rate=_FS, max_outputs=10, step=gs)
           summary.audio('generated_audio', generated_audio, sample_rate=_FS, max_outputs=10, step=gs)
   return summary.all_summary_ops()
Example #7
    def _update_actor(self, obs, mask):
        """Updates parameters of critic given samples from the batch.

    Args:
       obs: A tfe.Variable with a batch of observations.
       mask: A tfe.Variable with a batch of masks.
    """
        with tf.GradientTape() as tape:
            if self.use_td3:
                q_pred, _ = self.critic(obs, self.actor(obs))
            else:
                q_pred = self.critic(obs, self.actor(obs))
            if self.use_absorbing_state:
                # Don't update the actor for absorbing states.
                # And skip update if all states are absorbing.
                a_mask = 1.0 - tf.maximum(0, -mask)
                if tf.reduce_sum(a_mask) < 1e-8:
                    return
                actor_loss = -tf.reduce_sum(
                    q_pred * a_mask) / tf.reduce_sum(a_mask)
            else:
                actor_loss = -tf.reduce_mean(q_pred)

        grads = tape.gradient(actor_loss, self.actor.variables)
        # Clipping makes training more stable.
        grads, _ = tf.clip_by_global_norm(grads, 40.0)
        self.actor_optimizer.apply_gradients(zip(grads, self.actor.variables),
                                             global_step=self.actor_step)

        with contrib_summary.record_summaries_every_n_global_steps(
                100, self.actor_step):
            contrib_summary.scalar('actor/loss',
                                   actor_loss,
                                   step=self.actor_step)
Example #8
 def host_call_fn(*tensors):
     """Training host call. Creates scalar summaries for
     training metrics. This function is executed on the CPU and
     should not directly reference any Tensors in the rest of
     the `model_fn`. To pass Tensors from the  model to
     the `metric_fn`, provide as part of the `host_call`. See
     https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
     for more information. Arguments should match the list of
     `Tensor` objects passed as the second element in the tuple
      passed to `host_call`.
     """
     gs = tensors[0][0]
     # Host call fns are executed params['iterations_per_loop']
     # times after one TPU loop is finished, setting max_queue
     # value to the same as number of iterations will make the
     # summary writer only flush the data to storage once per
     # loop.
     summary_writer = summary.create_file_writer(
         config.checkpoint_dir,
         max_queue=config.get('iterations_per_loop', 1000))
     with summary_writer.as_default():
         with summary.always_record_summaries():
             for idx in range(len(tensors_to_print)):
                 summary.scalar(tensors_to_print_names[idx],
                                tensors[idx][0],
                                step=gs)
             return summary.all_summary_ops()
Example #9
    def host_call_fn(global_step, *args):
        """Training host call. Creates scalar summaries for training metrics.

    This function is executed on the CPU and should not directly reference
    any Tensors in the rest of the `model_fn`. To pass Tensors from the
    model to the `metric_fn`, provide as part of the `host_call`. See
    https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
    for more information.

    Arguments should match the list of `Tensor` objects passed as the second
    element in the tuple passed to `host_call`.

    Args:
      global_step: `Tensor` with shape `[batch]` for the global_step.
      *args: Remaining tensors to log.

    Returns:
      List of summary ops to run on the CPU host.
    """
        step = global_step[0]
        with contrib_summary.create_file_writer(
                logdir=model_dir, filename_suffix=".host_call").as_default():
            with contrib_summary.always_record_summaries():
                for i, name in enumerate(metric_names):
                    contrib_summary.scalar(prefix + name,
                                           args[i][0],
                                           step=step)

                return contrib_summary.all_summary_ops()
Example #10
    def eval_metrics_host_call_fn(policy_output,
                                  value_output,
                                  pi_tensor,
                                  policy_cost,
                                  value_cost,
                                  l2_cost,
                                  combined_cost,
                                  est_mode=tf.estimator.ModeKeys.TRAIN):
        policy_entropy = -tf.reduce_mean(
            tf.reduce_sum(policy_output * tf.log(policy_output), axis=1))
        # pi_tensor is one_hot when generated from sgfs (for supervised learning)
        # and soft-max when using self-play records. argmax normalizes the two.
        policy_target_top_1 = tf.argmax(pi_tensor, axis=1)
        policy_output_top_1 = tf.argmax(policy_output, axis=1)

        policy_output_in_top3 = tf.to_float(
            tf.nn.in_top_k(policy_output, policy_target_top_1, k=3))

        policy_top_1_confidence = tf.reduce_max(policy_output, axis=1)
        policy_target_top_1_confidence = tf.boolean_mask(
            policy_output,
            tf.one_hot(policy_target_top_1,
                       tf.shape(policy_output)[1]))

        metric_ops = {
            'policy_cost':
            tf.metrics.mean(policy_cost),
            'value_cost':
            tf.metrics.mean(value_cost),
            'l2_cost':
            tf.metrics.mean(l2_cost),
            'policy_entropy':
            tf.metrics.mean(policy_entropy),
            'combined_cost':
            tf.metrics.mean(combined_cost),
            'policy_accuracy_top_1':
            tf.metrics.accuracy(labels=policy_target_top_1,
                                predictions=policy_output_top_1),
            'policy_accuracy_top_3':
            tf.metrics.mean(policy_output_in_top3),
            'policy_top_1_confidence':
            tf.metrics.mean(policy_top_1_confidence),
            'policy_target_top_1_confidence':
            tf.metrics.mean(policy_target_top_1_confidence),
            'value_confidence':
            tf.metrics.mean(tf.abs(value_output)),
        }

        # Create summary ops so that they show up in SUMMARIES collection
        # That way, they get logged automatically during training
        summary_writer = summary.create_file_writer(FLAGS.model_dir)
        with summary_writer.as_default(), \
                summary.record_summaries_every_n_global_steps(FLAGS.summary_steps):
            for metric_name, metric_op in metric_ops.items():
                summary.scalar(metric_name, metric_op[1])

        if est_mode == tf.estimator.ModeKeys.EVAL:
            return metric_ops
        return summary.all_summary_ops()
Example #11
 def host_call_fn(**kwargs):
     writer = contrib_summary.create_file_writer(summary_dir,
                                                 max_queue=1000)
     always_record = contrib_summary.always_record_summaries()
     with writer.as_default(), always_record:
         for name, scalar in kwargs.items():
             contrib_summary.scalar(name, tf.reduce_mean(scalar))
         return contrib_summary.all_summary_ops()
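This keyword-argument variant pairs naturally with a dict-style `host_call`; a minimal sketch, assuming the estimator accepts a dictionary as the second element, with illustrative tensor names:

# Sketch only: each scalar is reshaped to carry a batch dimension for outfeed.
host_call = (host_call_fn, {
    'loss': tf.reshape(loss, [1]),
    'learning_rate': tf.reshape(learning_rate, [1]),
})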
Example #12
    def eval_metrics_host_call_fn(policy_output, value_output, pi_tensor, policy_cost,
                                  value_cost, l2_cost, combined_cost, step,
                                  est_mode=tf.estimator.ModeKeys.TRAIN):
        policy_entropy = -tf.reduce_mean(tf.reduce_sum(
            policy_output * tf.log(policy_output), axis=1))
        # pi_tensor is one_hot when generated from sgfs (for supervised learning)
        # and soft-max when using self-play records. argmax normalizes the two.
        policy_target_top_1 = tf.argmax(pi_tensor, axis=1)

        policy_output_in_top1 = tf.to_float(
            tf.nn.in_top_k(policy_output, policy_target_top_1, k=1))
        policy_output_in_top3 = tf.to_float(
            tf.nn.in_top_k(policy_output, policy_target_top_1, k=3))

        policy_top_1_confidence = tf.reduce_max(policy_output, axis=1)
        policy_target_top_1_confidence = tf.boolean_mask(
            policy_output,
            tf.one_hot(policy_target_top_1, tf.shape(policy_output)[1]))

        with tf.variable_scope("metrics"):
            metric_ops = {
                'policy_cost': tf.metrics.mean(policy_cost),
                'value_cost': tf.metrics.mean(value_cost),
                'l2_cost': tf.metrics.mean(l2_cost),
                'policy_entropy': tf.metrics.mean(policy_entropy),
                'combined_cost': tf.metrics.mean(combined_cost),

                'policy_accuracy_top_1': tf.metrics.mean(policy_output_in_top1),
                'policy_accuracy_top_3': tf.metrics.mean(policy_output_in_top3),
                'policy_top_1_confidence': tf.metrics.mean(policy_top_1_confidence),
                'policy_target_top_1_confidence': tf.metrics.mean(
                    policy_target_top_1_confidence),
                'value_confidence': tf.metrics.mean(tf.abs(value_output)),
            }

        if est_mode == tf.estimator.ModeKeys.EVAL:
            return metric_ops

        # NOTE: global_step is rounded to a multiple of FLAGS.summary_steps.
        eval_step = tf.reduce_min(step)

        # Create summary ops so that they show up in SUMMARIES collection
        # That way, they get logged automatically during training
        summary_writer = summary.create_file_writer(FLAGS.work_dir)
        with summary_writer.as_default(), \
                summary.record_summaries_every_n_global_steps(
                    params['summary_steps'], eval_step):
            for metric_name, metric_op in metric_ops.items():
                summary.scalar(metric_name, metric_op[1], step=eval_step)

        # Reset metrics occasionally so that they are mean of recent batches.
        reset_op = tf.variables_initializer(tf.local_variables("metrics"))
        cond_reset_op = tf.cond(
            tf.equal(eval_step % params['summary_steps'], tf.to_int64(1)),
            lambda: reset_op,
            lambda: tf.no_op())

        return summary.all_summary_ops() + [cond_reset_op]
Example #13
 def host_call_fn(gs, *summary_tensors):
     gs = gs[0]
     with contrib_summary.create_file_writer(
             FLAGS.workdir).as_default():
         with contrib_summary.always_record_summaries():
             for name, reshaped_tensor in zip(
                     summary_names, summary_tensors):
                 contrib_summary.scalar(
                     name, tf.reduce_mean(reshaped_tensor), step=gs)
             return contrib_summary.all_summary_ops()
Example #14
 def host_call_fn(gs, lr):
     # Outfeed supports int32 but global_step is expected to be int64.
     gs = tf.cast(tf.reduce_mean(gs), tf.int64)
     with summary.create_file_writer(
             self.model_dir).as_default():
         with summary.always_record_summaries():
             summary.scalar('learning_rate',
                            tf.reduce_mean(lr),
                            step=gs)
             return summary.all_summary_ops()
Example #15
 def _evaluate_eager(self, data_name):
     loss_metric = tfe.metrics.Mean('loss')
     accuracy_metric = tfe.metrics.Mean('accuracy')
     for (features, labels) in tfe.Iterator(self.data[data_name]):
         logits = self.model(features, training=False)
         loss_metric(self.loss(labels, logits))
         accuracy_metric(self.accuracy(labels, logits))
     avg_loss, avg_accuracy = loss_metric.result(), accuracy_metric.result()
     summary.scalar('loss', avg_loss)
     summary.scalar('accuracy', avg_accuracy)
     return avg_loss, avg_accuracy
Example #16
 def host_call_fn(global_step, *tensors):
     """Training host call."""
     global_step = global_step[0]
     with contrib_summary.create_file_writer(summary_dir +
                                             '/metrics').as_default():
         with contrib_summary.always_record_summaries():
             for i, tensor in enumerate(tensors):
                 contrib_summary.scalar(names[i],
                                        tensor[0],
                                        step=global_step)
             return contrib_summary.all_summary_ops()
Example #17
 def _train_eager_one_epoch(self):
     for (batch, (features,
                  labels)) in enumerate(tfe.Iterator(self.data['train'])):
         with tfe.GradientTape() as tape:
             logits = self.model(features, training=True)
             train_loss = self.loss(labels, logits)
             train_accuracy = self.accuracy(labels, logits)
         grads = tape.gradient(train_loss, self.model.variables)
         self.optimizer.apply_gradients(zip(grads, self.model.variables),
                                        global_step=self.step_counter)
         summary.scalar('loss', train_loss)
         summary.scalar('accuracy', train_accuracy)
Example #18
 def _host_loss_summary(global_step, tf_loss, **scalars):
   """Add summary.scalar in host side."""
   gs = tf.cast(global_step, tf.int64)
   sum_loss = contrib_summary.scalar(
       '{}_loss'.format(train_or_eval), tf_loss, step=gs)
   sum_ops = [sum_loss.op]
   for description, tf_metric in scalars.iteritems():
     sum_metric = contrib_summary.scalar(
         '{}_{}'.format(train_or_eval, description), tf_metric, step=gs)
     sum_ops.append(sum_metric)
   with tf.control_dependencies(sum_ops):
     return tf.identity(tf_loss)
Example #19
 def host_call_fn(global_step, *tensors):
     """Training host call."""
     global_step = global_step[0]
     with contrib_summary.create_file_writer(
             params.output_dir).as_default():
         with contrib_summary.record_summaries_every_n_global_steps(
                 n=params.log_every, global_step=global_step):
             for i, tensor in enumerate(tensors):
                 if 'images' not in names[i]:
                     contrib_summary.scalar(names[i],
                                            tensor[0],
                                            step=global_step)
             return contrib_summary.all_summary_ops()
Example #20
 def update(self, samples, contexts, dev_samples, dev_contexts):
   if self._counter % 20 == 0:
     # To prevent memory leaks in tf eager
     tf.set_random_seed(self._seed)
   actions, rews, weights, kwargs = self.create_batch(
       samples, contexts=contexts)
   dev_actions, dev_rews, dev_weights, dev_kwargs = self.create_batch(
       dev_samples, contexts=dev_contexts)
   trajs = (s.traj for s in samples)
   with tf.GradientTape(
       watch_accessed_variables=False, persistent=True) as tape0:
     tape0.watch(self._score_vars)
     scores = self.compute_scores(trajs, return_tensors=True)
     scores = [
         tf.nn.softmax(x)
         for x in tf.split(scores, len(actions) // 10, axis=0)
     ]
     scores = tf.concat(scores, axis=0)
     rews = rews * tf.expand_dims(scores, axis=-1)
     grads = self._compute_gradients(actions, rews, weights, **kwargs)
     grads, _ = tf.clip_by_global_norm(grads, self.max_grad_norm)
     grads_and_vars = zip(grads, self.trainable_variables)
     new_vars = [v - self.learning_rate * g for g, v in grads_and_vars]
   self.optimizer.apply_gradients(grads_and_vars)
   grads_loss = self._compute_gradients(
       dev_actions,
       dev_rews,
       dev_weights,
       loss_str='dev',
       use_entropy_regularization=False,
       **dev_kwargs)
   score_grads = tape0.gradient(
       new_vars, self._score_vars, output_gradients=grads_loss)
   del tape0
   score_grads_and_vars = self._score_grad_clipping(
       zip(score_grads, self._score_vars))
   self.score_optimizer.apply_gradients(
       score_grads_and_vars, global_step=self.global_step)
   if self.log_summaries:
     grads = list(zip(*grads_and_vars)[0])
     score_grads = list(zip(*score_grads_and_vars)[0])
     contrib_summary.scalar('global_norm/train_grad', tf.global_norm(grads))
     contrib_summary.scalar('global_norm/meta_grad',
                            tf.global_norm(score_grads))
   if self._debug and (self._counter % self.log_every == 0):
     tf.print(
         'Epoch {} scores='.format(self._counter),
         scores[:20],
         summarize=10,
         output_stream=sys.stdout)
   self._counter += 1
Example #21
 def host_call_fn(gs,
                  loss,
                  lr,
                  mix=None,
                  gt_sources=None,
                  est_sources=None):
     """Training host call. Creates scalar summaries for training metrics.
         This function is executed on the CPU and should not directly reference
         any Tensors in the rest of the `model_fn`. To pass Tensors from the
         model to the `metric_fn`, provide as part of the `host_call`. See
         https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
         for more information.
         Arguments should match the list of `Tensor` objects passed as the second
         element in the tuple passed to `host_call`.
         Args:
            gs: `Tensor` with shape `[batch]` for the global_step.
           loss: `Tensor` with shape `[batch]` for the training loss.
           lr: `Tensor` with shape `[batch]` for the learning_rate.
            mix: `Tensor` with shape `[batch, mix_samples, 1]`
           gt_sources: `Tensor` with shape `[batch, sources_n, output_samples, 1]`
           est_sources: `Tensor` with shape `[batch, sources_n, output_samples, 1]`
         Returns:
           List of summary ops to run on the CPU host.
         """
     gs = gs[0]
     with summary.create_file_writer(
             model_config["model_base_dir"] + os.path.sep +
             str(model_config["experiment_id"])).as_default():
         with summary.always_record_summaries():
             summary.scalar('loss', loss[0], step=gs)
             summary.scalar('learning_rate', lr[0], step=gs)
         if gs % 10000 == 0:
             with summary.record_summaries_every_n_global_steps(
                     model_config["audio_summaries_every_n_steps"]):
                 summary.audio('mix',
                               mix,
                               model_config['expected_sr'],
                               max_outputs=model_config["num_sources"])
                 for source_id in range(gt_sources.shape[1].value):
                     summary.audio('gt_sources_{source_id}'.format(
                         source_id=source_id),
                                   gt_sources[:, source_id, :, :],
                                   model_config['expected_sr'],
                                   max_outputs=model_config["num_sources"])
                     summary.audio('est_sources_{source_id}'.format(
                         source_id=source_id),
                                   est_sources[:, source_id, :, :],
                                   model_config['expected_sr'],
                                   max_outputs=model_config["num_sources"])
     return summary.all_summary_ops()
Example #22
    def _setup_graph_training(self, accuracy, loss, optimizer):
        data_types, data_shapes = self.data['train'].output_types, \
                                  self.data['train'].output_shapes
        self._iterator = tf.data.Iterator.from_structure(
            data_types, data_shapes)
        self._inits = {
            'train': self._iterator.make_initializer(self.data['train']),
            'val': self._iterator.make_initializer(self.data['val']),
            'test': self._iterator.make_initializer(self.data['test'])
        }
        features, labels = self._iterator.get_next()
        training = tf.placeholder(tf.bool, name='training')

        self.logits = self.model(features, training=training)
        self.accuracy = accuracy(labels, self.logits)
        self.loss = loss(labels, self.logits)
        with self.summary.as_default(), summary.always_record_summaries():
            summary.scalar('train_loss', self.loss)
            summary.scalar('train_accuracy', self.accuracy)
        with self.test_summary.as_default(), summary.always_record_summaries():
            summary.scalar('val_loss',
                           tf.placeholder(tf.float32, name='avg_loss'))
            summary.scalar('val_accuracy',
                           tf.placeholder(tf.float32, name='avg_accuracy'))
        # Needed for BatchNorm to work
        extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(extra_update_ops):
            self.optimizer = optimizer.minimize(self.loss,
                                                global_step=self.step_counter)
Example #23
        def host_call_fn(*args):
          """Host call function to compute training summaries."""
          scalars = _list_to_dicts(args, scalars_to_summarize.keys())[0]
          for name in scalars:
            scalars[name] = scalars[name][0]

          with contrib_summary.create_file_writer(
              summary_dir, max_queue=1000).as_default():
            with contrib_summary.always_record_summaries():
              for name, value in scalars.items():
                if name not in ["global_step"]:
                  contrib_summary.scalar(
                      name, value, step=scalars["global_step"])

          return contrib_summary.all_summary_ops()
Example #24
            def host_call_fn(gs, lr):
                """Training host call. Creates scalar summaries for training metrics.

        Args:
          gs: `Tensor` with shape `[batch]` for the global_step.
          lr: `Tensor` with shape `[batch]` for the learning_rate.

        Returns:
          List of summary ops to run on the CPU host.
        """
                gs = gs[0]
                with summary.create_file_writer(
                        params['model_dir']).as_default():
                    with summary.always_record_summaries():
                        summary.scalar('learning_rate', lr[0], step=gs)
                        return summary.all_summary_ops()
Example #25
def host_call_fn(model_dir, **kwargs):
    """host_call function used for creating training summaries when using TPU.

  Args:
    model_dir: String indicating the output_dir to save summaries in.
    **kwargs: Set of metric names and tensor values for all desired summaries.

  Returns:
    Summary op to be passed to the host_call arg of the estimator function.
  """
    gs = kwargs.pop('global_step')[0]
    with summary.create_file_writer(model_dir).as_default():
        with summary.always_record_summaries():
            for name, tensor in kwargs.iteritems():
                summary.scalar(name, tensor[0], step=gs)
            return summary.all_summary_ops()
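Because a host call function only receives tensors when it runs, the `model_dir` argument here is usually bound in advance, for example with `functools.partial`; a sketch with illustrative tensors:

# Sketch only: binds model_dir before handing the function to the estimator.
import functools

host_call = (functools.partial(host_call_fn, model_dir),
             {'global_step': tf.reshape(tf.train.get_global_step(), [1]),
              'loss': tf.reshape(loss, [1])})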
Example #26
def test(model, dataset):
    """Perform an evaluation of `model` on the examples from `dataset`."""
    from tensorflow.contrib import summary as contrib_summary  # pylint: disable=g-import-not-at-top
    avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
    accuracy = tf.keras.metrics.Accuracy('accuracy', dtype=tf.float32)

    for (images, labels) in dataset:
        logits = model(images, training=False)
        avg_loss.update_state(loss(logits, labels))
        accuracy.update_state(tf.argmax(logits, axis=1, output_type=tf.int64),
                              tf.cast(labels, tf.int64))
    print('Test set: Average loss: %.4f, Accuracy: %4f%%\n' %
          (avg_loss.result(), 100 * accuracy.result()))
    with contrib_summary.always_record_summaries():
        contrib_summary.scalar('loss', avg_loss.result())
        contrib_summary.scalar('accuracy', accuracy.result())
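As with the training loop above, these evaluation scalars only reach disk while a default writer is active; one possible wrapper (log directory illustrative):

# Sketch only: `model` and `dataset` are assumed to be built elsewhere.
from tensorflow.contrib import summary as contrib_summary

test_writer = contrib_summary.create_file_writer('/tmp/mnist_test', flush_millis=10000)
with test_writer.as_default():
    test(model, dataset)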
Example #27
    def reconstruction_loss(self, ignore_values={}, summaries=False):

        value_gt = {
            nt.id: []
            for nt in self.tree_def.node_types if nt.value_type is not None
        }
        value = {
            nt.id: []
            for nt in self.tree_def.node_types if nt.value_type is not None
        }

        # First gathering all the tensors and then computing the loss is ~3.5 times faster.
        def gather(node):
            if node.value is not None:
                value[node.node_type_id].append(node.value.index)
                value_gt[node.node_type_id].append(
                    node.tr_gt_value.abstract_value)
            list(map(gather, node.children))

        list(map(gather, self.decoded_trees))

        sample_distrib_error = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=self.get_stacked('distribs_idx'),
            logits=self.get_stacked('distribs_unscaled'))

        d_loss = tf.reduce_mean(sample_distrib_error)

        v_loss = 0
        for k in value.keys():
            if len(value[k]) > 0 and k not in ignore_values:
                vt = self.tree_def.id_map[k].value_type
                all_value_gt = vt.abstract_to_representation_batch(value_gt[k])
                all_value_gen = tf.gather(self['vals_' + k], value[k])
                if vt.class_value:
                    vk_loss = tf.nn.softmax_cross_entropy_with_logits_v2(
                        labels=all_value_gt, logits=all_value_gen)
                else:
                    vk_loss = tf.reduce_mean(tf.square(all_value_gen -
                                                       all_value_gt),
                                             axis=-1)

                reduced = tf.reduce_mean(vk_loss, axis=-1)
                tfs.scalar("loss/tr/values/" + k, reduced)
                v_loss += reduced  # TODO handle the mixing of losses from different domains (i.e. properly weight them)

        return d_loss, v_loss
Example #28
    def summary(self, inputs, outputs, losses, step):
        super().summary(inputs, outputs, losses, step)

        with summary.always_record_summaries():
            if step.numpy() % 20 == 0:
                summary.image('summary/tube',
                              inputs['rgb'],
                              max_images=1,
                              step=step)
                summary.image('summary/map',
                              inputs['map'],
                              max_images=1,
                              step=step)
                summary.image('summary/output',
                              outputs['map'],
                              max_images=1,
                              step=step)
                summary.scalar('summary/loss', losses['loss'], step=step)
Example #29
        def host_call_fn(gs, loss, lr, ce):
            """Training host call. Creates scalar summaries for training metrics.

      This function is executed on the CPU and should not directly reference
      any Tensors in the rest of the `model_fn`. To pass Tensors from the model
      to the `metric_fn`, provide as part of the `host_call`. See
      https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
      for more information.

      Arguments should match the list of `Tensor` objects passed as the second
      element in the tuple passed to `host_call`.

      Args:
        gs: `Tensor` with shape `[batch]` for the global_step.
        loss: `Tensor` with shape `[batch]` for the training loss.
        lr: `Tensor` with shape `[batch]` for the learning_rate.
        ce: `Tensor` with shape `[batch]` for the current_epoch.

      Returns:
        List of summary ops to run on the CPU host.
      """
            gs = gs[0]
            with summary.create_file_writer(FLAGS.model_dir).as_default():
                with summary.always_record_summaries():
                    summary.scalar('loss', tf.reduce_mean(loss), step=gs)
                    summary.scalar('learning_rate',
                                   tf.reduce_mean(lr),
                                   step=gs)
                    summary.scalar('current_epoch',
                                   tf.reduce_mean(ce),
                                   step=gs)

                    return summary.all_summary_ops()
Example #30
      def host_call_fn(gs, loss, lr, ce):
        """Training host call. Creates scalar summaries for training metrics.

        This function is executed on the CPU and should not directly reference
        any Tensors in the rest of the `model_fn`. To pass Tensors from the
        model to the `metric_fn`, provide as part of the `host_call`. See
        https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
        for more information.

        Arguments should match the list of `Tensor` objects passed as the second
        element in the tuple passed to `host_call`.

        Args:
          gs: `Tensor` with shape `[batch]` for the global_step.
          loss: `Tensor` with shape `[batch]` for the training loss.
          lr: `Tensor` with shape `[batch]` for the learning_rate.
          ce: `Tensor` with shape `[batch]` for the current_epoch.

        Returns:
          List of summary ops to run on the CPU host.
        """
        gs = gs[0]
        with summary.create_file_writer(FLAGS.model_dir).as_default():
          with summary.always_record_summaries():
            summary.scalar('loss', loss[0], step=gs)
            summary.scalar('learning_rate', lr[0], step=gs)
            summary.scalar('current_epoch', ce[0], step=gs)

            return summary.all_summary_ops()
Example #31
            def host_call_fn(gs, loss, acc, ce):  #lr, ce):
                '''Training host call. Creates scalar summaries for training
                    metrics.
                    This function is executed on the CPU: after
                    :iterations_per_loop computations on the TPU, control moves
                    to the CPU, where the summaries are updated.
                    Arguments should match the list of 'Tensor' objects passed as
                    the second element in the tuple passed to 'host_call'.
                Args:
                    gs: Tensor with shape [batch] for global step
                    loss: Tensor with shape [batch] for the training loss
                    lr: Tensor with shape [batch] for the learning rate
                    ce: Tensor with shape [batch] for the current epoch

                Returns:
                    List of summary ops to run on the CPU host.
                '''
                gs = gs[0]
                # Host call fns are executed FLAGS.iterations_per_loop times after
                # one TPU loop is finished, setting max_queue value to the same as
                # number of iterations will make the summary writer only flush the
                # data to storage once per loop.
                with summary.create_file_writer(
                        FLAGS.model_dir,
                        max_queue=FLAGS.iterations_per_loop).as_default():

                    with summary.always_record_summaries():
                        summary.scalar('loss', loss[0], step=gs)
                        summary.scalar('top_1', acc[0], step=gs)
                        #summary.scalar('top_5', t5_acc[0], step=gs)
                        #summary.scalar('learning_rate', lr[0], step=gs)
                        summary.scalar('current_epoch', ce[0], step=gs)

                        return summary.all_summary_ops()
Example #32
    def host_call_fn(global_step, total_loss, cross_entropy,
                     regularization_loss, learning_rate, train_accuracy):

        global_step = global_step[0]
        with summary.create_file_writer(
                params['model_dir'],
                max_queue=params['iterations_per_loop']).as_default():
            with summary.always_record_summaries():
                summary.scalar('entire_loss', total_loss[0], step=global_step)
                summary.scalar('cross_entropy_loss',
                               cross_entropy[0],
                               step=global_step)
                summary.scalar('regularization_loss',
                               regularization_loss[0],
                               step=global_step)
                summary.scalar('learning_rate',
                               learning_rate[0],
                               step=global_step)
                summary.scalar('train_accuracy',
                               train_accuracy[0],
                               step=global_step)
                return summary.all_summary_ops()
Example #33
        def host_call_fn(gs, lpl, dcl, ls):
            gs = gs[0]
            with summary.create_file_writer(params['model_dir'], max_queue=params['save_checkpoints_steps']).as_default():
                with summary.always_record_summaries():
                    summary.scalar('label_prediction_loss', lpl[0], step=gs)
                    summary.scalar('domain_classification_loss', dcl[0], step=gs)
                    summary.scalar('loss', ls[0], step=gs)

            return summary.all_summary_ops()