Example #1
    def loss_fn(self):
        adv = tf.placeholder(tf.float32, [None], name="advantages")
        returns = tf.placeholder(tf.float32, [None], name="returns")

        policy_loss = -tf.reduce_mean(self.policy.logli * adv)
        value_loss = tf.reduce_mean(
            (self.value - returns)**2) * self.value_coef
        entropy_loss = tf.reduce_mean(self.policy.entropy) * self.entropy_coef
        # we want to reduce policy and value errors, and maximize entropy
        # but since optimizer is minimizing the signs are opposite
        full_loss = policy_loss + value_loss - entropy_loss

        try:
            with open("loss_fn.txt", "x+") as f:
                f.write("out\n")
                f.write("full_loss: {0} type: {1}\n".format(
                    type(full_loss), full_loss.dtype))
                f.write("policy_loss: {0} type: {1}\n".format(
                    type(policy_loss), policy_loss.dtype))
                f.write("value_loss: {0} type: {1}\n".format(
                    type(value_loss), value_loss.dtype))
                f.write("entropy_loss: {0} type: {1}\n".format(
                    type(entropy_loss), entropy_loss.dtype))
                f.write("adv: {0} type: {1}\n".format(type(adv), adv.dtype))
                f.write("returns: {0} type: {1}\n".format(
                    type(returns), returns.dtype))
        except FileExistsError:
            pass  # debug dump from a previous run already exists

        return full_loss, [policy_loss, value_loss,
                           entropy_loss], [adv, returns]
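The losses above are usually handed straight to a minimizer. A minimal wiring sketch, assuming TF1 graph mode and a hypothetical agent object exposing loss_fn() (the Adam learning rate is also just an illustration):

full_loss, component_losses, placeholders = agent.loss_fn()
train_op = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(full_loss)
# At run time the advantage/return placeholders are fed through feed_dict, e.g.
# sess.run(train_op, feed_dict={placeholders[0]: adv_batch, placeholders[1]: return_batch})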
Example #2
 def true_fn(images):
   if augment_entire_batch:
     image_2 = images
     mean_color = tf.reduce_mean(image_2, axis=[1, 2], keepdims=True)
     print(mean_color.shape)
   else:
     image_1, image_2 = tf.unstack(images)
     mean_color = tf.reduce_mean(image_2, axis=[0, 1], keepdims=True)
   def body(var_img, mean_color):
     x0 = tf.random.uniform([], 0, width, dtype=tf.int32)
     y0 = tf.random.uniform([], 0, height, dtype=tf.int32)
     dx = tf.random.uniform([], min_size, max_size, dtype=tf.int32)
     dy = tf.random.uniform([], min_size, max_size, dtype=tf.int32)
     x = tf.range(width)
     x_mask = (x0 <= x) & (x < x0+dx)
     y = tf.range(height)
     y_mask = (y0 <= y) & (y < y0+dy)
     mask = x_mask & y_mask[:, tf.newaxis]
     mask = tf.cast(mask[:, :, tf.newaxis], image_2.dtype)
     result = var_img * (1 - mask) + mean_color * mask
     return result
   # Perform at least one erase operation.
   image_2 = body(image_2, mean_color)
   # Perform additional erase operations.
   for _ in range(max_operations - 1):
     perform_erase = tf.less(
         tf.random.uniform([]), probability_additional_operations)
     image_2 = tf.cond(perform_erase, lambda: body(image_2, mean_color),
                       lambda: image_2)
   if augment_entire_batch:
     images = image_2
   else:
     images = tf.stack([image_1, image_2])
   return images
Example #3
    def _mine(self, x_in, y_in):
        """Mutual Infomation Neural Estimator.

        Implement mutual information neural estimator from
        Belghazi et al "Mutual Information Neural Estimation"
        http://proceedings.mlr.press/v80/belghazi18a/belghazi18a.pdf
        'DV':  sup_T E_P(T) - log E_Q(exp(T))
        where P is the joint distribution of X and Y, and Q is the product
         marginal distribution of P. DV is a lower bound for
         KLD(P||Q)=MI(X, Y).

        """
        y_in_tran = transpose2(y_in, 1, 0)
        y_shuffle_tran = math_ops.shuffle(y_in_tran)
        y_shuffle = transpose2(y_shuffle_tran, 1, 0)

        # propagate the forward pass
        T_xy, _ = self._network([x_in, y_in])
        T_x_y, _ = self._network([x_in, y_shuffle])

        # compute the negative loss (maximize loss == minimize -loss)
        mean_exp_T_x_y = tf.reduce_mean(tf.math.exp(T_x_y), axis=1)
        loss = tf.reduce_mean(T_xy, axis=1) - tf.math.log(mean_exp_T_x_y)
        loss = tf.squeeze(loss, axis=-1)  # Mutual Information

        return loss
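To make the DV bound in the docstring concrete, here is a self-contained numeric sketch, assuming TF2 eager execution; the toy critic below stands in for self._network and is not part of the original code:

import tensorflow as tf

def dv_lower_bound(t_joint, t_marginal):
    # DV: E_P[T] - log E_Q[exp(T)]
    return tf.reduce_mean(t_joint) - tf.math.log(tf.reduce_mean(tf.exp(t_marginal)))

x = tf.random.normal([256, 1])
y = x + 0.1 * tf.random.normal([256, 1])   # samples from the joint P(X, Y)
y_shuffled = tf.random.shuffle(y)          # samples from the product of marginals
critic = lambda a, b: a * b                # toy statistic network T(x, y)
mi_estimate = dv_lower_bound(critic(x, y), critic(x, y_shuffled))
print(float(mi_estimate))                  # typically positive when x and y are correlated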
Example #4
    def loss_fn(self):
        adv = tf.placeholder(tf.float32, [None], name="advantages")
        returns = tf.placeholder(tf.float32, [None], name="returns")
        logli_old = tf.placeholder(tf.float32, [None], name="logli_old")
        value_old = tf.placeholder(tf.float32, [None], name="value_old")

        ratio = tf.exp(self.policy.logli - logli_old)
        clipped_ratio = tf.clip_by_value(ratio, 1 - self.clip_ratio,
                                         1 + self.clip_ratio)

        value_err = (self.value - returns)**2
        if self.clip_value > 0.0:
            clipped_value = tf.clip_by_value(self.value,
                                             value_old - self.clip_value,
                                             value_old + self.clip_value)
            clipped_value_err = (clipped_value - returns)**2
            value_err = tf.maximum(value_err, clipped_value_err)

        policy_loss = -tf.reduce_mean(
            tf.minimum(adv * ratio, adv * clipped_ratio))
        value_loss = tf.reduce_mean(value_err) * self.value_coef
        entropy_loss = tf.reduce_mean(self.policy.entropy) * self.entropy_coef
        # we want to reduce policy and value errors, and maximize entropy
        # but since optimizer is minimizing the signs are opposite
        full_loss = policy_loss + value_loss - entropy_loss

        return full_loss, [policy_loss, value_loss,
                           entropy_loss], [adv, returns, logli_old, value_old]
Example #5
 def _summary():
     with tf.name_scope('ActorCriticLoss'):
         tf.summary.scalar("values", tf.reduce_mean(value))
         tf.summary.scalar("returns", tf.reduce_mean(returns))
         tf.summary.scalar("advantages", tf.reduce_mean(advantages))
         tf.summary.scalar("explained_variance_of_return_by_value",
                           common.explained_variance(value, returns))
Example #6
def quantile_loss(y, y_hat, k=4):
    # k evenly spaced quantile levels in [0, 1].
    quantiles = np.linspace(0., 1., k)
    loss = 0.
    y = tf.squeeze(y, axis=2)
    for idx, q in enumerate(quantiles):
        error = tf.subtract(y, y_hat[:, :, idx])
        # Pinball loss: penalize under-prediction by q and over-prediction by (1 - q).
        loss += tf.reduce_mean(tf.maximum(q * error, (q - 1) * error), axis=-1)
    return tf.reduce_mean(loss)
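A quick standalone check of the pinball loss above, assuming TF2 eager execution and made-up shapes (batch of 2 series, 3 time steps, 4 quantile forecasts per step):

import numpy as np
import tensorflow as tf

y_true = tf.constant(np.random.randn(2, 3, 1), dtype=tf.float32)
y_pred = tf.constant(np.random.randn(2, 3, 4), dtype=tf.float32)
print(float(quantile_loss(y_true, y_pred, k=4)))   # scalar pinball loss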
Example #7
    def _loss_op(self):
        with tf.name_scope("loss_op"):
            self.d_loss = tf.reduce_mean(self._fake_d) - tf.reduce_mean(
                self._true_d)
            self.g_loss = -tf.reduce_mean(self._fake_d)

            # reg = self._reg(tf.shape(self.x)[0], self.d, self.x, self.x_fake)
            # self.d_loss += reg
            self.loss = [self.d_loss, self.g_loss]
Example #8
 def _summary():
     with self.name_scope:
         tf.summary.scalar("values", tf.reduce_mean(value))
         tf.summary.scalar("returns", tf.reduce_mean(returns))
         tf.summary.scalar("advantages/mean",
                           tf.reduce_mean(advantages))
         tf.summary.histogram("advantages/value", advantages)
         tf.summary.scalar("explained_variance_of_return_by_value",
                           common.explained_variance(value, returns))
Example #9
def _variable_summaries(var):
    """Attach a lot of summaries to a Tensor (for TensorBoard visualization)."""
    with tf.name_scope('summaries'):
        mean = tf.reduce_mean(var)
        tf.summary.scalar('mean', mean)
        with tf.name_scope('stddev'):
            stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
        tf.summary.scalar('stddev', stddev)
        tf.summary.scalar('max', tf.reduce_max(var))
        tf.summary.scalar('min', tf.reduce_min(var))
        tf.summary.histogram('histogram', var)
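Typical usage is to attach these summaries to a weight tensor before merging, assuming TF1 graph mode; the variable name is illustrative:

w = tf.get_variable('dense/kernel', shape=[128, 10])
_variable_summaries(w)
merged_summaries = tf.summary.merge_all()   # evaluate and write with a FileWriter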
Example #10
    def loss_fn(self):
        adv = tf.placeholder(tf.float32, [None], name="advantages")
        returns = tf.placeholder(tf.float32, [None], name="returns")

        policy_loss = -tf.reduce_mean(self.policy.logli * adv)
        value_loss = tf.reduce_mean((self.value - returns)**2) * self.value_coef
        entropy_loss = tf.reduce_mean(self.policy.entropy) * self.entropy_coef
        # we want to reduce policy and value errors, and maximize entropy
        # but since optimizer is minimizing the signs are opposite
        full_loss = policy_loss + value_loss - entropy_loss

        return full_loss, [policy_loss, value_loss, entropy_loss], [adv, returns]
Example #11
    def loss_fn(self):
        adv = tf.placeholder(tf.float32, [None], name="advantages")
        returns = tf.placeholder(tf.float32, [None], name="returns")

        policy_loss = -tf.reduce_mean(self.policy.logli * adv)
        value_loss = tf.reduce_mean((self.value - returns)**2) * self.value_coef
        entropy_loss = tf.reduce_mean(self.policy.entropy) * self.entropy_coef
        # we want to reduce policy and value errors, and maximize entropy
        # but since optimizer is minimizing the signs are opposite
        full_loss = policy_loss + value_loss - entropy_loss

        return full_loss, [policy_loss, value_loss, entropy_loss], [adv, returns]
Example #12
def summarize_stats(stats):
  """Summarize a dictionary of variables.

  Args:
    stats: a dictionary of {name: tensor} to compute stats over.
  """
  for name, stat in stats.items():
    mean = tf.reduce_mean(stat)
    tf.summary.scalar('mean_%s' % name, mean)
    tf.summary.scalar('max_%s' % name, tf.reduce_max(stat))
    tf.summary.scalar('min_%s' % name, tf.reduce_min(stat))
    std = tf.sqrt(tf.reduce_mean(tf.square(stat)) - tf.square(mean) + 1e-10)
    tf.summary.scalar('std_%s' % name, std)
    tf.summary.histogram(name, stat)
Example #13
    def train_step(self,
                   time_step: ActionTimeStep,
                   state,
                   calc_intrinsic_reward=True):
        """
        Args:
            time_step (ActionTimeStep): input time_step data for ICM
            state (Tensor): state for ICM (previous observation)
            calc_intrinsic_reward (bool): if False, only return the losses
        Returns:
            AlgorithmStep:
                outputs: empty tuple ()
                state: observation
                info (ICMInfo):
        """
        feature = time_step.observation
        prev_action = time_step.prev_action

        if self._encoding_net is not None:
            feature, _ = self._encoding_net(feature)
        prev_feature = state
        prev_action = self._encode_action(prev_action)

        forward_pred, _ = self._forward_net(
            inputs=[tf.stop_gradient(prev_feature), prev_action])
        forward_loss = 0.5 * tf.reduce_mean(
            tf.square(tf.stop_gradient(feature) - forward_pred), axis=-1)

        action_pred, _ = self._inverse_net(inputs=[prev_feature, feature])

        if tensor_spec.is_discrete(self._action_spec):
            inverse_loss = tf.nn.softmax_cross_entropy_with_logits(
                labels=prev_action, logits=action_pred)
        else:
            inverse_loss = 0.5 * tf.reduce_mean(
                tf.square(prev_action - action_pred), axis=-1)

        intrinsic_reward = ()
        if calc_intrinsic_reward:
            intrinsic_reward = tf.stop_gradient(forward_loss)
            intrinsic_reward = self._reward_normalizer.normalize(
                intrinsic_reward)

        return AlgorithmStep(
            outputs=(),
            state=feature,
            info=ICMInfo(reward=intrinsic_reward,
                         loss=LossInfo(loss=forward_loss + inverse_loss,
                                       extra=dict(forward_loss=forward_loss,
                                                  inverse_loss=inverse_loss))))
Example #14
    def test_ppo(self):
        env_class = PolicyUnittestEnv
        learning_rate = 1e-1
        iterations = 20
        batch_size = 100
        steps_per_episode = 13
        env = env_class(batch_size, steps_per_episode)
        env = TFPyEnvironment(env)

        eval_env = env_class(batch_size, steps_per_episode)
        eval_env = TFPyEnvironment(eval_env)

        algorithm = create_algorithm(env, learning_rate=learning_rate)
        driver = SyncOffPolicyDriver(env,
                                     algorithm,
                                     debug_summaries=DEBUGGING,
                                     summarize_grads_and_vars=DEBUGGING)
        replayer = driver.exp_replayer
        eval_driver = OnPolicyDriver(eval_env,
                                     algorithm,
                                     training=False,
                                     greedy_predict=True)

        env.reset()
        eval_env.reset()
        time_step = driver.get_initial_time_step()
        policy_state = driver.get_initial_policy_state()
        for i in range(iterations):
            time_step, policy_state = driver.run(max_num_steps=batch_size *
                                                 steps_per_episode,
                                                 time_step=time_step,
                                                 policy_state=policy_state)

            experience = replayer.replay_all()
            driver.train(experience, num_updates=4, mini_batch_size=25)
            replayer.clear()
            eval_env.reset()
            eval_time_step, _ = eval_driver.run(
                max_num_steps=(steps_per_episode - 1) * batch_size)
            logging.info("%d reward=%f", i,
                         float(tf.reduce_mean(eval_time_step.reward)))

        eval_env.reset()
        eval_time_step, _ = eval_driver.run(
            max_num_steps=(steps_per_episode - 1) * batch_size)
        logging.info("reward=%f", float(tf.reduce_mean(eval_time_step.reward)))
        self.assertAlmostEqual(1.0,
                               float(tf.reduce_mean(eval_time_step.reward)),
                               delta=1e-1)
Example #15
def fit_gaussian(embeddings, damping=1e-7, full_covariance=False):
  """Fits a unimodal Gaussian distribution to `embeddings`.

  Args:
    embeddings: A [batch_size, embedding_dim] tf.Tensor of embeddings.
    damping: The scale of the covariance damping coefficient.
    full_covariance: Whether to use a full or diagonal covariance.

  Returns:
    Parameter estimates (means and log variances) for a Gaussian model.
  """
  if full_covariance:
    num, dim = tf.split(tf.shape(input=embeddings), num_or_size_splits=2)
    num, dim = tf.squeeze(num), tf.squeeze(dim)
    sample_mean = tf.reduce_mean(input_tensor=embeddings, axis=0)
    centered_embeddings = embeddings - sample_mean
    sample_covariance = tf.einsum('ij,ik->kj', centered_embeddings,
                                  centered_embeddings)  # Outer product.
    sample_covariance += damping * tf.eye(dim)  # Positive definiteness.
    sample_covariance /= tf.cast(num, dtype=tf.float32)  # Scale by N.
    return sample_mean, sample_covariance
  else:
    sample_mean, sample_variances = tf.nn.moments(x=embeddings, axes=[0])
    log_variances = tf.math.log(sample_variances +
                                damping * tf.ones_like(sample_variances))
    return sample_mean, log_variances
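An illustrative call with toy embeddings, assuming TF2 eager execution; the shapes are made up:

import tensorflow as tf

embeddings = tf.random.normal([32, 8])
mean, log_vars = fit_gaussian(embeddings)                          # diagonal model
mean, covariance = fit_gaussian(embeddings, full_covariance=True)  # full covariance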
Example #16
    def get_train_op():
      loss = tf.reduce_mean(input_tensor=tf.square(q_clicked - target_clicked))
      if self.summary_writer is not None:
        with tf.variable_scope('Losses'):
          tf.summary.scalar('Loss', loss)

      return loss
Example #17
 def fn():
   """Loss function for when number of input and output boxes is positive."""
   if is_balanced:
     weights = loss_utils.get_balanced_loss_weights_multiclass(
         labels=input_boxes_instance_id)
   else:
     weights = tf.ones([tf.shape(input_boxes_instance_id)[0], 1],
                       dtype=tf.float32)
   gt_length = tf.reshape(input_boxes_length, [-1, 1])
   gt_height = tf.reshape(input_boxes_height, [-1, 1])
   gt_width = tf.reshape(input_boxes_width, [-1, 1])
   predicted_length = tf.reshape(output_boxes_length, [-1, 1])
   predicted_height = tf.reshape(output_boxes_height, [-1, 1])
   predicted_width = tf.reshape(output_boxes_width, [-1, 1])
   predicted_length /= gt_length
   predicted_height /= gt_height
   predicted_width /= gt_width
   predicted_size = tf.concat(
       [predicted_length, predicted_height, predicted_width], axis=1)
   gt_size = tf.ones_like(predicted_size)
   if loss_type == 'huber':
     loss_fn = tf.keras.losses.Huber(
         delta=delta, reduction=tf.keras.losses.Reduction.NONE)
   elif loss_type == 'absolute_difference':
     loss_fn = tf.keras.losses.MeanAbsoluteError(
         reduction=tf.keras.losses.Reduction.NONE)
   else:
     raise ValueError(('Unknown loss type %s.' % loss_type))
   size_losses = loss_fn(y_true=gt_size, y_pred=predicted_size)
   return tf.reduce_mean(size_losses * tf.reshape(weights, [-1]))
Example #18
    def train_step(self, inputs, state, calc_intrinsic_reward=True):
        """
        Args:
            inputs (tuple): observation
            state (tuple):  empty tuple ()
            calc_intrinsic_reward (bool): if False, only return the losses
        Returns:
            AlgorithmStep:
                outputs: empty tuple ()
                state: empty tuple ()
                info: RNDInfo
        """
        observation, _ = inputs
        if self._observation_normalizer is not None:
            observation = self._observation_normalizer.normalize(observation)

        pred_embedding, _ = self._predictor_net(observation)
        target_embedding, _ = self._target_net(observation)

        loss = 0.5 * tf.reduce_mean(
            tf.square(pred_embedding - tf.stop_gradient(target_embedding)),
            axis=-1)

        intrinsic_reward = ()
        if calc_intrinsic_reward:
            intrinsic_reward = tf.stop_gradient(loss)
            intrinsic_reward = self._reward_normalizer.normalize(
                intrinsic_reward)

        return AlgorithmStep(outputs=(),
                             state=(),
                             info=RNDInfo(reward=intrinsic_reward,
                                          loss=LossInfo(loss=loss)))
Example #19
 def _summary_op(self):
     with tf.name_scope("summary_op"):
         # self._summary_list += tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
         metrics = regr_metrics(y=self.y, y_hat=self.y_hat)
         metrics = {k: tf.reduce_mean(v) for k, v in metrics.items()}
         self._summary_dict.update(metrics)
         self.summary = summary_op(t_dict=self._summary_dict)
Example #20
        def _forward_pass(iteration_idx_, variables_mapping_, images_,
                          onehot_labels_):
            """Helper function to compute the outputs of a forward pass."""

            with self.embedding_fn.reparameterize(variables_mapping_):
                # TODO(eringrant): Implement non-transductive batch normalization (i.e.,
                # pass the support set statistics through the query set forward pass).
                embeddings_ = self.embedding_fn(images_, training=True)

            # TODO(eringrant): `head_fn` is an attribute of the subclass.
            with self.head_fn.reparameterize(variables_mapping_):
                predictions_ = self.head_fn(embeddings_)[:, :data.way]

            accuracy_ = tf.reduce_mean(input_tensor=self.compute_accuracy(
                onehot_labels=onehot_labels_, predictions=predictions_))

            inner_objective_ = self.inner_objective(
                onehot_labels=onehot_labels_,
                predictions=predictions_,
                iteration_idx=iteration_idx_)

            outer_objective_ = self.outer_objective(
                onehot_labels=onehot_labels_,
                predictions=predictions_,
            )

            return ForwardPass(
                embeddings=embeddings_,
                predictions=predictions_,
                inner_objective_value=inner_objective_,
                outer_objective_value=outer_objective_,
                accuracy=accuracy_,
            )
Example #21
    def _compute_prototype_loss(self,
                                embeddings,
                                labels,
                                labels_one_hot,
                                prototypes=None):
        """Computes the loss and accuracy on an episode."""
        labels_dense = labels
        if prototypes is None:
            # Compute protos.
            labels = tf.cast(labels_one_hot, tf.float32)
            # [num examples, 1, embedding size].
            embeddings_ = tf.expand_dims(embeddings, 1)
            # [num examples, num classes, 1].
            labels = tf.expand_dims(labels, 2)
            # Sums each class' embeddings. [num classes, embedding size].
            class_sums = tf.reduce_sum(labels * embeddings_, 0)
            # The prototype of each class is the averaged embedding of its examples.
            class_num_images = tf.reduce_sum(labels, 0)  # [way].
            prototypes = class_sums / class_num_images  # [way, embedding size].

        # Compute logits.
        embeddings = tf.nn.l2_normalize(embeddings, 1, epsilon=1e-3)
        prototypes = tf.nn.l2_normalize(prototypes, 1, epsilon=1e-3)
        logits = tf.matmul(embeddings, prototypes, transpose_b=True)

        loss = self.compute_loss(labels_one_hot, logits)
        acc = tf.reduce_mean(self.compute_accuracy(labels_dense, logits))
        return loss, acc, prototypes, logits
Example #22
  def _build_train_op(self):
    """Builds the training op for Rainbow.

    Returns:
      train_op: An op performing one step of training.
    """
    target_distribution = tf.stop_gradient(self._build_target_distribution())

    # size of indices: batch_size x 1.
    indices = tf.range(tf.shape(self._replay_logits)[0])[:, None]
    # size of reshaped_actions: batch_size x 2.
    reshaped_actions = tf.concat([indices, self._replay.actions[:, None]], 1)
    # For each element of the batch, fetch the logits for its selected action.
    chosen_action_logits = tf.gather_nd(self._replay_logits, reshaped_actions)

    loss = tf.nn.softmax_cross_entropy_with_logits(
        labels=target_distribution,
        logits=chosen_action_logits)

    optimizer = tf.train.AdamOptimizer(
        learning_rate=self.learning_rate,
        epsilon=self.optimizer_epsilon)

    update_priorities_op = self._replay.tf_set_priority(
        self._replay.indices, tf.sqrt(loss + 1e-10))

    target_priorities = self._replay.tf_get_priority(self._replay.indices)
    target_priorities = tf.math.add(target_priorities, 1e-10)
    target_priorities = 1.0 / tf.sqrt(target_priorities)
    target_priorities /= tf.reduce_max(target_priorities)

    weighted_loss = target_priorities * loss

    with tf.control_dependencies([update_priorities_op]):
      return optimizer.minimize(tf.reduce_mean(weighted_loss)), weighted_loss
Example #23
    def build_graph(self):
        """Builds the neural network graph."""

        # define graph
        self.g = tf.Graph()
        with self.g.as_default():

            # create and store a new session for the graph
            self.sess = tf.Session()

            # define placeholders
            self.x = tf.placeholder(shape=[None, self.dim_input],
                                    dtype=tf.float32)
            self.y = tf.placeholder(shape=[None, self.num_classes],
                                    dtype=tf.float32)

            # define simple model
            with tf.variable_scope('last_layer'):
                self.z = tf.layers.dense(inputs=self.x, units=self.num_classes)

            self.loss = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits_v2(labels=self.y,
                                                           logits=self.z))

            self.output_probs = tf.nn.softmax(self.z)

            # Variables of the last layer
            self.ll_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
            self.ll_vars_concat = tf.concat(
                [self.ll_vars[0],
                 tf.expand_dims(self.ll_vars[1], axis=0)], 0)

            # Summary
            _variable_summaries(self.ll_vars_concat)

            # saving the weights of last layer when running bootstrap algorithm
            self.saver = tf.train.Saver(var_list=self.ll_vars)

            self.gd_opt = tf.train.GradientDescentOptimizer(self.step_size)

            # SGD optimizer for the last layer
            grads_vars_sgd = self.gd_opt.compute_gradients(self.loss)
            self.train_op = self.gd_opt.apply_gradients(grads_vars_sgd)

            for g, v in grads_vars_sgd:
                if g is not None:
                    s = list(v.name)
                    s[v.name.rindex(':')] = '_'
                    tf.summary.histogram(''.join(s) + '/grad_hist_boot_sgd', g)

            # Merge all the summaries and write them out
            self.all_summaries = tf.summary.merge_all()
            location = os.path.join(self.working_dir, 'logs')
            self.writer = tf.summary.FileWriter(location, graph=self.g)

            saver_network = tf.train.Saver(var_list=self.ll_vars)
            print('Loading the network...')
            # Restores from checkpoint
            saver_network.restore(self.sess, self.model_dir)
            print('Graph successfully loaded.')
Example #24
        def spatial_loss(truth_features, predicted_features, space_desc):
            feature_losses = []
            for truth, prediction, spec in zip(truth_features,
                                               predicted_features,
                                               space_desc.features):
                if spec.type == FeatureType.CATEGORICAL:
                    truth = tf.transpose(truth, (0, 2, 3, 1))
                    prediction = tf.transpose(prediction, (0, 2, 3, 1))
                    feature_losses.append(
                        tf.losses.softmax_cross_entropy(truth, prediction))

                    summary_image = tf.argmax(
                        tf.concat([truth, prediction], 2), 3)
                    summary_image = tf.gather(
                        palette[space_desc.index][spec.index], summary_image)
                    tf.summary.image(spec.name, summary_image)
                else:
                    feature_losses.append(
                        tf.losses.mean_squared_error(truth, prediction))

                    summary_image = tf.concat([truth, prediction], 3)
                    tf.summary.image(spec.name,
                                     tf.transpose(summary_image, (0, 2, 3, 1)))

                tf.summary.scalar(spec.name, feature_losses[-1])

            return tf.reduce_mean(tf.stack(feature_losses))
Example #25
def safety_critic_loss(tf_agent,
                       safety_critic,
                       time_steps,
                       actions,
                       next_time_steps,
                       safety_rewards,
                       weights=None):
  """Returns a critic loss with safety."""
  next_actions, next_log_pis = tf_agent._actions_and_log_probs(  # pylint: disable=protected-access
      next_time_steps)
  del next_log_pis
  target_input = (next_time_steps.observation[0], next_actions[0])
  target_q_values, unused_network_state1 = safety_critic(
      target_input, next_time_steps.step_type[0])
  target_q_values = tf.nn.sigmoid(target_q_values)
  safety_rewards = tf.to_float(safety_rewards)

  td_targets = tf.stop_gradient(safety_rewards + (1 - safety_rewards) *
                                next_time_steps.discount * target_q_values)
  td_targets = tf.squeeze(td_targets)

  pred_input = (time_steps.observation[0], actions[0])
  pred_td_targets, unused_network_state1 = safety_critic(
      pred_input, time_steps.step_type[0])
  loss = tf.losses.sigmoid_cross_entropy(td_targets, pred_td_targets)

  if weights is not None:
    loss *= tf.to_float(tf.squeeze(weights))

  # Take the mean across the batch.
  loss = tf.reduce_mean(input_tensor=loss)
  return loss
Example #26
    def _build_train_op(self):
        """Builds the training op for Rainbow.

    Returns:
      train_op: An op performing one step of training.
    """

        replay_action_one_hot = tf.one_hot(self._replay.actions,
                                           self.num_actions,
                                           1.,
                                           0.,
                                           name='action_one_hot')
        replay_chosen_q = tf.reduce_sum(self._replay_qs *
                                        replay_action_one_hot,
                                        reduction_indices=1,
                                        name='replay_chosen_q')

        target = tf.stop_gradient(self._build_target_q_op())
        loss = tf.losses.huber_loss(target,
                                    replay_chosen_q,
                                    reduction=tf.losses.Reduction.NONE)

        update_priorities_op = self._replay.tf_set_priority(
            self._replay.indices, tf.sqrt(loss + 1e-10))

        target_priorities = self._replay.tf_get_priority(self._replay.indices)
        target_priorities = tf.math.add(target_priorities, 1e-10)
        target_priorities = 1.0 / tf.sqrt(target_priorities)
        target_priorities /= tf.reduce_max(target_priorities)

        weighted_loss = target_priorities * loss

        with tf.control_dependencies([update_priorities_op]):
            return self.optimizer.minimize(
                tf.reduce_mean(weighted_loss)), weighted_loss
Example #27
    def loss_fn(self):
        adv = tf.placeholder(tf.float32, [None], name="advantages")
        returns = tf.placeholder(tf.float32, [None], name="returns")
        logli_old = tf.placeholder(tf.float32, [None], name="logli_old")

        ratio = tf.exp(self.policy.logli - logli_old)
        clipped_ratio = tf.clip_by_value(ratio, 1-self.clip_ratio, 1+self.clip_ratio)

        policy_loss = -tf.reduce_mean(tf.minimum(adv * ratio, adv * clipped_ratio))
        # TODO clip value loss
        value_loss = tf.reduce_mean((self.value - returns)**2) * self.value_coef
        entropy_loss = tf.reduce_mean(self.policy.entropy) * self.entropy_coef
        # we want to reduce policy and value errors, and maximize entropy
        # but since optimizer is minimizing the signs are opposite
        full_loss = policy_loss + value_loss - entropy_loss

        return full_loss, [policy_loss, value_loss, entropy_loss], [adv, returns, logli_old]
Example #28
def tanh_similarity(states,
                    actions,
                    rewards,
                    next_states,
                    contexts,
                    mse_scale=1.0,
                    state_scales=1.0,
                    goal_scales=1.0,
                    summarize=False):
  """Returns the similarity between next_states and contexts using tanh and mse.

  Args:
    states: A [batch_size, num_state_dims] Tensor representing a batch
        of states.
    actions: A [batch_size, num_action_dims] Tensor representing a batch
      of actions.
    rewards: A [batch_size] Tensor representing a batch of rewards.
    next_states: A [batch_size, num_state_dims] Tensor representing a batch
      of next states.
    contexts: A list of [batch_size, num_context_dims] Tensor representing
      a batch of contexts.
    mse_scale: A float, to scale mse before tanh.
    state_scales: multiplicative scale for (next) states. A scalar or 1D tensor,
      must be broadcastable to number of state dimensions.
    goal_scales: multiplicative scale for contexts. A scalar or 1D tensor,
      must be broadcastable to number of goal dimensions.
    summarize: (boolean) enable summary ops.


  Returns:
    A new tf.float32 [batch_size] rewards Tensor, and
      tf.float32 [batch_size] discounts tensor.
  """
  del states, actions, rewards  # Unused
  mse = tf.reduce_mean(tf.squared_difference(next_states * state_scales,
                                             contexts[0] * goal_scales), -1)
  tanh = tf.tanh(mse_scale * mse)
  if summarize:
    with tf.name_scope('RewardFn/'):
      tf.summary.scalar('mean_mse', tf.reduce_mean(mse))
      tf.summary.histogram('mse', mse)
      tf.summary.scalar('mean_tanh', tf.reduce_mean(tanh))
      tf.summary.histogram('tanh', tanh)
  rewards = tf.to_float(1 - tanh)
  return rewards, tf.ones_like(rewards)
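A minimal call sketch, assuming the TF1-style API used in the snippet; states, actions and rewards are deleted unused inside the function, so None is enough for them here:

next_states = tf.random_normal([4, 3])
contexts = [tf.random_normal([4, 3])]
rewards, discounts = tanh_similarity(states=None, actions=None, rewards=None,
                                     next_states=next_states, contexts=contexts)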
Example #29
 def fn():
     """Loss function for when number of input and output boxes is positive."""
     if is_balanced:
         weights = loss_utils.get_balanced_loss_weights_multiclass(
             labels=input_boxes_instance_id)
     else:
         weights = tf.ones([tf.shape(input_boxes_instance_id)[0], 1],
                           dtype=tf.float32)
     normalized_box_size = 5.0
     predicted_boxes_length = output_boxes_length
     predicted_boxes_height = output_boxes_height
     predicted_boxes_width = output_boxes_width
     predicted_boxes_center = output_boxes_center
     predicted_boxes_rotation_matrix = output_boxes_rotation_matrix
     gt_boxes_length = input_boxes_length
     gt_boxes_height = input_boxes_height
     gt_boxes_width = input_boxes_width
     gt_boxes_center = input_boxes_center
     gt_boxes_rotation_matrix = input_boxes_rotation_matrix
     if loss_type in ['normalized_huber', 'normalized_euclidean']:
         predicted_boxes_length /= (gt_boxes_length / normalized_box_size)
         predicted_boxes_height /= (gt_boxes_height / normalized_box_size)
         predicted_boxes_width /= (gt_boxes_width / normalized_box_size)
         gt_boxes_length = tf.ones_like(
             gt_boxes_length, dtype=tf.float32) * normalized_box_size
         gt_boxes_height = tf.ones_like(
             gt_boxes_height, dtype=tf.float32) * normalized_box_size
         gt_boxes_width = tf.ones_like(
             gt_boxes_width, dtype=tf.float32) * normalized_box_size
     gt_box_corners = box_utils.get_box_corners_3d(
         boxes_length=gt_boxes_length,
         boxes_height=gt_boxes_height,
         boxes_width=gt_boxes_width,
         boxes_rotation_matrix=gt_boxes_rotation_matrix,
         boxes_center=gt_boxes_center)
     predicted_box_corners = box_utils.get_box_corners_3d(
         boxes_length=predicted_boxes_length,
         boxes_height=predicted_boxes_height,
         boxes_width=predicted_boxes_width,
         boxes_rotation_matrix=predicted_boxes_rotation_matrix,
         boxes_center=predicted_boxes_center)
     corner_weights = tf.tile(weights, [1, 8])
     if loss_type in ['huber', 'normalized_huber']:
         loss_fn = tf.keras.losses.Huber(
             delta=delta, reduction=tf.keras.losses.Reduction.NONE)
     elif loss_type in [
             'normalized_absolute_difference', 'absolute_difference'
     ]:
         loss_fn = tf.keras.losses.MeanAbsoluteError(
             reduction=tf.keras.losses.Reduction.NONE)
     else:
         raise ValueError(('Unknown loss type %s.' % loss_type))
     box_corner_losses = loss_fn(y_true=tf.reshape(gt_box_corners, [-1, 3]),
                                 y_pred=tf.reshape(predicted_box_corners,
                                                   [-1, 3]))
     return tf.reduce_mean(box_corner_losses *
                           tf.reshape(corner_weights, [-1]))
Example #30
    def _reg(cls, batch_size, d, x, x_fake, beta=1e-1):
        alpha = tf.random_uniform(shape=[batch_size, 1], minval=0., maxval=1.)
        interpolates = alpha * x + (1 - alpha) * x_fake
        int_d = d(interpolates)
        gradients = tf.gradients(int_d, [interpolates])[0]

        slopes = tf.sqrt(
            tf.reduce_sum(tf.square(gradients), reduction_indices=[1]))
        return beta * tf.reduce_mean((slopes - 1)**2)
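This is the gradient penalty that the commented-out lines in Example #7 would add to the critic loss; re-enabling them would look like the sketch below (attribute names taken from Example #7):

reg = self._reg(tf.shape(self.x)[0], self.d, self.x, self.x_fake)
self.d_loss += reg   # WGAN-GP style penalty on the critic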
Example #31
    def entropy_loss(self):
        with tf.name_scope('entropy_loss'):
            entropies = [
                dist.entropy() for name, dist in self.model.policy.items()
            ]
            entropy = tf.reduce_mean(tf.add_n(entropies))
            entropy_loss = -entropy * self.entropy_factor

        entropy_masked = tf.stack(entropies, axis=-1) * tf.gather(
            self.function_args_mask, self.input_actions['function_id'])
        entropy_masked = tf.reduce_mean(tf.reduce_sum(entropy_masked, axis=-1))
        tf.summary.scalar('policy_entropy', entropy, family='entropy')
        tf.summary.scalar('policy_entropy_masked',
                          entropy_masked,
                          family='entropy')
        tf.summary.scalar('entropy_loss', entropy_loss, family='losses')

        return entropy_loss
Example #32
    def loss_fn(self):
        """
      Sample trajectories and fit a cost function C. Form grad estimate with C
      and take a TRPO step for next policy.
      """
        adv = tf.placeholder(tf.float32, [None], name="advantages")
        returns = tf.placeholder(tf.float32, [None], name="returns")

        policy_loss = -tf.reduce_mean(self.policy.logli * adv)
        # value_loss = tf.reduce_mean((self.value - returns)**2) * self.value_coef
        value_loss = tf.reduce_mean(self.value - returns)

        entropy_loss = tf.reduce_mean(self.policy.entropy) * self.entropy_coef
        # we want to reduce policy and value errors, and maximize entropy
        # but since optimizer is minimizing the signs are opposite
        full_loss = policy_loss + value_loss - entropy_loss

        return value_loss