def projection_dist(states):
    inner = tf.multiply(states - starting_states, goals - starting_states)
    upper = tf.reduce_sum(inner, -1)
    sign = tf.sign(upper)
    
    result = tf.math.divide(upper, tf.norm(goals - starting_states, ord=2))

    term_1 = tf.norm(states - starting_states, 2)
   
    
    return -1*term_1+result
  def normalized_dist(states):
    inner = tf.multiply(states - starting_states, goals - starting_states)
    upper = tf.reduce_sum(inner, -1)
    sign = tf.sign(upper)
    
    result = sign * tf.square(tf.math.divide(upper, tf.norm(goals - starting_states, ord=2)))

    term_1 = tf.square(tf.norm(states - starting_states, 2))
    term_2 = tf.square(tf.math.divide(upper, tf.norm(goals - starting_states, ord=2)))
    
    return tf.sqrt(epsilon + tf.abs(result - alpha * (term_1 - term_2)))
Ejemplo n.º 3
0
def flip_normals_towards_viewpoint(points, normals, viewpoint):
    """Flips the normals to face towards the view point.

  Args:
    points: A tf.float32 tensor of size [N, 3].
    normals: A tf.float32 tensor of size [N, 3].
    viewpoint: A tf.float32 tensor of size [3].

  Returns:
    flipped_normals: A tf.float32 tensor of size [N, 3].
  """
    # (viewpoint - point)
    view_vector = tf.expand_dims(viewpoint, axis=0) - points
    # Dot product between the (viewpoint - point) and the plane normal
    cos_theta = tf.expand_dims(tf.reduce_sum(view_vector * normals, axis=1),
                               axis=1)
    # Revert normals where cos is negative.
    normals *= tf.sign(tf.tile(cos_theta, [1, 3]))
    return normals
Ejemplo n.º 4
0
  def _build_train_op(self, optimizer):
    """Build the TensorFlow graph used to learn the bisimulation metric.

    Args:
      optimizer: a tf.train optimizer.
    Returns:
      A TensorFlow op to minimize the bisimulation loss.
    """
    self.online_network = tf.make_template('Online',
                                           self._network_template)
    self.target_network = tf.make_template('Target',
                                           self._network_template)
    self.s1_ph = tf.placeholder(tf.float64, (self.batch_size, 2),
                                name='s1_ph')
    self.s2_ph = tf.placeholder(tf.float64, (self.batch_size, 2),
                                name='s2_ph')
    self.s1_online_distances = self.online_network(
        self._concat_states(self.s1_ph))
    self.s1_target_distances = self.target_network(
        self._concat_states(self.s1_ph))
    self.s2_target_distances = self.target_network(
        self._concat_states(self.s2_ph))
    self.action_ph = tf.placeholder(tf.int32, (self.batch_size,))
    self.rewards_ph = tf.placeholder(tf.float64, (self.batch_size,))
    # We use an expanding horizon for computing the distances.
    self.bisim_horizon_ph = tf.placeholder(tf.float64, ())
    # bisimulation_target_1 = rew_diff + gamma * next_distance.
    bisimulation_target_1 = tf.stop_gradient(self._build_bisimulation_target())
    # bisimulation_target_2 = curr_distance.
    bisimulation_target_2 = tf.stop_gradient(self.s1_target_distances)
    # We slowly taper in the maximum according to the bisim horizon.
    bisimulation_target = tf.maximum(
        bisimulation_target_1, bisimulation_target_2 * self.bisim_horizon_ph)
    # We zero-out diagonal entries, since those are estimating the distance
    # between a state and itself, which we know to be 0.
    diagonal_mask = 1.0 - tf.diag(tf.ones(self.batch_size, dtype=tf.float64))
    diagonal_mask = tf.reshape(diagonal_mask, (self.batch_size**2, 1))
    bisimulation_target *= diagonal_mask
    bisimulation_estimate = self.s1_online_distances
    # We start with a mask that includes everything.
    loss_mask = tf.ones(tf.shape(bisimulation_estimate))
    # We have to enforce that states being compared are done only using the same
    # action.
    indicators = self.action_ph
    indicators = tf.cast(indicators, tf.float64)
    # indicators will initially have shape [batch_size], we first tile it:
    square_ids = tf.tile([indicators], [self.batch_size, 1])
    # We subtract square_ids from its transpose:
    square_ids = square_ids - tf.transpose(square_ids)
    # At this point all zero-entries are the ones with equal IDs.
    # Now we would like to convert the zeros in this matrix to 1s, and make
    # everything else a 0. We can do this with the following operation:
    loss_mask = 1 - tf.abs(tf.sign(square_ids))
    # Now reshape to match the shapes of the estimate and target.
    loss_mask = tf.reshape(loss_mask, (self.batch_size**2, 1))
    larger_targets = bisimulation_target - bisimulation_estimate
    larger_targets_count = tf.reduce_sum(
        tf.cast(larger_targets > 0., tf.float64))
    tf.summary.scalar('Learning/LargerTargets', larger_targets_count)
    tf.summary.scalar('Learning/NumUpdates', tf.count_nonzero(loss_mask))
    tf.summary.scalar('Learning/BisimHorizon', self.bisim_horizon_ph)
    bisimulation_loss = tf.losses.mean_squared_error(
        bisimulation_target,
        bisimulation_estimate,
        weights=loss_mask)
    tf.summary.scalar('Learning/loss', bisimulation_loss)
    # Plot average distance between sampled representations.
    average_distance = tf.reduce_mean(bisimulation_estimate)
    tf.summary.scalar('Approx/AverageDistance', average_distance)
    return optimizer.minimize(bisimulation_loss)