import tensorflow as tf


def projection_dist(states):
  """Scalar projection of the displacement from the start onto the goal
  direction, minus the norm of the displacement itself.

  `starting_states` and `goals` are free variables this function closes over.
  """
  inner = tf.multiply(states - starting_states, goals - starting_states)
  # Dot product <states - starting_states, goals - starting_states>.
  upper = tf.reduce_sum(inner, -1)
  # Scalar projection onto the start->goal direction.
  result = tf.math.divide(upper, tf.norm(goals - starting_states, ord=2))
  # Note: no axis is passed, so this is the norm over the full tensor.
  term_1 = tf.norm(states - starting_states, 2)
  return -1 * term_1 + result
def normalized_dist(states):
  """Distance that rewards on-axis progress towards the goal and penalizes
  off-axis deviation with weight `alpha`.

  `starting_states`, `goals`, `epsilon`, and `alpha` are free variables this
  function closes over; a small `epsilon` stabilizes the square root near 0.
  """
  inner = tf.multiply(states - starting_states, goals - starting_states)
  upper = tf.reduce_sum(inner, -1)
  sign = tf.sign(upper)
  # Signed squared scalar projection onto the start->goal direction.
  result = sign * tf.square(
      tf.math.divide(upper, tf.norm(goals - starting_states, ord=2)))
  # Squared norm of the displacement from the start ...
  term_1 = tf.square(tf.norm(states - starting_states, 2))
  # ... and the squared on-axis component of that displacement.
  term_2 = tf.square(
      tf.math.divide(upper, tf.norm(goals - starting_states, ord=2)))
  return tf.sqrt(epsilon + tf.abs(result - alpha * (term_1 - term_2)))
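# Minimal usage sketch for the two distance functions above (not from the
# original source), assuming TF 2.x eager execution. The values bound to the
# free variables `starting_states`, `goals`, `epsilon`, and `alpha` below are
# illustrative assumptions.
starting_states = tf.constant([[0.0, 0.0]])
goals = tf.constant([[1.0, 0.0]])
epsilon = 1e-6
alpha = 0.5

# A state halfway along the start->goal axis but displaced off it.
states = tf.constant([[0.5, 0.5]])
print(projection_dist(states).numpy())  # ~[-0.207]: progress minus displacement
print(normalized_dist(states).numpy())  # ~[0.354]: off-axis part weighted by alpha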
def flip_normals_towards_viewpoint(points, normals, viewpoint):
  """Flips the normals to face towards the view point.

  Args:
    points: A tf.float32 tensor of size [N, 3].
    normals: A tf.float32 tensor of size [N, 3].
    viewpoint: A tf.float32 tensor of size [3].

  Returns:
    flipped_normals: A tf.float32 tensor of size [N, 3].
  """
  # (viewpoint - point)
  view_vector = tf.expand_dims(viewpoint, axis=0) - points
  # Dot product between the (viewpoint - point) and the plane normal.
  cos_theta = tf.expand_dims(
      tf.reduce_sum(view_vector * normals, axis=1), axis=1)
  # Revert normals where cos is negative.
  normals *= tf.sign(tf.tile(cos_theta, [1, 3]))
  return normals
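# Small sanity check for the flipping logic (not from the original source),
# assuming TF 2.x eager execution; the points, normals, and viewpoint are
# made-up values.
points = tf.constant([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0]], dtype=tf.float32)
normals = tf.constant([[0.0, 0.0, -1.0], [0.0, 0.0, 1.0]], dtype=tf.float32)
viewpoint = tf.constant([0.0, 0.0, 5.0], dtype=tf.float32)
flipped = flip_normals_towards_viewpoint(points, normals, viewpoint)
# The first normal pointed away from the viewpoint and comes back reversed as
# [0, 0, 1]; the second already faced the viewpoint and is unchanged. Normals
# exactly perpendicular to the view vector get a sign of 0 and are zeroed out
# rather than flipped.
print(flipped.numpy())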
def _build_train_op(self, optimizer):
  """Build the TensorFlow graph used to learn the bisimulation metric.

  Args:
    optimizer: a tf.train optimizer.

  Returns:
    A TensorFlow op to minimize the bisimulation loss.
  """
  self.online_network = tf.make_template('Online', self._network_template)
  self.target_network = tf.make_template('Target', self._network_template)
  self.s1_ph = tf.placeholder(tf.float64, (self.batch_size, 2), name='s1_ph')
  self.s2_ph = tf.placeholder(tf.float64, (self.batch_size, 2), name='s2_ph')
  self.s1_online_distances = self.online_network(
      self._concat_states(self.s1_ph))
  self.s1_target_distances = self.target_network(
      self._concat_states(self.s1_ph))
  self.s2_target_distances = self.target_network(
      self._concat_states(self.s2_ph))
  self.action_ph = tf.placeholder(tf.int32, (self.batch_size,))
  self.rewards_ph = tf.placeholder(tf.float64, (self.batch_size,))
  # We use an expanding horizon for computing the distances.
  self.bisim_horizon_ph = tf.placeholder(tf.float64, ())
  # bisimulation_target_1 = rew_diff + gamma * next_distance.
  bisimulation_target_1 = tf.stop_gradient(self._build_bisimulation_target())
  # bisimulation_target_2 = curr_distance.
  bisimulation_target_2 = tf.stop_gradient(self.s1_target_distances)
  # We slowly taper in the maximum according to the bisim horizon.
  bisimulation_target = tf.maximum(
      bisimulation_target_1, bisimulation_target_2 * self.bisim_horizon_ph)
  # We zero out diagonal entries, since those estimate the distance between
  # a state and itself, which we know to be 0.
  diagonal_mask = 1.0 - tf.diag(tf.ones(self.batch_size, dtype=tf.float64))
  diagonal_mask = tf.reshape(diagonal_mask, (self.batch_size**2, 1))
  bisimulation_target *= diagonal_mask
  bisimulation_estimate = self.s1_online_distances
  # We start with a mask that includes everything.
  loss_mask = tf.ones(tf.shape(bisimulation_estimate))
  # We only compare pairs of states that were sampled with the same action.
  indicators = self.action_ph
  indicators = tf.cast(indicators, tf.float64)
  # indicators will initially have shape [batch_size]; we first tile it:
  square_ids = tf.tile([indicators], [self.batch_size, 1])
  # We subtract square_ids from its transpose:
  square_ids = square_ids - tf.transpose(square_ids)
  # At this point all zero-entries are the ones with equal IDs.
  # Now we would like to convert the zeros in this matrix to 1s, and make
  # everything else a 0. We can do this with the following operation:
  loss_mask = 1 - tf.abs(tf.sign(square_ids))
  # Now reshape to match the shapes of the estimate and target.
  loss_mask = tf.reshape(loss_mask, (self.batch_size**2, 1))
  larger_targets = bisimulation_target - bisimulation_estimate
  larger_targets_count = tf.reduce_sum(
      tf.cast(larger_targets > 0., tf.float64))
  tf.summary.scalar('Learning/LargerTargets', larger_targets_count)
  tf.summary.scalar('Learning/NumUpdates', tf.count_nonzero(loss_mask))
  tf.summary.scalar('Learning/BisimHorizon', self.bisim_horizon_ph)
  bisimulation_loss = tf.losses.mean_squared_error(
      bisimulation_target,
      bisimulation_estimate,
      weights=loss_mask)
  tf.summary.scalar('Learning/loss', bisimulation_loss)
  # Plot average distance between sampled representations.
  average_distance = tf.reduce_mean(bisimulation_estimate)
  tf.summary.scalar('Approx/AverageDistance', average_distance)
  return optimizer.minimize(bisimulation_loss)
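# Hedged sketch of how the train op might be driven (not from the original
# source). `_build_train_op` uses TF1-style APIs (tf.placeholder,
# tf.make_template, tf.losses), so this assumes graph mode via
# tensorflow.compat.v1; `agent` is a hypothetical instance of the surrounding
# class, and the action count of 4 is an arbitrary assumption.
import numpy as np
import tensorflow.compat.v1 as tf

tf.disable_v2_behavior()
train_op = agent._build_train_op(tf.train.AdamOptimizer(learning_rate=1e-3))
with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  sess.run(train_op, feed_dict={
      agent.s1_ph: np.random.rand(agent.batch_size, 2),
      agent.s2_ph: np.random.rand(agent.batch_size, 2),
      agent.action_ph: np.random.randint(4, size=agent.batch_size),
      agent.rewards_ph: np.random.rand(agent.batch_size),
      # Annealed from near 0 towards 1 over training, consistent with the
      # expanding-horizon comment in the method.
      agent.bisim_horizon_ph: 0.1,
  })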