def fn(): """Loss function for when number of input and output boxes is positive.""" if is_balanced: weights = loss_utils.get_balanced_loss_weights_multiclass( labels=input_boxes_instance_id) else: weights = tf.ones([tf.shape(input_boxes_instance_id)[0], 1], dtype=tf.float32) gt_center = tf.reshape(input_boxes_center, [-1, 3]) predicted_center = tf.reshape(output_boxes_center, [-1, 3]) if loss_type == 'huber': loss_fn = tf.keras.losses.Huber( delta=delta, reduction=tf.keras.losses.Reduction.NONE) elif loss_type == 'absolute_difference': loss_fn = tf.keras.losses.MeanAbsoluteError( reduction=tf.keras.losses.Reduction.NONE) else: raise ValueError(('Unknown loss type %s.' % loss_type)) center_losses = loss_fn(y_true=gt_center, y_pred=predicted_center) return tf.reduce_mean(center_losses * tf.reshape(weights, [-1]))
def eq_cifar_fn(x, output_dim=10, trainable=True):
  gconv_indices, gconv_shape_info, w_shape = gconv2d_util(
      h_input='Z2', h_output='C4', in_channels=3, out_channels=8, ksize=3)
  w = tf.get_variable('w1', shape=w_shape)
  conv1 = gconv2d(input=x, filter=w, strides=[1, 2, 2, 1], padding='SAME',
                  gconv_indices=gconv_indices,
                  gconv_shape_info=gconv_shape_info)
  tf.add_to_collection('conv_output1', conv1)
  pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)

  # in_channels must match the previous layer's out_channels (8 C4 feature
  # maps); the stored channel count is 8 * 4 rotations.
  gconv_indices, gconv_shape_info, w_shape = gconv2d_util(
      h_input='C4', h_output='C4', in_channels=8, out_channels=32, ksize=5)
  w = tf.get_variable('w2', shape=w_shape)
  conv2 = gconv2d(input=conv1, filter=w, strides=[1, 2, 2, 1], padding='SAME',
                  gconv_indices=gconv_indices,
                  gconv_shape_info=gconv_shape_info)
  pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)

  out_channels = 2
  gconv_indices, gconv_shape_info, w_shape = gconv2d_util(
      h_input='C4', h_output='C4', in_channels=32, out_channels=out_channels,
      ksize=5)
  w = tf.get_variable('w3', shape=w_shape)
  conv3 = gconv2d(input=conv2, filter=w, strides=[1, 1, 1, 1], padding='SAME',
                  gconv_indices=gconv_indices,
                  gconv_shape_info=gconv_shape_info)
  # Average over the 4 rotations of C4 to obtain rotation-invariant features.
  # Use -1 for the batch dimension, which may be statically unknown.
  conv3 = tf.reshape(
      conv3, [-1] + conv3.get_shape().as_list()[1:3] + [4, out_channels])
  conv3 = tf.reduce_mean(conv3, axis=3)
  pool3 = tf.layers.max_pooling2d(inputs=conv3, pool_size=[2, 2], strides=2)
  pool3_flat = tf.layers.flatten(pool3)
  u = tf.layers.dense(inputs=pool3_flat, units=output_dim,
                      activation=tf.nn.relu, trainable=trainable)
  tf.add_to_collection('conv_output2', conv2)
  return u
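# Hypothetical TF1-style usage of eq_cifar_fn above, assuming gconv2d and
# gconv2d_util come from the GrouPy TensorFlow port
# (groupy.gconv.tensorflow_gconv.splitgconv2d).
import numpy as np
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

x = tf.placeholder(tf.float32, [None, 32, 32, 3])  # CIFAR-sized inputs
logits = eq_cifar_fn(x, output_dim=10)

with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  out = sess.run(logits, feed_dict={x: np.zeros((4, 32, 32, 3), np.float32)})
  print(out.shape)  # (4, 10)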
def generate_plan(self, time_step: ActionTimeStep, state):
  assert self._reward_func is not None, ("specify reward function "
                                         "before planning")
  assert self._dynamics_func is not None, ("specify dynamics function "
                                           "before planning")
  self._plan_optimizer.set_cost(self._calc_cost_for_action_sequence)
  opt_action = self._plan_optimizer.obtain_solution(time_step, state)
  action = opt_action[:, 0]
  action = tf.reshape(action, [time_step.observation.shape[0], -1])
  return action
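# Shape-level sketch of the last steps of generate_plan() above, with a dummy
# stand-in for the planner's solution: keep only the first action of the
# optimized sequence and flatten it per batch row.
import tensorflow as tf

batch, horizon, action_dim = 4, 10, 2
opt_action = tf.zeros([batch, horizon, action_dim])  # stand-in for obtain_solution()
action = opt_action[:, 0]                 # first planned action: [batch, action_dim]
action = tf.reshape(action, [batch, -1])
print(action.shape)                       # (4, 2)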
def _build_network(self,
                   residual_layer_size=1024,
                   num_residual_layers=2,
                   dropout_amount=0.5,
                   small_context_loss_weight=0.0,
                   max_num_distractors=-1):
  """Builds an MLP with residual connections.

  Args:
    residual_layer_size: Dimension of the linear layers in each residual
      block.
    num_residual_layers: Number of residual layers.
    dropout_amount: If training, how much dropout to use in each layer.
    small_context_loss_weight: If >0, in addition to the loss with many
      distractors, add another loss where the only distractors are the
      sentences of the context.
    max_num_distractors: The maximum number of distractors provided at each
      train step.

  Returns:
    The input and output tensors for the network, with the input being a
    placeholder variable.
  """
  self.small_context_loss_weight = small_context_loss_weight
  self._max_num_distractors = max_num_distractors

  # x starts off with dimension [batch_size x num_sentences x emb_size].
  # Convert it to [batch_size x (num_sentences*emb_size)].
  x_input = tf.keras.Input(
      shape=[self._num_input_sentences, self._embedding_dim])
  flattened_shape = [-1, self._num_input_sentences * self._embedding_dim]
  x = tf.reshape(x_input, flattened_shape)

  def block(start_x, embedding_size):
    x = tf.keras.layers.Dense(embedding_size, activation='relu')(start_x)
    x = tf.keras.layers.Dropout(dropout_amount)(x)
    x = tf.keras.layers.Dense(embedding_size, activation='relu')(x)
    return x + start_x

  x = tf.keras.layers.LayerNormalization(axis=1)(x)

  # First bring the dimension down to the residual layer size.
  x = tf.keras.layers.Dense(residual_layer_size)(x)

  # Add the specified number of residual layers.
  for _ in range(num_residual_layers):
    x = block(x, residual_layer_size)

  # Go back up to the embedding dimension.
  x = tf.keras.layers.Dense(self._embedding_dim, activation='linear')(x)
  x = tf.keras.layers.LayerNormalization(axis=1)(x)

  return x_input, x
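# Standalone sketch of the residual block pattern used in _build_network()
# above: two ReLU dense layers with dropout plus a skip connection. The
# dimensions are illustrative.
import tensorflow as tf

def residual_block(start_x, embedding_size, dropout_amount=0.5):
  x = tf.keras.layers.Dense(embedding_size, activation='relu')(start_x)
  x = tf.keras.layers.Dropout(dropout_amount)(x)
  x = tf.keras.layers.Dense(embedding_size, activation='relu')(x)
  return x + start_x

inp = tf.keras.Input(shape=[1024])
out = residual_block(inp, 1024)
print(tf.keras.Model(inp, out).output_shape)  # (None, 1024)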
def call(self, inputs, outer_rank):
  if inputs.dtype != self._sample_spec.dtype:
    raise ValueError(
        'Inputs to NormalProjectionNetwork must match the sample_spec.dtype.')
  # outer_rank is needed because the projection is not done on the raw
  # observations so getting the outer rank is hard as there is no spec to
  # compare to.
  batch_squash = network_utils.BatchSquash(outer_rank)
  inputs = batch_squash.flatten(inputs)

  means = self._means_projection_layer(inputs)
  means = tf.reshape(means, [-1] + self._sample_spec.shape.as_list())

  if self._state_dependent_std:
    stds = self._stddev_projection_layer(inputs)
  else:
    stds = self._bias(tf.zeros_like(means))
  stds = tf.reshape(stds, [-1] + self._sample_spec.shape.as_list())

  inv_stds = self._std_transform(stds)
  if self._max_std is not None:
    inv_stds += 1 / (self._max_std - self._min_std)
  stds = 1. / inv_stds
  if self._min_std > 0:
    stds += self._min_std
  stds = tf.cast(stds, self._sample_spec.dtype)

  means = means * stds

  # If not scaling the distribution later, use a normalized mean.
  if not self._scale_distribution and self._mean_transform is not None:
    means = self._mean_transform(means, self._sample_spec)
  means = tf.cast(means, self._sample_spec.dtype)

  means = batch_squash.unflatten(means)
  stds = batch_squash.unflatten(stds)
  return self.output_spec.build_distribution(loc=means, scale=stds)
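# Numeric sketch of the std construction in call() above, assuming softplus as
# the std transform (the actual transform is configurable). The offset
# 1 / (max_std - min_std) bounds inv_stds from below, so after inversion
# stds <= max_std - min_std, and adding min_std keeps each std within
# (min_std, max_std].
import tensorflow as tf

min_std, max_std = 0.1, 2.0
raw = tf.constant([-2.0, 0.0, 5.0])
inv_stds = tf.nn.softplus(raw)          # stand-in for self._std_transform
inv_stds += 1.0 / (max_std - min_std)
stds = 1.0 / inv_stds + min_std
print(stds.numpy())                     # all values lie in (min_std, max_std]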
def _attend(self, query, key, value, key_class_id):
  """Transformer attention function."""
  with tf.name_scope('attend'):
    q_shape = tf.shape(query)
    v_shape = tf.shape(value)
    n_q = q_shape[0]
    h_q = q_shape[1]
    w_q = q_shape[2]
    d = q_shape[3]
    n_v = v_shape[0]
    h_v = v_shape[1]
    w_v = v_shape[2]
    c = v_shape[3]

    q = tf.reshape(query, [-1, d])  # [n_q*h_q*w_q, d]
    k = tf.reshape(key, [-1, d])    # [n_v*h_v*w_v, d]

    # [n_v*h_v*w_v, d] x [n_q*h_q*w_q, d]^T --> [n_v*h_v*w_v, n_q*h_q*w_q]
    logits = tf.matmul(k, q, transpose_b=True)
    d_scale = tf.rsqrt(tf.cast(d, logits.dtype))

    # logits: [n_v, h_v*w_v, n_q*h_q*w_q]
    logits = tf.reshape(d_scale * logits, [n_v, h_v * w_v, -1])

    # attn: [n_v, h_v*w_v, n_q*h_q*w_q]
    attn = self.get_support_set_softmax(logits, key_class_id)

    # Aggregate the values.
    v = tf.reshape(value, [n_v, h_v * w_v, c])
    # [n_v, h_v*w_v, n_q*h_q*w_q] x [n_v, h_v*w_v, c] --> [n_v, n_q*h_q*w_q, c]
    v_agg = tf.einsum('ijk,ijl->ikl', attn, v)
    v_agg = tf.reshape(v_agg, [n_v, n_q, h_q, w_q, c])
    v_agg.set_shape([None, None, None, None, value.shape[-1]])

    return v_agg  # [n_v, n_q, h_q, w_q, c]
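# Shape check for the einsum aggregation in _attend() above: per support image
# (index i), attention over its spatial positions (j) pools the values into
# one vector per query slot (k). Sizes are illustrative.
import tensorflow as tf

n_v, hw_v, n_q_hw_q, c = 3, 16, 20, 64
attn = tf.random.uniform([n_v, hw_v, n_q_hw_q])
v = tf.random.uniform([n_v, hw_v, c])
v_agg = tf.einsum('ijk,ijl->ikl', attn, v)
print(v_agg.shape)  # (3, 20, 64)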
def compute_episode_stats(episode):
  """Computes various episode stats: way, shots, and class IDs.

  Args:
    episode: An EpisodeDataset.

  Returns:
    way: An int constant tensor. The number of classes in the episode.
    shots: An int 1D tensor: The number of support examples per class.
    class_ids: An int 1D tensor: (absolute) class IDs.
  """
  # The train labels of the next episode.
  train_labels = episode.train_labels
  # Compute way.
  episode_classes, _ = tf.unique(train_labels)
  way = tf.size(episode_classes)
  # Compute shots.
  class_ids = tf.reshape(tf.range(way), [way, 1])
  class_labels = tf.reshape(train_labels, [1, -1])
  is_equal = tf.equal(class_labels, class_ids)
  shots = tf.reduce_sum(tf.cast(is_equal, tf.int32), axis=1)
  # Compute class_ids.
  class_ids, _ = tf.unique(episode.train_class_ids)
  return way, shots, class_ids
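# Toy run of the way/shots computation in compute_episode_stats() above:
# labels [0, 0, 1, 2, 2, 2] give way=3 and shots=[2, 1, 3].
import tensorflow as tf

train_labels = tf.constant([0, 0, 1, 2, 2, 2])
episode_classes, _ = tf.unique(train_labels)
way = tf.size(episode_classes)
class_ids = tf.reshape(tf.range(way), [way, 1])
is_equal = tf.equal(tf.reshape(train_labels, [1, -1]), class_ids)
shots = tf.reduce_sum(tf.cast(is_equal, tf.int32), axis=1)
print(way.numpy(), shots.numpy())  # 3 [2 1 3]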
def _expand_to_population(self, data):
  """Expands the input tensor to a population of replications.

  Args:
    data (tf.Tensor): input data with shape [batch_size, ...].

  Returns:
    data_population (tf.Tensor): replicated data with shape
      [batch_size * self._population_size, ...]. For example, for a data
      tensor [[a, b], [c, d]] and a population_size of 2, the output
      data_population tensor is [[a, b], [a, b], [c, d], [c, d]].
  """
  data_population = tf.tile(
      tf.expand_dims(data, 1),
      [1, self._population_size] + [1] * len(data.shape[1:]))
  data_population = tf.reshape(data_population,
                               [-1] + data.shape[1:].as_list())
  return data_population
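# Concrete check of the docstring example in _expand_to_population() above:
# [[a, b], [c, d]] with a population size of 2 becomes
# [[a, b], [a, b], [c, d], [c, d]].
import tensorflow as tf

data = tf.constant([[1, 2], [3, 4]])
population_size = 2
pop = tf.tile(tf.expand_dims(data, 1),
              [1, population_size] + [1] * len(data.shape[1:]))
pop = tf.reshape(pop, [-1] + data.shape[1:].as_list())
print(pop.numpy())  # [[1 2] [1 2] [3 4] [3 4]]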
def compute_kitti_difficulty(boxes, occlusions, truncations, image_height):
  """Computes box difficulty as Easy(3), Moderate(2), Hard(1) or SuperHard(0).

  Easy: height >= 40 px, occlusion <= 0, truncation <= 0.15
  Moderate: height >= 25 px, occlusion <= 1, truncation <= 0.30
  Hard: height >= 25 px, occlusion <= 2, truncation <= 0.50

  Note that the criteria are nested: a box satisfying 'Easy' also satisfies
  'Moderate' and 'Hard', and the easiest matching level is assigned.

  Returns a (N, 1) tensor containing object difficulty with the following
  labelmap:
    0: SuperHard
    1: Hard
    2: Moderate
    3: Easy

  TODO(abhijitkundu): Since difficulty level is very specific to kitti, this
  function should be in kitti evaluation rather than detection preprocessor.

  Args:
    boxes: (N, 4) tensor of 2d boxes with [ymin, xmin, ymax, xmax] in each
      row.
    occlusions: (N, 1) tensor containing box occlusion levels.
    truncations: (N, 1) tensor containing box truncation levels.
    image_height: Image height.

  Returns:
    A (N, 1) int32 tensor containing per box difficulty labels with
    0 (SuperHard), 1 (Hard), 2 (Moderate) and 3 (Easy).
  """
  # Box heights in pixels.
  heights = tf.reshape((boxes[:, 2] - boxes[:, 0]), [-1, 1]) * tf.cast(
      image_height, dtype=tf.float32)

  # Compute binary masks for each difficulty level.
  is_easy = (heights >= 40.0) & (occlusions <= 0) & (truncations <= 0.15)
  is_moderate = (heights >= 25.0) & (occlusions <= 1) & (truncations <= 0.30)
  is_hard = (heights >= 25.0) & (occlusions <= 2) & (truncations <= 0.50)

  # Set the difficulty map: keep the easiest (highest) matching level.
  difficulty = tf.maximum(
      tf.maximum(
          tf.cast(is_hard, dtype=tf.int32) * ObjectDifficulty.HARD,
          tf.cast(is_moderate, dtype=tf.int32) * ObjectDifficulty.MODERATE),
      tf.cast(is_easy, dtype=tf.int32) * ObjectDifficulty.EASY)
  return difficulty
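# Toy illustration of the label combination in compute_kitti_difficulty()
# above, with HARD=1, MODERATE=2, EASY=3 standing in for ObjectDifficulty:
# each mask is multiplied by its level and the maximum is kept, so a box
# matching only the 'Hard' criteria gets 1 and one matching none gets 0.
import tensorflow as tf

HARD, MODERATE, EASY = 1, 2, 3
is_hard = tf.constant([[True], [True], [False]])
is_moderate = tf.constant([[True], [False], [False]])
is_easy = tf.constant([[False], [False], [False]])
difficulty = tf.maximum(
    tf.maximum(tf.cast(is_hard, tf.int32) * HARD,
               tf.cast(is_moderate, tf.int32) * MODERATE),
    tf.cast(is_easy, tf.int32) * EASY)
print(difficulty.numpy().ravel())  # [2 1 0]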
def random_flip_left_right(images, flow, mask, probability):
  """Performs a random left/right flip."""
  perform_flip = tf.less(tf.random.uniform([]), probability)
  # Apply the flip.
  images = tf.cond(pred=perform_flip,
                   true_fn=lambda: tf.reverse(images, axis=[-2]),
                   false_fn=lambda: images)
  if flow is not None:
    flow = tf.cond(pred=perform_flip,
                   true_fn=lambda: tf.reverse(flow, axis=[-2]),
                   false_fn=lambda: flow)
    mask = tf.cond(pred=perform_flip,
                   true_fn=lambda: tf.reverse(mask, axis=[-2]),
                   false_fn=lambda: mask)
    # Correct the sign of the flow.
    sign_correction = tf.reshape([1.0, -1.0], [1, 1, 2])
    flow = tf.cond(pred=perform_flip,
                   true_fn=lambda: flow * sign_correction,
                   false_fn=lambda: flow)
  return images, flow, mask
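# Minimal sketch of the horizontal flip's sign correction above, assuming flow
# channels are ordered (dy, dx) as the [1.0, -1.0] factor implies: reversing
# the width axis negates the horizontal (dx) component.
import tensorflow as tf

flow = tf.random.normal([4, 4, 2])
flipped = tf.reverse(flow, axis=[-2]) * tf.reshape([1.0, -1.0], [1, 1, 2])
print(flipped.shape)  # (4, 4, 2)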
def random_flip_up_down(images, flow=None, mask=None):
  """Performs a random up/down flip."""
  # 50/50 chance.
  perform_flip = tf.equal(tf.random.uniform([], maxval=2, dtype=tf.int32), 1)
  # Apply the flip.
  images = tf.cond(pred=perform_flip,
                   true_fn=lambda: tf.reverse(images, axis=[-3]),
                   false_fn=lambda: images)
  if flow is not None:
    flow = tf.cond(pred=perform_flip,
                   true_fn=lambda: tf.reverse(flow, axis=[-3]),
                   false_fn=lambda: flow)
    mask = tf.cond(pred=perform_flip,
                   true_fn=lambda: tf.reverse(mask, axis=[-3]),
                   false_fn=lambda: mask)
    # Correct the sign of the flow.
    sign_correction = tf.reshape([-1.0, 1.0], [1, 1, 2])
    flow = tf.cond(pred=perform_flip,
                   true_fn=lambda: flow * sign_correction,
                   false_fn=lambda: flow)
  return images, flow, mask
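# Sanity sketch for the up/down flip above: applying the flip with its sign
# correction twice recovers the original flow, which quickly validates the
# assumed channel convention (first channel = dy).
import tensorflow as tf

flow = tf.random.normal([4, 4, 2])
sign = tf.reshape([-1.0, 1.0], [1, 1, 2])
once = tf.reverse(flow, axis=[-3]) * sign
twice = tf.reverse(once, axis=[-3]) * sign
tf.debugging.assert_near(flow, twice)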
def _rotate(img, mask=None):
  if angle_radian == 0.0:
    # Early return if no rotation is required.
    if mask is not None:
      return img, mask
    else:
      return img

  if mask is not None:
    # Multiply with the mask to ensure non-valid locations are zero.
    img = tf.math.multiply(img, mask)
    # Rotate the image.
    img_rotated = tfa_image.rotate(
        img, angle_radian, interpolation='BILINEAR')
    # Rotate the mask (will serve as normalization weights).
    mask_rotated = tfa_image.rotate(
        mask, angle_radian, interpolation='BILINEAR')
    # Normalize the sparse flow field and the mask.
    img_rotated = tf.math.multiply(
        img_rotated, tf.math.reciprocal_no_nan(mask_rotated))
    mask_rotated = tf.math.multiply(
        mask_rotated, tf.math.reciprocal_no_nan(mask_rotated))
  else:
    img_rotated = tfa_image.rotate(
        img, angle_radian, interpolation='BILINEAR')

  if is_flow:
    # If the image is a flow image, rotate the flow vectors to be consistent
    # with the rotation of the pixel grid.
    cos = tf.math.cos(angle_radian)
    sin = tf.math.sin(angle_radian)
    rotation_matrix = tf.reshape([cos, sin, -sin, cos], [2, 2])
    img_rotated = tf.linalg.matmul(img_rotated, rotation_matrix)

  if mask is not None:
    return img_rotated, mask_rotated
  return img_rotated
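# Numeric sketch of the flow-vector rotation in _rotate() above: each 2-vector
# in the last dimension is multiplied by the 2x2 matrix built from
# angle_radian. A 90-degree rotation maps [1, 0] to approximately [0, 1].
import numpy as np
import tensorflow as tf

angle_radian = np.pi / 2.0
cos = tf.math.cos(angle_radian)
sin = tf.math.sin(angle_radian)
rotation_matrix = tf.reshape([cos, sin, -sin, cos], [2, 2])
flow = tf.constant([[[1.0, 0.0]]])  # a single flow vector
rotated = tf.linalg.matmul(flow, rotation_matrix)
print(rotated.numpy())  # ~[[[0. 1.]]]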
def compute_prototypes(embeddings, onehot_labels):
  """Computes class prototypes over the last dimension of embeddings.

  Args:
    embeddings: Tensor of examples of shape [num_examples] + embedding_shape.
    onehot_labels: Tensor of one-hot encoded labels of shape [num_examples,
      num_classes].

  Returns:
    prototypes: Tensor of class prototypes of shape [num_classes,
      embedding_size].
  """
  # Sums each class' embeddings. [num_classes] + embedding_shape.
  embedding_indices = 'klm'[:len(embeddings.shape) - 1]
  class_sums = tf.einsum('ij,i{0}->j{0}'.format(embedding_indices),
                         onehot_labels, embeddings)

  # The prototype of each class is the averaged embedding of its examples.
  class_num_images = tf.reduce_sum(input_tensor=onehot_labels, axis=0)  # [way].
  prototypes = tf.math.divide_no_nan(
      class_sums,
      tf.reshape(class_num_images,
                 [-1] + [1] * (len(embeddings.shape) - 1)))

  return prototypes
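# Toy check of compute_prototypes() above: two classes and three 2-d
# embeddings. Class 0 averages the first two rows; class 1 keeps the third.
import tensorflow as tf

embeddings = tf.constant([[1.0, 1.0], [3.0, 3.0], [0.0, 2.0]])
onehot_labels = tf.constant([[1.0, 0.0], [1.0, 0.0], [0.0, 1.0]])
prototypes = compute_prototypes(embeddings, onehot_labels)
print(prototypes.numpy())  # [[2. 2.] [0. 2.]]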