def _GetBetaGamma(self, theta, inputs, **kwargs):
  """Selects per-example beta/gamma rows according to a class embedding.

  Args:
    theta: Weights of this layer; `theta.beta` and `theta.gamma` are gathered
      along their first axis by class id.
    inputs: Tensor being normalized. Only its batch size and rank are used,
      to build the broadcastable output shape.
    **kwargs: Must contain 'class_emb', a one-hot tensor of shape
      [batch, class_emb_dim=num_classes].

  Returns:
    A (beta, gamma) pair, each reshaped to [batch, 1, ..., 1, dim].
  """
  params = self.params
  assert 'class_emb' in kwargs
  one_hot_classes = kwargs['class_emb']
  class_ids = tf.math.argmax(one_hot_classes, axis=-1, output_type=tf.int32)

  # Row lookup ([batch, dim]) rather than matmul/einsum, to avoid potential
  # precision problems on TPU with sparse inputs.
  selected_beta = tf.gather(theta.beta, class_ids)
  selected_gamma = tf.gather(theta.gamma, class_ids)
  if not (params.gamma_zero_init or params.gamma_one_init):
    # In this configuration the stored value is an offset: the real gamma to
    # use is 1 + gamma.
    selected_gamma = 1.0 + selected_gamma

  # Target shape [batch, 1, ..., 1, dim]: one singleton per middle axis of
  # `inputs`.
  batch_size = py_utils.GetShape(inputs)[0]
  num_middle_dims = py_utils.GetRank(inputs) - 2
  broadcast_shape = tf.concat(
      [[batch_size],
       tf.ones([num_middle_dims], tf.int32),
       [params.dim]],
      axis=0)
  return (tf.reshape(selected_beta, broadcast_shape),
          tf.reshape(selected_gamma, broadcast_shape))
def ComputePredictions(self, theta, input_batch):
  """Runs the extractor and the softmax projection on a batch.

  Args:
    theta: Weights; `theta.extract` and `theta.softmax` are forwarded to the
      respective child layers.
    input_batch: Batch whose `data` field is fed to the extractor.

  Returns:
    A NestedMap with `act` (the rank-2 activations) and `logits`.
  """
  features = self.extract.FProp(theta.extract, input_batch.data)
  if py_utils.GetRank(features) == 4:
    # Average-pool rank-4 activations over axes 1 and 2.
    features = tf.reduce_mean(features, axis=[1, 2])
  features = py_utils.HasRank(features, 2)
  return py_utils.NestedMap(
      act=features, logits=self.softmax.Logits(theta.softmax, features))
def _TestStreamStepHelper(self, **kwargs):
  """Checks that chunked `StreamStep` output matches the full `FProp` output.

  Reads from kwargs: 'input_dim' (required), 'stride' (default 1),
  'right_context' (default 0) and 'tol' (default 1e-6). All kwargs are also
  forwarded to `_GetParams`.
  """
  batch_size, max_seqlen = 2, 32
  input_dim = kwargs['input_dim']
  stride = kwargs.get('stride', 1)
  # The sequence must split evenly into stride-sized chunks.
  assert max_seqlen % stride == 0
  right_context = kwargs.get('right_context', 0)

  inputs, paddings = self._GetInputs(batch_size, max_seqlen, input_dim)
  p = self._GetParams(**kwargs)

  with self.session(use_gpu=False) as sess:
    layer = p.Instantiate()
    init_op = tf.global_variables_initializer()

    # Reference: whole-sequence FProp with padded positions zeroed out.
    base_outputs = self._GetFPropOutput(self._FProp(layer, inputs, paddings))
    base_rank = py_utils.GetRank(base_outputs)
    base_outputs *= py_utils.AppendDims(1. - paddings, base_rank - 2)

    # zero_state is tried without theta first, then with theta.
    try:
      state = layer.zero_state(batch_size)
    except TypeError:
      state = layer.zero_state(layer.theta, batch_size)

    num_real_steps = max_seqlen // stride
    # Extra all-padding steps that flush the right-context delay.
    num_flush_steps = int(math.ceil(right_context / stride))
    step_outputs = []
    for step in range(num_real_steps + num_flush_steps):
      if step < num_real_steps:
        begin, end = stride * step, stride * (step + 1)
        step_inputs = inputs[:, begin:end]
        step_paddings = paddings[:, begin:end]
      else:
        step_inputs = tf.zeros_like(inputs[:, 0:stride])
        step_paddings = tf.ones_like(paddings[:, 0:stride])
      step_out, _, state = layer.StreamStep(layer.theta, step_inputs,
                                            step_paddings, state)
      step_outputs.append(step_out)

    outputs = self._NormalizeStreamStepOutput(
        tf.concat(step_outputs, axis=1), paddings, right_context, max_seqlen)

    sess.run(init_op)
    expected, actual = sess.run([base_outputs, outputs])
    print(f'expected: {repr(expected)}, {expected.shape}')
    print(f'actual: {repr(actual)}, {actual.shape}')
    print(f'np.sum(np.abs(expected)): {np.sum(np.abs(expected))}')
    print(f'np.sum(np.abs(actual)): {np.sum(np.abs(actual))}')
    tol = kwargs.get('tol', 1e-6)
    self.assertAllClose(expected, actual, atol=tol, rtol=tol)
def _NormalizeStreamStepOutput(self,
                               outputs,
                               paddings,
                               right_context,
                               max_seqlen,
                               num_layers=1):
  """Aligns concatenated StreamStep outputs with the inputs and masks padding.

  Args:
    outputs: Concatenated streaming outputs, time on axis 1.
    paddings: Paddings with time on axis 1; 1.0 marks a padded position.
    right_context: Per-layer right context, in frames.
    max_seqlen: Number of frames to keep.
    num_layers: Number of layers, each contributing `right_context` frames
      of delay.

  Returns:
    The first `max_seqlen` delay-compensated frames, multiplied by
    `1 - paddings`.
  """
  # Outputs lag the inputs by right_context * num_layers frames.
  delay = right_context * num_layers
  # Frames past max_seqlen come from the padded inputs that were fed only to
  # complete the last frame's right context.
  aligned = outputs[:, delay:][:, :max_seqlen]
  aligned_rank = py_utils.GetRank(aligned)
  keep_mask = 1. - paddings[:, :max_seqlen]
  return aligned * py_utils.AppendDims(keep_mask, aligned_rank - 2)
def ComputeLoss(self, theta, predictions, input_batch):
  """Computes softmax cross-entropy metrics for a batch.

  Args:
    theta: Weights; `theta.softmax` is forwarded to the softmax layer.
    predictions: Object exposing `act`, the activations fed to the softmax.
    input_batch: Batch exposing `data`, `label` and `weight`. A rank-1
      `label` is treated as class ids and one-hot encoded; any other rank is
      used directly as class probabilities.

  Returns:
    A (metrics, per_example) pair. `metrics` maps a name to a
    (value, weight) tuple: 'loss', 'log_pplx', 'num_preds', plus
    'accuracy', 'acc5', 'error' and 'error5' when evaluating or when
    `compute_accuracy_for_training` is set. `per_example` holds the
    per-example cross-entropy under 'loss'.
  """
  p = self.params
  batch = tf.shape(input_batch.data)[0]
  act = predictions.act
  with tf.ops.colocate_with(act):
    tf.logging.info("{}'s device: {}".format(act, act.device))
    if py_utils.GetRank(input_batch.label) == 1:
      # Class ids: build the one-hot targets here.
      labels = tf.cast(input_batch.label, tf.int64)
      target_probs = tf.one_hot(labels, p.softmax.num_classes)
    else:
      # Already class probabilities: recover ids for the accuracy metrics.
      target_probs = input_batch.label
      labels = tf.math.argmax(target_probs, axis=-1)

    if p.label_smoothing > 0:
      keep_weight = 1.0 - p.label_smoothing
      spread_weight = p.label_smoothing / p.softmax.num_classes
      target_probs = target_probs * keep_weight + spread_weight

    xent = self.softmax.FProp(
        theta=theta.softmax,
        inputs=act,
        class_weights=input_batch.weight,
        class_probabilities=target_probs)

  self._AddSummary(input_batch, xent.per_example_argmax)

  metrics = {
      'loss': (xent.avg_xent, batch),
      'log_pplx': (xent.avg_xent, batch),
      'num_preds': (batch, 1),
  }
  if self.do_eval or p.compute_accuracy_for_training:
    top1 = self._Accuracy(1, xent.logits, labels, input_batch.weight)
    top5 = self._Accuracy(5, xent.logits, labels, input_batch.weight)
    metrics['accuracy'] = (top1, batch)
    metrics['acc5'] = (top5, batch)
    metrics['error'] = (1. - top1, batch)
    metrics['error5'] = (1. - top5, batch)
  return metrics, {'loss': xent.per_example_xent}
def _StreamMoments(self, inputs, paddings, cached_sum, cached_count,
                   cached_var):
  """Computes mean and variance over the valid data points in inputs.

  Statistics are cumulative along the time axis and continue from the cached
  values of previous calls.

  Args:
    inputs: [B, T, F, N, G] or [B, T, N, G]
    paddings: [B, T, 1, 1, 1] or [B, T, 1, 1]
    cached_sum: [B, 1, 1, N, 1] or [B, 1, N, 1]
    cached_count: same shape as cached_sum.
    cached_var: same shape as cached_sum.

  Returns:
    mean: [B, T, 1, N, 1] or [B, T, N, 1]
    variance: same shape as mean.
    new_cached_sum: same shape as cached_sum.
    new_cached_count: same shape as cached_count.
    new_cached_var: same shape as cached_var.
  """
  tf.logging.vlog(1, 'inputs: %r', inputs)
  tf.logging.vlog(1, 'paddings: %r', paddings)
  tf.logging.vlog(1, 'cached_sum: %r', cached_sum)
  tf.logging.vlog(1, 'cached_count: %r', cached_count)

  inputs = py_utils.ApplyPadding(paddings, inputs, use_select=False)

  input_rank = py_utils.GetRank(inputs)
  assert input_rank is not None, (
      f'inputs rank must be static for {repr(inputs)}')
  all_dims = list(range(input_rank))
  # Skip B, T, and N. Reduce {F,G} or just G.
  reduce_over_dims = all_dims[2:-2] + all_dims[-1:]
  tf.logging.vlog(1, 'reduce_over_dims: %s', reduce_over_dims)

  # [B, T, 1, N, 1] or [B, T, N, 1]
  sum_v = tf.reduce_sum(inputs, reduce_over_dims, keepdims=True)
  sum_v = tf.math.cumsum(sum_v, axis=1)
  sum_v += cached_sum

  # [B, T, 1, 1, 1] or [B, T, 1, 1]: number of valid frames seen so far.
  mask = tf.cast(1.0 - paddings, inputs.dtype)
  count_v = tf.reduce_sum(mask, reduce_over_dims, keepdims=True)
  count_v = tf.math.cumsum(count_v, axis=1)

  # Every valid frame contributes one element per reduced position, i.e.
  # F * G for rank-5 inputs and G for rank-4 inputs. Taking the product over
  # reduce_over_dims keeps the count in sync with the reduction above.
  input_shape = py_utils.GetShape(inputs)
  multiplier = 1
  for dim in reduce_over_dims:
    multiplier *= input_shape[dim]
  count_v *= tf.cast(multiplier, count_v.dtype)
  count_v += cached_count

  tf.logging.vlog(1, 'sum_v: %r', sum_v)
  tf.logging.vlog(1, 'count_v: %r', count_v)

  # Only the divisor is clamped (avoids 0/0 while no valid frame has been
  # seen); the cached count below stays exact.
  safe_count = tf.maximum(count_v, 1.0)
  mean = sum_v / safe_count

  sum_vv = tf.reduce_sum(
      py_utils.ApplyPadding(
          paddings,
          tf.math.squared_difference(inputs, mean),
          use_select=False),
      reduce_over_dims,
      keepdims=True)
  sum_vv = tf.math.cumsum(sum_vv, axis=1)
  sum_vv += cached_var

  # The last time step carries the running totals for the next call.
  cached_sum = sum_v[:, -1:]
  cached_count = count_v[:, -1:]
  cached_var = sum_vv[:, -1:]

  variance = py_utils.with_dependencies([
      py_utils.assert_greater_equal(sum_vv, tf.cast(0, sum_vv.dtype)),
  ], sum_vv / safe_count)
  return mean, variance, cached_sum, cached_count, cached_var
def _StreamMoments(self, inputs, paddings, cached_sum, cached_count,
                   cached_var):
  """Computes mean and variance over the valid data points in inputs.

  Statistics are cumulative along the time axis and continue from the cached
  values of previous calls.

  Args:
    inputs: [B, T, F, N, G] or [B, T, N, G]
    paddings: [B, T, 1, 1, 1] or [B, T, 1, 1]
    cached_sum: [B, 1, 1, N, 1] or [B, 1, N, 1]
    cached_count: same shape as cached_sum.
    cached_var: same shape as cached_sum.

  Returns:
    mean: [B, T, 1, N, 1] or [B, T, N, 1]
    variance: same shape as mean.
    new_cached_sum: same shape as cached_sum.
    new_cached_count: same shape as cached_count.
    new_cached_var: same shape as cached_var.
  """
  tf.logging.vlog(1, 'inputs: %r', inputs)
  tf.logging.vlog(1, 'paddings: %r', paddings)
  tf.logging.vlog(1, 'cached_sum: %r', cached_sum)
  tf.logging.vlog(1, 'cached_count: %r', cached_count)

  # 1.0 on valid frames, 0.0 on padded ones; already in inputs.dtype.
  mask = tf.cast(1.0 - paddings, inputs.dtype)
  inputs *= mask

  input_rank = py_utils.GetRank(inputs)
  assert input_rank is not None, (
      f'inputs rank must be static for {repr(inputs)}')
  all_dims = list(range(input_rank))
  # Skip B, T, and N. Reduce {F,G} or just G.
  reduce_over_dims = all_dims[2:-2] + all_dims[-1:]
  tf.logging.vlog(1, 'reduce_over_dims: %s', reduce_over_dims)

  # [B, T, 1, N, 1] or [B, T, N, 1]
  sum_v = tf.reduce_sum(inputs, reduce_over_dims, keepdims=True)
  sum_v = tf.math.cumsum(sum_v, axis=1)
  sum_v += cached_sum

  # [B, T, 1, 1, 1] or [B, T, 1, 1]: number of valid frames seen so far.
  count_v = tf.reduce_sum(mask, reduce_over_dims, keepdims=True)
  count_v = tf.math.cumsum(count_v, axis=1)

  # Every valid frame contributes one element per reduced position, i.e.
  # F * G for rank-5 inputs and G for rank-4 inputs. Taking the product over
  # reduce_over_dims keeps the count in sync with the reduction above.
  input_shape = py_utils.GetShape(inputs)
  multiplier = 1
  for dim in reduce_over_dims:
    multiplier *= input_shape[dim]
  count_v *= tf.cast(multiplier, count_v.dtype)
  count_v += cached_count

  # Clamp only the divisor. The exact (possibly zero) count is what gets
  # cached; caching the clamped value would add a phantom element whenever
  # every frame seen so far is padding.
  safe_count = tf.maximum(count_v, 1.0)

  tf.logging.vlog(1, 'sum_v: %r', sum_v)
  tf.logging.vlog(1, 'count_v: %r', count_v)

  mean = sum_v / safe_count

  if py_utils.FLAGS.tflite_compatible:
    # TfLite doesn't support broadcasting with 5D tensors, so the mean is
    # tiled to the full input shape explicitly.
    if input_rank == 4:
      tiled_mean = tf.tile(mean, [1, 1, 1, input_shape[3]])
    else:
      tiled_mean = tf.tile(mean, [1, 1, input_shape[2], 1, input_shape[4]])
    sum_vv = tf.reduce_sum(
        tf.math.square(inputs - tiled_mean) * mask,
        reduce_over_dims,
        keepdims=True)
  else:
    sum_vv = tf.reduce_sum(
        (inputs - mean)**2 * mask, reduce_over_dims, keepdims=True)
  sum_vv = tf.math.cumsum(sum_vv, axis=1)
  sum_vv += cached_var

  # The last time step carries the running totals for the next call.
  cached_sum = sum_v[:, -1:]
  cached_count = count_v[:, -1:]
  cached_var = sum_vv[:, -1:]

  variance = py_utils.with_dependencies([
      py_utils.assert_greater_equal(sum_vv, tf.cast(0, sum_vv.dtype)),
  ], sum_vv / safe_count)
  return mean, variance, cached_sum, cached_count, cached_var
def FProp(self, theta, inputs, paddings=None):
  """Applies group normalization.

  Args:
    theta: A NestedMap object containing weights' values of this layer and
      its children layers.
    inputs: The inputs tensor with shape [batch_size, height, width,
      channel].
    paddings: The paddings tensor with shape [batch_size, height]. Intended
      to be used for sequence processing where `height` is `time`.

  Returns:
    A single tensor with the same shape as `inputs` when `paddings` is None;
    otherwise an (output, paddings) pair.
  """
  p = self.params
  inputs = py_utils.with_dependencies([
      py_utils.assert_greater_equal(py_utils.GetRank(inputs), p.input_rank)
  ], inputs)

  # Use at least min_group_size channels per group, capped at dim.
  smallest_group = min(p.min_group_size, p.dim)
  group_size = max(p.dim // p.num_groups, smallest_group)
  num_groups = p.dim // group_size

  input_shape = py_utils.GetShape(inputs)
  with tf.name_scope(p.name):
    # Split the channel axis into [num_groups, group_size].
    x = tf.reshape(inputs, input_shape[:-1] + [num_groups, group_size])
    expanded_rank = p.input_rank + 1
    all_dims = list(range(expanded_rank))

    if paddings is not None:
      # Give paddings singleton axes for everything past [batch, time].
      expanded_paddings = tf.reshape(
          paddings, input_shape[:2] + [1] * (expanded_rank - 2))
      if p.cumulative:
        # Keep d0, d1 and d[-2]; statistics accumulate along axis 1.
        norm_mean, norm_variance = ComputeMomentsWithPadding(
            x,
            expanded_paddings,
            reduce_over_dims=all_dims[2:-2] + all_dims[-1:],
            cumulative_axis=1,
            keepdims=True)
      else:
        # Keep d0 and d[-2].
        norm_mean, norm_variance = ComputeMomentsWithPadding(
            x,
            expanded_paddings,
            all_dims[1:-2] + all_dims[-1:],
            keepdims=True)
    else:
      # Keep d0 and d[-2].
      axes = all_dims[1:-2] + all_dims[-1:]
      counts, means_ss, variance_ss, _ = tf.nn.sufficient_statistics(
          x, axes=axes, keepdims=True)
      norm_mean, norm_variance = tf.nn.normalize_moments(
          counts, means_ss, variance_ss, None)

    norm_mean = py_utils.CheckNumerics(
        norm_mean, 'mean of %s failed numeric check' % p.name)
    norm_variance = py_utils.CheckNumerics(
        norm_variance, 'variance of %s failed numeric check' % p.name)

    beta = theta.beta
    gamma = theta.gamma
    n = input_shape[0]
    t = input_shape[1] if p.cumulative else 1
    if p.input_rank == 4:
      norm_shape = [n, t, 1, num_groups, 1]
    else:
      norm_shape = [n, t, num_groups, 1]

    with tf.control_dependencies([
        py_utils.assert_greater_equal(norm_variance,
                                      tf.cast(0., norm_variance.dtype)),
        py_utils.assert_shape_match(norm_shape, tf.shape(norm_mean)),
        py_utils.assert_shape_match(norm_shape, tf.shape(norm_variance)),
    ]):
      x = (x - norm_mean) / tf.sqrt(norm_variance + self._epsilon)
      # Merge the group axes back into the channel axis.
      x = tf.reshape(x, input_shape)
      gn_output = x * gamma + beta
      gn_output = tf.reshape(gn_output, input_shape)

    if paddings is None:
      return gn_output
    return gn_output, paddings
def IsWithinBBox3D(points_3d, bboxes_3d):
  """Tests which 3-d points fall inside which 3-d bounding boxes.

  Args:
    points_3d: [..., num_points, 3] float32 Tensor specifying points in 3-d
      space as [x, y, z] coordinates.
    bboxes_3d: [..., num_bboxes, 7] float32 Tensor specifying 3-d bboxes as
      [x, y, z, dx, dy, dz, phi] where x, y and z is the center of the box.

  Returns:
    boolean Tensor of shape [..., num_points, num_bboxes] indicating whether
    the points belong within each box.
  """
  # Both arguments must have the same rank and the same leading shape.
  points_3d = py_utils.HasRank(points_3d, py_utils.GetRank(bboxes_3d))
  leading_shape = py_utils.GetShape(bboxes_3d)[:-2]
  points_3d = py_utils.HasShape(points_3d, leading_shape + [-1, 3])
  bboxes_3d = py_utils.HasShape(bboxes_3d, leading_shape + [-1, 7])
  num_points = py_utils.GetShape(points_3d)[-2]
  num_bboxes = py_utils.GetShape(bboxes_3d)[-2]
  pairwise_shape = leading_shape + [num_points, num_bboxes]

  corners = py_utils.HasShape(
      BBoxCorners(bboxes_3d), leading_shape + [num_bboxes, 8, 3])

  # First four points are the top of the bounding box.
  # Counter-clockwise arrangement of points specifying 2-d Euclidean box.
  #   (x0, y1) <--- (x1, y1)
  #                    ^
  #                    |
  #                    |
  #   (x0, y0) ---> (x1, y0)
  top_face_xy = corners[..., 0:4, 0:2]

  # Containment in the (x, y) plane for every point/box pair.
  inside_xy = py_utils.HasShape(
      IsWithinBBox(points_3d[..., :2], top_face_xy), pairwise_shape)

  # Containment along z: the closed interval [z - dz / 2, z + dz / 2].
  [_, _, z, _, _, dz, _] = tf.split(bboxes_3d, 7, axis=-1)
  z_center = z[..., 0]
  z_extent = dz[..., 0]
  z_low = z_center - z_extent / 2.0
  z_high = z_center + z_extent / 2.0

  # [..., num_points, 1] compared against [..., 1, num_bboxes].
  point_z = points_3d[..., 2:]
  inside_z = tf.math.logical_and(
      tf.less_equal(point_z, z_high[..., tf.newaxis, :]),
      tf.greater_equal(point_z, z_low[..., tf.newaxis, :]))
  inside_z = py_utils.HasShape(inside_z, pairwise_shape)

  return tf.math.logical_and(inside_z, inside_xy)
def _StreamMoments(self, inputs, paddings, cached_sum, cached_count,
                   cached_var):
  """Computes cumulative mean and variance over the valid points in inputs.

  Args:
    inputs: [B, T, F, N, G] or [B, T, N, G]
    paddings: [B, T, 1, 1, 1] or [B, T, 1, 1] (same rank as inputs)
    cached_sum: [B, N]
    cached_count: [B, 1]
    cached_var: [B, N]

  Returns:
    mean: [B, T, 1, N, 1] or [B, T, N, 1] (same rank as inputs)
    variance: same shape as mean.
    new_cached_sum: same shape as cached_sum.
    new_cached_count: same shape as cached_count.
    new_cached_var: same shape as cached_var.
  """
  for label, tensor in (('inputs', inputs), ('paddings', paddings),
                        ('cached_sum', cached_sum),
                        ('cached_count', cached_count),
                        ('cached_var', cached_var)):
    tf.logging.vlog(1, label + ': %r', tensor)

  input_rank = py_utils.GetRank(inputs)
  paddings = py_utils.HasRank(paddings, input_rank)
  cached_sum = py_utils.HasRank(cached_sum, 2)
  cached_count = py_utils.HasRank(cached_count, 2)
  cached_var = py_utils.HasRank(cached_var, 2)

  input_shape = py_utils.GetShape(inputs)
  if input_rank == 4:
    # Skip {B,T,N}. Reduce just G.
    reduce_over_dims = [3]
    multiplier = input_shape[3]
  else:
    assert input_rank == 5
    # Skip {B,T,N}. Reduce {F,G}.
    reduce_over_dims = [2, 4]
    multiplier = input_shape[2] * input_shape[4]
  # Shape of the returned moments: the input shape with reduced dims set to 1.
  output_shape = [
      1 if dim in reduce_over_dims else size
      for dim, size in enumerate(input_shape)
  ]

  # [B, T, N]: running sum of the valid inputs, continued from the cache.
  sum_v = tf.reduce_sum(
      py_utils.ApplyPadding(paddings, inputs),
      reduce_over_dims,
      keepdims=False)
  sum_v = tf.math.cumsum(sum_v, axis=1)
  sum_v += cached_sum[:, tf.newaxis, :]

  # [B, T, 1]: running number of valid elements; each valid frame adds
  # `multiplier` of them.
  per_frame_count = py_utils.ApplyPadding(
      paddings, tf.cast(multiplier, inputs.dtype), ensure_shape=False)
  count_v = tf.reduce_sum(per_frame_count, reduce_over_dims, keepdims=False)
  count_v = tf.math.cumsum(count_v, axis=1)
  count_v += cached_count[:, tf.newaxis, :]

  # Divisor clamped to 1 so that positions with no valid data divide by 1
  # rather than 0.
  safe_count = tf.maximum(count_v, 1.0)

  # [B, T, 1, N, 1] or [B, T, N, 1]
  mean = tf.reshape(sum_v / safe_count, output_shape)

  # [B, T, N]: running sum of squared deviations from the running mean.
  sq_diff = tf.math.squared_difference(inputs, mean)
  sum_vv = tf.reduce_sum(
      py_utils.ApplyPadding(paddings, sq_diff),
      reduce_over_dims,
      keepdims=False)
  sum_vv = tf.math.cumsum(sum_vv, axis=1)
  sum_vv += cached_var[:, tf.newaxis, :]

  # [B, T, 1, N, 1] or [B, T, N, 1]
  variance = tf.reshape(sum_vv / safe_count, output_shape)

  tf.logging.vlog(1, 'sum_v: %r', sum_v)
  tf.logging.vlog(1, 'count_v: %r', count_v)
  tf.logging.vlog(1, 'sum_vv: %r', sum_vv)

  # The last time step carries the running totals for the next call:
  # [B, N], [B, 1] and [B, N] respectively.
  return mean, variance, sum_v[:, -1], count_v[:, -1], sum_vv[:, -1]