def filter_functions(): take_all = lambda x: constant_op.constant(True) is_zero = lambda x: math_ops.equal(x, 0) greater = lambda x: math_ops.greater(x + 5, 0) tests = [] filters = [take_all, is_zero, greater] identity = lambda x: x for x, predicate_1 in enumerate(filters): for y, predicate_2 in enumerate(filters): tests.append(("Mixed{}{}".format(x, y), identity, [predicate_1, predicate_2])) for z, predicate_3 in enumerate(filters): tests.append(("Mixed{}{}{}".format(x, y, z), identity, [predicate_1, predicate_2, predicate_3])) take_all_multiple = lambda x, y: constant_op.constant(True) # Multi output tests.append(("Multi1", lambda x: (x, x), [take_all_multiple, take_all_multiple])) tests.append(("Multi2", lambda x: (x, 2), [ take_all_multiple, lambda x, y: math_ops.equal(x * math_ops.cast(y, dtypes.int64), 0) ])) return tuple(tests)
def _process_labels(self, labels): if isinstance(labels, sparse_tensor.SparseTensor): if labels.dtype == dtypes.string: label_ids_values = lookup_ops.index_table_from_tensor( vocabulary_list=tuple(self._label_vocabulary), name='class_id_lookup').lookup(labels.values) label_ids = sparse_tensor.SparseTensor( indices=labels.indices, values=label_ids_values, dense_shape=labels.dense_shape) else: label_ids = labels return math_ops.to_int64( sparse_ops.sparse_to_indicator(label_ids, self._n_classes)) msg = ('labels shape must be [batch_size, {}]. ' 'Given: ').format(self._n_classes) labels_shape = array_ops.shape(labels) check_rank_op = control_flow_ops.Assert( math_ops.equal(array_ops.rank(labels), 2), data=[msg, labels_shape]) check_label_dim = control_flow_ops.Assert( math_ops.equal(labels_shape[-1], self._n_classes), data=[msg, labels_shape]) with ops.control_dependencies([check_rank_op, check_label_dim]): return array_ops.identity(labels)
def rot90(image, k=1, name=None): """Rotate an image counter-clockwise by 90 degrees. Args: image: A 3-D tensor of shape `[height, width, channels]`. k: A scalar integer. The number of times the image is rotated by 90 degrees. name: A name for this operation (optional). Returns: A rotated 3-D tensor of the same type and shape as `image`. """ with ops.name_scope(name, 'rot90', [image, k]) as scope: image = ops.convert_to_tensor(image, name='image') _Check3DImage(image, require_static=False) k = ops.convert_to_tensor(k, dtype=dtypes.int32, name='k') k.get_shape().assert_has_rank(0) k = math_ops.mod(k, 4) def _rot90(): return array_ops.transpose(array_ops.reverse_v2(image, [1]), [1, 0, 2]) def _rot180(): return array_ops.reverse_v2(image, [0, 1]) def _rot270(): return array_ops.reverse_v2(array_ops.transpose(image, [1, 0, 2]), [1]) cases = [(math_ops.equal(k, 1), _rot90), (math_ops.equal(k, 2), _rot180), (math_ops.equal(k, 3), _rot270)] ret = control_flow_ops.case(cases, default=lambda: image, exclusive=True, name=scope) ret.set_shape([None, None, image.get_shape()[2]]) return ret
def map_and_filter_functions(): identity = lambda x: x increment = lambda x: x + 1 minus_five = lambda x: x - 5 def increment_and_square(x): y = x + 1 return y * y take_all = lambda x: constant_op.constant(True) is_zero = lambda x: math_ops.equal(x, 0) is_odd = lambda x: math_ops.equal(x % 2, 0) greater = lambda x: math_ops.greater(x + 5, 0) functions = [identity, increment, minus_five, increment_and_square] filters = [take_all, is_zero, is_odd, greater] tests = [] for x, fun in enumerate(functions): for y, predicate in enumerate(filters): tests.append(("Mixed{}{}".format(x, y), fun, predicate)) # Multi output tests.append(("Multi1", lambda x: (x, x), lambda x, y: constant_op.constant(True))) tests.append( ("Multi2", lambda x: (x, 2), lambda x, y: math_ops.equal(x * math_ops.cast(y, dtypes.int64), 0))) return tuple(tests)
def _process_labels(self, labels): if labels is None: raise ValueError( 'You must provide a labels Tensor. Given: None. ' 'Suggested troubleshooting steps: Check that your data contain ' 'your label feature. Check that your input_fn properly parses and ' 'returns labels.') if isinstance(labels, sparse_tensor.SparseTensor): if labels.dtype == dtypes.string: label_ids_values = lookup_ops.index_table_from_tensor( vocabulary_list=tuple(self._label_vocabulary), name='class_id_lookup').lookup(labels.values) label_ids = sparse_tensor.SparseTensor( indices=labels.indices, values=label_ids_values, dense_shape=labels.dense_shape) else: label_ids = labels return math_ops.to_int64( sparse_ops.sparse_to_indicator(label_ids, self._n_classes)) msg = ('labels shape must be [batch_size, {}]. ' 'Given: ').format(self._n_classes) labels_shape = array_ops.shape(labels) check_rank_op = control_flow_ops.Assert( math_ops.equal(array_ops.rank(labels), 2), data=[msg, labels_shape]) check_label_dim = control_flow_ops.Assert( math_ops.equal(labels_shape[-1], self._n_classes), data=[msg, labels_shape]) with ops.control_dependencies([check_rank_op, check_label_dim]): return array_ops.identity(labels)
def _decode(self, image_buffer, image_format): """Decodes the image buffer. Args: image_buffer: The tensor representing the encoded image tensor. image_format: The image format for the image in `image_buffer`. If image format is `raw`, all images are expected to be in this format, otherwise this op can decode a mix of `jpg` and `png` formats. Returns: A tensor that represents decoded image of self._shape, or (?, ?, self._channels) if self._shape is not specified. """ def decode_image(): """Decodes a png or jpg based on the headers.""" return image_ops.decode_image(image_buffer, self._channels) def decode_raw(): """Decodes a raw image.""" return parsing_ops.decode_raw(image_buffer, out_type=self._dtype) pred_fn_pairs = { math_ops.logical_or( math_ops.equal(image_format, 'raw'), math_ops.equal(image_format, 'RAW')): decode_raw, } image = control_flow_ops.case( pred_fn_pairs, default=decode_image, exclusive=True) image.set_shape([None, None, self._channels]) if self._shape is not None: image = array_ops.reshape(image, self._shape) return image
def _check_labels_and_scores(boolean_labels, scores, check_shape): """Check the rank of labels/scores, return tensor versions.""" with ops.op_scope([boolean_labels, scores], '_check_labels_and_scores'): boolean_labels = ops.convert_to_tensor(boolean_labels, name='boolean_labels') scores = ops.convert_to_tensor(scores, name='scores') if boolean_labels.dtype != dtypes.bool: raise ValueError( 'Argument boolean_labels should have dtype bool. Found: %s', boolean_labels.dtype) if check_shape: labels_rank_1 = logging_ops.Assert( math_ops.equal(1, array_ops.rank(boolean_labels)), ['Argument boolean_labels should have rank 1. Found: ', boolean_labels.name, array_ops.shape(boolean_labels)]) scores_rank_1 = logging_ops.Assert( math_ops.equal(1, array_ops.rank(scores)), ['Argument scores should have rank 1. Found: ', scores.name, array_ops.shape(scores)]) with ops.control_dependencies([labels_rank_1, scores_rank_1]): return boolean_labels, scores else: return boolean_labels, scores
def _broadcast_uniform_partitioned_dimension(self, axis, lengths): """Broadcasts the partitioned dimension `axis` to match `lengths`.""" axis_dim_size = self.dimension_size(axis) partitioned_sizes = list(self._partitioned_dim_sizes[:axis]) if lengths.shape.ndims == 0: lengths = array_ops.where( math_ops.equal(axis_dim_size, 1), lengths, axis_dim_size) repeats = array_ops.where(math_ops.equal(axis_dim_size, 1), lengths, 1) splits = array_ops.stack([0, self.num_slices_in_dimension(axis)]) else: splits = math_ops.range( array_ops.size(lengths, out_type=self.dim_size_dtype) + 1) repeats = lengths partitioned_sizes.append(lengths) for dim_size in self._partitioned_dim_sizes[axis + 1:]: if dim_size.shape.ndims == 0: partitioned_sizes.append(dim_size) splits *= dim_size else: partitioned_sizes.append( ragged_util.repeat_ranges(dim_size, splits, repeats)) splits = array_ops.gather( ragged_util.lengths_to_splits(dim_size), splits) inner_sizes = self._inner_dim_sizes return RaggedTensorDynamicShape(partitioned_sizes, inner_sizes)
def _compute_zeroone_score(labels, predictions): zeroone_score = math_ops.to_float( math_ops.equal( math_ops.reduce_sum( math_ops.to_int32(math_ops.equal(labels, predictions))), array_ops.shape(labels)[0])) return zeroone_score
def _decode(self, image_buffer, image_format): """Decodes the image buffer. Args: image_buffer: T tensor representing the encoded image tensor. image_format: The image format for the image in `image_buffer`. Returns: A decoder image. """ def decode_png(): return image_ops.decode_png(image_buffer, self._channels) def decode_raw(): return parsing_ops.decode_raw(image_buffer, dtypes.uint8) def decode_jpg(): return image_ops.decode_jpeg(image_buffer, self._channels) image = control_flow_ops.case({ math_ops.logical_or(math_ops.equal(image_format, 'png'), math_ops.equal(image_format, 'PNG')): decode_png, math_ops.logical_or(math_ops.equal(image_format, 'raw'), math_ops.equal(image_format, 'RAW')): decode_raw, }, default=decode_jpg, exclusive=True) image.set_shape([None, None, self._channels]) if self._shape is not None: image = array_ops.reshape(image, self._shape) return image
def get_seed(seed): """Returns the local seeds an operation should use given an op-specific seed. See `tf.compat.v1.get_seed` for more details. This wrapper adds support for the case where `seed` may be a tensor. Args: seed: An integer or a `tf.int64` scalar tensor. Returns: A tuple of two `tf.int64` scalar tensors that should be used for the local seed of the calling dataset. """ seed, seed2 = random_seed.get_seed(seed) if seed is None: seed = constant_op.constant(0, dtype=dtypes.int64, name="seed") else: seed = ops.convert_to_tensor(seed, dtype=dtypes.int64, name="seed") if seed2 is None: seed2 = constant_op.constant(0, dtype=dtypes.int64, name="seed2") else: with ops.name_scope("seed2") as scope: seed2 = ops.convert_to_tensor(seed2, dtype=dtypes.int64) seed2 = array_ops.where( math_ops.logical_and( math_ops.equal(seed, 0), math_ops.equal(seed2, 0)), constant_op.constant(2**31 - 1, dtype=dtypes.int64), seed2, name=scope) return seed, seed2
def _safe_div(numerator, denominator, name="value"): """Computes a safe divide which returns 0 if the denominator is zero. Note that the function contains an additional conditional check that is necessary for avoiding situations where the loss is zero causing NaNs to creep into the gradient computation. Args: numerator: An arbitrary `Tensor`. denominator: `Tensor` whose shape matches `numerator` and whose values are assumed to be non-negative. name: An optional name for the returned op. Returns: The element-wise value of the numerator divided by the denominator. """ if isinstance(denominator, float): if math_ops.equal(denominator, 0.0): return ops.convert_to_tensor(0.0, dtype=numerator.dtype) return math_ops.div(numerator, denominator) if context.in_eager_mode() and denominator._rank() == 0: # pylint: disable=protected-access if math_ops.equal(denominator, 0.0): return ops.convert_to_tensor(0.0, dtype=numerator.dtype) return math_ops.div(numerator, denominator) return array_ops.where( math_ops.greater(denominator, 0), math_ops.div(numerator, array_ops.where( math_ops.equal(denominator, 0), array_ops.ones_like(denominator), denominator)), array_ops.zeros_like(numerator), name=name)
def same_dynamic_shape(a, b): """Returns whether a and b have the same dynamic shape. Args: a: `Tensor` b: `Tensor` Returns: `Boolean` `Tensor` representing if both tensors have the same shape. """ a = ops.convert_to_tensor(a, name="a") b = ops.convert_to_tensor(b, name="b") # One of the shapes isn't fully defined, so we need to use the dynamic # shape. return control_flow_ops.cond( math_ops.equal(array_ops.rank(a), array_ops.rank(b)), # Here we can't just do math_ops.equal(a.shape, b.shape), since # static shape inference may break the equality comparison between # shape(a) and shape(b) in math_ops.equal. lambda: math_ops.reduce_all(math_ops.equal( array_ops.concat_v2(( array_ops.shape(a), array_ops.shape(b)), 0), array_ops.concat_v2(( array_ops.shape(b), array_ops.shape(a)), 0))), lambda: constant_op.constant(False))
def _compute_zeroone_score(labels, predictions): zeroone_score = math_ops.cast( math_ops.equal( math_ops.reduce_sum( math_ops.cast(math_ops.equal(labels, predictions), dtypes.int32)), array_ops.shape(labels)[0]), dtypes.float32) return zeroone_score
def _dynamic_rank_in(actual_rank, given_ranks): if len(given_ranks) < 1: return ops.convert_to_tensor(False) result = math_ops.equal(given_ranks[0], actual_rank) for given_rank in given_ranks[1:]: result = math_ops.logical_or( result, math_ops.equal(given_rank, actual_rank)) return result
def testCase_dict(self): x = constant_op.constant(2) conditions = { math_ops.equal(x, 1): lambda: constant_op.constant(2), math_ops.equal(x, 2): lambda: constant_op.constant(4) } output = control_flow_ops.case(conditions, exclusive=True) self.assertEqual(4, self.evaluate(output))
def remove_squeezable_dimensions( labels, predictions, expected_rank_diff=0, name=None): """Squeeze last dim if ranks differ from expected by exactly 1. In the common case where we expect shapes to match, `expected_rank_diff` defaults to 0, and we squeeze the last dimension of the larger rank if they differ by 1. But, for example, if `labels` contains class IDs and `predictions` contains 1 probability per class, we expect `predictions` to have 1 more dimension than `labels`, so `expected_rank_diff` would be 1. In this case, we'd squeeze `labels` if `rank(predictions) - rank(labels) == 0`, and `predictions` if `rank(predictions) - rank(labels) == 2`. This will use static shape if available. Otherwise, it will add graph operations, which could result in a performance hit. Args: labels: Label values, a `Tensor` whose dimensions match `predictions`. predictions: Predicted values, a `Tensor` of arbitrary dimensions. expected_rank_diff: Expected result of `rank(predictions) - rank(labels)`. name: Name of the op. Returns: Tuple of `labels` and `predictions`, possibly with last dim squeezed. """ with ops.name_scope(name, 'remove_squeezable_dimensions', [labels, predictions]): predictions = ops.convert_to_tensor(predictions) labels = ops.convert_to_tensor(labels) predictions_shape = predictions.get_shape() predictions_rank = predictions_shape.ndims labels_shape = labels.get_shape() labels_rank = labels_shape.ndims if (labels_rank is not None) and (predictions_rank is not None): # Use static rank. rank_diff = predictions_rank - labels_rank if rank_diff == expected_rank_diff + 1: predictions = array_ops.squeeze(predictions, [-1]) elif rank_diff == expected_rank_diff - 1: labels = array_ops.squeeze(labels, [-1]) return labels, predictions # Use dynamic rank. rank_diff = array_ops.rank(predictions) - array_ops.rank(labels) if (predictions_rank is None) or ( predictions_shape.dims[-1].is_compatible_with(1)): predictions = control_flow_ops.cond( math_ops.equal(expected_rank_diff + 1, rank_diff), lambda: array_ops.squeeze(predictions, [-1]), lambda: predictions) if (labels_rank is None) or ( labels_shape.dims[-1].is_compatible_with(1)): labels = control_flow_ops.cond( math_ops.equal(expected_rank_diff - 1, rank_diff), lambda: array_ops.squeeze(labels, [-1]), lambda: labels) return labels, predictions
def _loss_fn(labels, logits): check_labels = control_flow_ops.Assert( math_ops.reduce_all(math_ops.equal(labels, labels_input)), data=[labels]) check_logits = control_flow_ops.Assert( math_ops.reduce_all(math_ops.equal(logits, logits_input)), data=[logits]) with ops.control_dependencies([check_labels, check_logits]): return constant_op.constant(loss)
def testCase_withDefault(self): x = array_ops.placeholder(dtype=dtypes.int32, shape=[]) conditions = [(math_ops.equal(x, 1), lambda: constant_op.constant(2)), (math_ops.equal(x, 2), lambda: constant_op.constant(4))] default = lambda: constant_op.constant(6) output = control_flow_ops.case(conditions, default, exclusive=True) with self.test_session() as sess: self.assertEqual(sess.run(output, feed_dict={x: 1}), 2) self.assertEqual(sess.run(output, feed_dict={x: 2}), 4) self.assertEqual(sess.run(output, feed_dict={x: 3}), 6)
def _decode(self, image_buffer, image_format): """Decodes the image buffer. Args: image_buffer: The tensor representing the encoded image tensor. image_format: The image format for the image in `image_buffer`. If image format is `raw`, all images are expected to be in this format, otherwise this op can decode a mix of `jpg` and `png` formats. Returns: A tensor that represents decoded image of self._shape, or (?, ?, self._channels) if self._shape is not specified. """ def decode_image(): """Decodes a image based on the headers.""" return math_ops.cast( image_ops.decode_image(image_buffer, channels=self._channels), self._dtype) def decode_jpeg(): """Decodes a jpeg image with specified '_dct_method'.""" return math_ops.cast( image_ops.decode_jpeg( image_buffer, channels=self._channels, dct_method=self._dct_method), self._dtype) def check_jpeg(): """Checks if an image is jpeg.""" # For jpeg, we directly use image_ops.decode_jpeg rather than decode_image # in order to feed the jpeg specify parameter 'dct_method'. return control_flow_ops.cond( image_ops.is_jpeg(image_buffer), decode_jpeg, decode_image, name='cond_jpeg') def decode_raw(): """Decodes a raw image.""" return parsing_ops.decode_raw(image_buffer, out_type=self._dtype) pred_fn_pairs = { math_ops.logical_or( math_ops.equal(image_format, 'raw'), math_ops.equal(image_format, 'RAW')): decode_raw, } image = control_flow_ops.case( pred_fn_pairs, default=check_jpeg, exclusive=True) image.set_shape([None, None, self._channels]) if self._shape is not None: image = array_ops.reshape(image, self._shape) return image
def testCase_withoutDefault(self): x = array_ops.placeholder(dtype=dtypes.int32, shape=[]) conditions = [(math_ops.equal(x, 1), lambda: constant_op.constant(2)), (math_ops.equal(x, 2), lambda: constant_op.constant(4)), (math_ops.equal(x, 3), lambda: constant_op.constant(6))] output = control_flow_ops.case(conditions, exclusive=True) with self.test_session() as sess: self.assertEqual(sess.run(output, feed_dict={x: 1}), 2) self.assertEqual(sess.run(output, feed_dict={x: 2}), 4) self.assertEqual(sess.run(output, feed_dict={x: 3}), 6) with self.assertRaisesRegexp(errors.InvalidArgumentError, "Input error:"): sess.run(output, feed_dict={x: 4})
def _maybe_convert_labels(y_true): """Converts binary labels into -1/1.""" are_zeros = math_ops.equal(y_true, 0) are_ones = math_ops.equal(y_true, 1) is_binary = math_ops.reduce_all(math_ops.logical_or(are_zeros, are_ones)) def _convert_binary_labels(): # Convert the binary labels to -1 or 1. return 2. * y_true - 1. updated_y_true = smart_cond.smart_cond(is_binary, _convert_binary_labels, lambda: y_true) return updated_y_true
def testCase_multiple_matches_exclusive(self): x = array_ops.placeholder(dtype=dtypes.int32, shape=[]) conditions = [(math_ops.equal(x, 1), lambda: constant_op.constant(2)), (math_ops.equal(x, 2), lambda: constant_op.constant(4)), (math_ops.equal(x, 2), lambda: constant_op.constant(6))] default = lambda: constant_op.constant(8) output = control_flow_ops.case(conditions, default, exclusive=True) with self.test_session() as sess: self.assertEqual(sess.run(output, feed_dict={x: 1}), 2) self.assertEqual(sess.run(output, feed_dict={x: 3}), 8) with self.assertRaisesRegexp(errors.InvalidArgumentError, "More than one condition evaluated as True"): sess.run(output, feed_dict={x: 2})
def testEqual(self): # Scalar inputs. tf_val = math_ops.equal(constant_op.constant(1), constant_op.constant(1)) self.assertEqual(tensor_util.constant_value(tf_val), True) tf_val = math_ops.equal(constant_op.constant(1), constant_op.constant(0)) self.assertEqual(tensor_util.constant_value(tf_val), False) # Shaped inputs with broadcast semantics. tf_val = math_ops.equal(constant_op.constant([[0, 1]]), constant_op.constant([[0], [1]])) c_val = tensor_util.constant_value(tf_val) self.assertAllEqual(c_val, [[True, False], [False, True]])
def control_map_fn(x, y): def multiply(): return x * 2 def divide(): return x // 2 pred_fn_pairs = { math_ops.logical_or(math_ops.equal(y, 2), math_ops.equal(y, 3)): divide, } return control_flow_ops.case( pred_fn_pairs, default=multiply, exclusive=True)
def _process_labels(self, labels): if isinstance(labels, sparse_tensor.SparseTensor): return math_ops.to_int64( sparse_ops.sparse_to_indicator(labels, self._n_classes)) msg = ('labels shape must be [batch_size, {}]. ' 'Given: ').format(self._n_classes) labels_shape = array_ops.shape(labels) check_rank_op = control_flow_ops.Assert( math_ops.equal(array_ops.rank(labels), 2), data=[msg, labels_shape]) check_label_dim = control_flow_ops.Assert( math_ops.equal(labels_shape[-1], self._n_classes), data=[msg, labels_shape]) with ops.control_dependencies([check_rank_op, check_label_dim]): return array_ops.identity(labels)
def __call__(self, step): with ops.name_scope( self.name, "PolynomialDecay", [self.initial_learning_rate, step, self.decay_steps, self.end_learning_rate, self.power] ) as name: initial_learning_rate = ops.convert_to_tensor( self.initial_learning_rate, name="initial_learning_rate") dtype = initial_learning_rate.dtype end_learning_rate = math_ops.cast(self.end_learning_rate, dtype) power = math_ops.cast(self.power, dtype) global_step_recomp = math_ops.cast(step, dtype) decay_steps_recomp = math_ops.cast(self.decay_steps, dtype) if self.cycle: # Find the first multiple of decay_steps that is bigger than # global_step. If global_step is zero set the multiplier to 1 multiplier = control_flow_ops.cond( math_ops.equal(global_step_recomp, 0), lambda: 1.0, lambda: math_ops.ceil(global_step_recomp / self.decay_steps)) decay_steps_recomp = math_ops.multiply(decay_steps_recomp, multiplier) else: # Make sure that the global_step used is not bigger than decay_steps. global_step_recomp = math_ops.minimum(global_step_recomp, self.decay_steps) p = math_ops.div(global_step_recomp, decay_steps_recomp) return math_ops.add( math_ops.multiply(initial_learning_rate - end_learning_rate, math_ops.pow(1 - p, power)), end_learning_rate, name=name)
def testNonSequenceNestedStructure(self): components = np.array([1, 2, 3], dtype=np.int64) dataset = dataset_ops.Dataset.from_tensors(components) self.assertEquals(dtypes.int64, dataset.output_types) self.assertEquals([3], dataset.output_shapes) dataset = dataset.filter( lambda x: math_ops.reduce_all(math_ops.equal(x, components))) self.assertEquals(dtypes.int64, dataset.output_types) self.assertEquals([3], dataset.output_shapes) dataset = dataset.map(lambda x: array_ops.stack([x, x])) self.assertEquals(dtypes.int64, dataset.output_types) self.assertEquals([2, 3], dataset.output_shapes) dataset = dataset.flat_map( lambda x: dataset_ops.Dataset.from_tensor_slices(x)) self.assertEquals(dtypes.int64, dataset.output_types) self.assertEquals([3], dataset.output_shapes) iterator = dataset.make_one_shot_iterator() get_next = iterator.get_next() self.assertEquals(dtypes.int64, get_next.dtype) self.assertEquals([3], get_next.shape)
def _num_present(losses, weights, per_batch=False): """Computes the number of elements in the loss function induced by `weights`. A given weights tensor induces different numbers of usable elements in the `losses` tensor. The `weights` tensor is broadcast across `losses` for all possible dimensions. For example, if `losses` is a tensor of dimension `[4, 5, 6, 3]` and `weights` is a tensor of shape `[4, 5]`, then `weights` is, in effect, tiled to match the shape of `losses`. Following this effective tile, the total number of present elements is the number of non-zero weights. Args: losses: `Tensor` of shape `[batch_size, d1, ... dN]`. weights: `Tensor` of shape `[]`, `[batch_size]` or `[batch_size, d1, ... dK]`, where K < N. per_batch: Whether to return the number of elements per batch or as a sum total. Returns: The number of present (non-zero) elements in the losses tensor. If `per_batch` is `True`, the value is returned as a tensor of size `[batch_size]`. Otherwise, a single scalar tensor is returned. """ with ops.name_scope(None, "num_present", (losses, weights)) as scope: weights = math_ops.to_float(weights) present = array_ops.where( math_ops.equal(weights, 0.0), array_ops.zeros_like(weights), array_ops.ones_like(weights)) present = weights_broadcast_ops.broadcast_weights(present, losses) if per_batch: return math_ops.reduce_sum( present, axis=math_ops.range(1, array_ops.rank(present)), keep_dims=True, name=scope) return math_ops.reduce_sum(present, name=scope)
def _make_columnar(self, x): """Ensures non-scalar input has at least one column. Example: If `x = [1, 2, 3]` then the output is `[[1], [2], [3]]`. If `x = [[1, 2, 3], [4, 5, 6]]` then the output is unchanged. If `x = 1` then the output is unchanged. Args: x: `Tensor`. Returns: columnar_x: `Tensor` with at least two dimensions. """ if x.get_shape().ndims is not None: if x.get_shape().ndims == 1: x = x[array_ops.newaxis, :] return x shape = array_ops.shape(x) maybe_expanded_shape = array_ops.concat([ shape[:-1], distribution_util.pick_vector( math_ops.equal(array_ops.rank(x), 1), [1], np.array([], dtype=np.int32)), shape[-1:], ], 0) return array_ops.reshape(x, maybe_expanded_shape)
def defaults_two(): return control_flow_ops.cond( math_ops.equal(math_ops.mod(x, 2), 0), multiply, divide, name="cond_mult")
def move_dimension(x, source_idx, dest_idx): """Move a single tensor dimension within its shape. This is a special case of `tf.transpose()`, which applies arbitrary permutations to tensor dimensions. Args: x: Tensor of rank `ndims`. source_idx: Integer index into `x.shape` (negative indexing is supported). dest_idx: Integer index into `x.shape` (negative indexing is supported). Returns: x_perm: Tensor of rank `ndims`, in which the dimension at original index `source_idx` has been moved to new index `dest_idx`, with all other dimensions retained in their original order. Example: ```python x = tf.placeholder(shape=[200, 30, 4, 1, 6]) x_perm = _move_dimension(x, 1, 1) # no-op x_perm = _move_dimension(x, 0, 3) # result shape [30, 4, 1, 200, 6] x_perm = _move_dimension(x, 0, -2) # equivalent to previous x_perm = _move_dimension(x, 4, 2) # result shape [200, 30, 6, 4, 1] ``` """ ndims = util.prefer_static_rank(x) if isinstance(source_idx, int): dtype = dtypes.int32 else: dtype = dtypes.as_dtype(source_idx.dtype) # Handle negative indexing. Since ndims might be dynamic, this makes # source_idx and dest_idx also possibly dynamic. if source_idx < 0: source_idx = ndims + source_idx if dest_idx < 0: dest_idx = ndims + dest_idx # Construct the appropriate permutation of dimensions, depending # whether the source is before or after the destination. def move_left_permutation(): return util.prefer_static_value( array_ops.concat([ math_ops.range(0, dest_idx, dtype=dtype), [source_idx], math_ops.range(dest_idx, source_idx, dtype=dtype), math_ops.range(source_idx + 1, ndims, dtype=dtype) ], axis=0)) def move_right_permutation(): return util.prefer_static_value( array_ops.concat([ math_ops.range(0, source_idx, dtype=dtype), math_ops.range(source_idx + 1, dest_idx + 1, dtype=dtype), [source_idx], math_ops.range(dest_idx + 1, ndims, dtype=dtype) ], axis=0)) def x_permuted(): return array_ops.transpose(x, perm=smart_cond.smart_cond( source_idx < dest_idx, move_right_permutation, move_left_permutation)) # One final conditional to handle the special case where source # and destination indices are equal. return smart_cond.smart_cond(math_ops.equal(source_idx, dest_idx), lambda: x, x_permuted)
def get_weights_and_check_match_logits(features, weight_column, logits, allow_per_logit_weights=False): """Fetches weights from features and checks that the shape matches logits. Consider logits of shape [D0, D1, ... DN, logits_dimension]. Weights shape can be either: * [D0, D1, ... DN, logits_dimension] if `allow_per_logit_weights=True`. * [D0, D1, ... DN, 1] * [D0, D1, ... DN]: In this case, weights is reshaped into [D0, D1, ... DN, 1] to work with weight broadcasting rules. Args: features: The features dict that contains weights. weight_column: The weight column. If not given, this method returns 1. logits: logits Tensor. allow_per_logit_weights: Boolean. Whether we allow weights along the logits dimension, namely shape `[D0, D1, ... DN, logits_dimension]`. Returns: Validated and reshaped weights Tensor. Raises: ValueError: If the weights `Tensor` cannot be cast into float. """ if allow_per_logit_weights: err_msg = ( 'weights shape must be [D0, D1, ... DN], [D0, D1, ... DN, 1] or ' '[D0, D1, ... DN, logits_dimension]') else: err_msg = ( 'weights shape must be [D0, D1, ... DN] or [D0, D1, ... DN, 1]') with ops.name_scope('weights', values=tuple(six.itervalues(features)) + (logits, )) as scope: # Fetch the weights. if weight_column is None: return 1. # TODO(b/117839674): update feature_column if isinstance(weight_column, six.string_types): weight_column = feature_column_lib.numeric_column( key=weight_column, shape=(1, )) if not isinstance(weight_column, feature_column_lib._NumericColumn): # pylint: disable=protected-access raise TypeError( 'Weight column must be either a string or _NumericColumn.' ' Given type: {}.'.format(type(weight_column))) weights = weight_column._get_dense_tensor( # pylint: disable=protected-access feature_column_lib._LazyBuilder(features)) # pylint: disable=protected-access if not (weights.dtype.is_floating or weights.dtype.is_integer): raise ValueError('Weight column should be castable to float. ' 'Given dtype: {}'.format(weights.dtype)) weights = math_ops.to_float(weights, name='weights') # Validate the weights shape. # Eager mode. if context.executing_eagerly(): weights_shape = weights._shape_tuple() # pylint: disable=protected-access logits_shape = logits._shape_tuple() # pylint: disable=protected-access weights_rank = weights._rank() # pylint: disable=protected-access logits_rank = logits._rank() # pylint: disable=protected-access if (weights_rank is not None and logits_rank is not None and weights_rank == logits_rank - 1): if logits_shape[:-1] != weights_shape: raise ValueError( '{}, logits_shape: {}. weights_shape: {}.'.format( err_msg, logits_shape, weights_shape)) return array_ops.expand_dims(weights, -1, name=scope) supported_weights_shape = logits_shape[:-1] + (1, ) if allow_per_logit_weights: if (logits_shape != weights_shape and supported_weights_shape != weights_shape): raise ValueError( '{}, logits_shape: {}. weights_shape: {}.'.format( err_msg, logits_shape, weights_shape)) else: if supported_weights_shape != weights_shape: raise ValueError( '{}, logits_shape: {}. weights_shape: {}.'.format( err_msg, logits_shape, weights_shape)) return weights # Graph mode. weights_shape = array_ops.shape(weights, name='weights_shape') logits_shape = array_ops.shape(logits, name='logits_shape') if (weights.shape.ndims is not None and logits.shape.ndims is not None and weights.shape.ndims == logits.shape.ndims - 1): assert_dimension = check_ops.assert_equal(logits_shape[:-1], weights_shape, message=err_msg, data=[ 'logits_shape: ', logits_shape, 'weights_shape: ', weights_shape ]) with ops.control_dependencies([assert_dimension]): return array_ops.expand_dims(weights, -1, name=scope) supported_weights_shape = array_ops.concat([logits_shape[:-1], [1]], axis=0) if allow_per_logit_weights: condition = math_ops.reduce_any([ math_ops.reduce_all(math_ops.equal(logits_shape, weights_shape)), math_ops.reduce_all( math_ops.equal(supported_weights_shape, weights_shape)) ]) assert_dimension = control_flow_ops.Assert(condition=condition, data=[ err_msg, 'logits_shape: ', logits_shape, 'weights_shape: ', weights_shape ]) else: assert_dimension = check_ops.assert_equal(supported_weights_shape, weights_shape, message=err_msg, data=[ 'logits_shape: ', logits_shape, 'weights_shape: ', weights_shape ]) with ops.control_dependencies([assert_dimension]): return array_ops.identity(weights, name=scope)
def _beam_search_step(time, logits, next_cell_state, beam_state, batch_size, beam_width, end_token, length_penalty_weight, coverage_penalty_weight): """Performs a single step of Beam Search Decoding. Args: time: Beam search time step, should start at 0. At time 0 we assume that all beams are equal and consider only the first beam for continuations. logits: Logits at the current time step. A tensor of shape `[batch_size, beam_width, vocab_size]` next_cell_state: The next state from the cell, e.g. an instance of AttentionWrapperState if the cell is attentional. beam_state: Current state of the beam search. An instance of `BeamSearchDecoderState`. batch_size: The batch size for this input. beam_width: Python int. The size of the beams. end_token: The int32 end token. length_penalty_weight: Float weight to penalize length. Disabled with 0.0. coverage_penalty_weight: Float weight to penalize the coverage of source sentence. Disabled with 0.0. Returns: A new beam state. """ static_batch_size = tensor_util.constant_value(batch_size) # Calculate the current lengths of the predictions prediction_lengths = beam_state.lengths previously_finished = beam_state.finished not_finished = math_ops.logical_not(previously_finished) # Calculate the total log probs for the new hypotheses # Final Shape: [batch_size, beam_width, vocab_size] step_log_probs = nn_ops.log_softmax(logits) step_log_probs = _mask_probs(step_log_probs, end_token, previously_finished) total_probs = array_ops.expand_dims(beam_state.log_probs, 2) + step_log_probs # Calculate the continuation lengths by adding to all continuing beams. vocab_size = logits.shape[-1].value or array_ops.shape(logits)[-1] lengths_to_add = array_ops.one_hot(indices=array_ops.fill( [batch_size, beam_width], end_token), depth=vocab_size, on_value=np.int64(0), off_value=np.int64(1), dtype=dtypes.int64) add_mask = math_ops.to_int64(not_finished) lengths_to_add *= array_ops.expand_dims(add_mask, 2) new_prediction_lengths = (lengths_to_add + array_ops.expand_dims(prediction_lengths, 2)) # Calculate the accumulated attention probabilities if coverage penalty is # enabled. accumulated_attention_probs = None attention_probs = get_attention_probs(next_cell_state, coverage_penalty_weight) if attention_probs is not None: attention_probs *= array_ops.expand_dims( math_ops.to_float(not_finished), 2) accumulated_attention_probs = (beam_state.accumulated_attention_probs + attention_probs) # Calculate the scores for each beam scores = _get_scores( log_probs=total_probs, sequence_lengths=new_prediction_lengths, length_penalty_weight=length_penalty_weight, coverage_penalty_weight=coverage_penalty_weight, finished=previously_finished, accumulated_attention_probs=accumulated_attention_probs) time = ops.convert_to_tensor(time, name="time") # During the first time step we only consider the initial beam scores_flat = array_ops.reshape(scores, [batch_size, -1]) # Pick the next beams according to the specified successors function next_beam_size = ops.convert_to_tensor(beam_width, dtype=dtypes.int32, name="beam_width") next_beam_scores, word_indices = nn_ops.top_k(scores_flat, k=next_beam_size) next_beam_scores.set_shape([static_batch_size, beam_width]) word_indices.set_shape([static_batch_size, beam_width]) # Pick out the probs, beam_ids, and states according to the chosen predictions next_beam_probs = _tensor_gather_helper(gather_indices=word_indices, gather_from=total_probs, batch_size=batch_size, range_size=beam_width * vocab_size, gather_shape=[-1], name="next_beam_probs") # Note: just doing the following # math_ops.to_int32(word_indices % vocab_size, # name="next_beam_word_ids") # would be a lot cleaner but for reasons unclear, that hides the results of # the op which prevents capturing it with tfdbg debug ops. raw_next_word_ids = math_ops.mod(word_indices, vocab_size, name="next_beam_word_ids") next_word_ids = math_ops.to_int32(raw_next_word_ids) next_beam_ids = math_ops.to_int32(word_indices / vocab_size, name="next_beam_parent_ids") # Append new ids to current predictions previously_finished = _tensor_gather_helper( gather_indices=next_beam_ids, gather_from=previously_finished, batch_size=batch_size, range_size=beam_width, gather_shape=[-1]) next_finished = math_ops.logical_or(previously_finished, math_ops.equal(next_word_ids, end_token), name="next_beam_finished") # Calculate the length of the next predictions. # 1. Finished beams remain unchanged. # 2. Beams that are now finished (EOS predicted) have their length # increased by 1. # 3. Beams that are not yet finished have their length increased by 1. lengths_to_add = math_ops.to_int64( math_ops.logical_not(previously_finished)) next_prediction_len = _tensor_gather_helper(gather_indices=next_beam_ids, gather_from=beam_state.lengths, batch_size=batch_size, range_size=beam_width, gather_shape=[-1]) next_prediction_len += lengths_to_add next_accumulated_attention_probs = () if accumulated_attention_probs is not None: next_accumulated_attention_probs = _tensor_gather_helper( gather_indices=next_beam_ids, gather_from=accumulated_attention_probs, batch_size=batch_size, range_size=beam_width, gather_shape=[batch_size * beam_width, -1], name="next_accumulated_attention_probs") # Pick out the cell_states according to the next_beam_ids. We use a # different gather_shape here because the cell_state tensors, i.e. # the tensors that would be gathered from, all have dimension # greater than two and we need to preserve those dimensions. # pylint: disable=g-long-lambda next_cell_state = nest.map_structure( lambda gather_from: _maybe_tensor_gather_helper( gather_indices=next_beam_ids, gather_from=gather_from, batch_size=batch_size, range_size=beam_width, gather_shape=[batch_size * beam_width, -1]), next_cell_state) # pylint: enable=g-long-lambda next_state = BeamSearchDecoderState( cell_state=next_cell_state, log_probs=next_beam_probs, lengths=next_prediction_len, finished=next_finished, accumulated_attention_probs=next_accumulated_attention_probs) output = BeamSearchDecoderOutput(scores=next_beam_scores, predicted_ids=next_word_ids, parent_ids=next_beam_ids) return output, next_state
def _train_deeplab_model(iterator, num_of_classes, ignore_label): """Trains the deeplab model. Args: iterator: An iterator of type tf.data.Iterator for images and labels. num_of_classes: Number of classes for the dataset. ignore_label: Ignore label for the dataset. Returns: train_tensor: A tensor to update the model variables. summary_op: An operation to log the summaries. """ global_step = tf.train.get_or_create_global_step() summaries = [] learning_rate = train_utils.get_model_learning_rate( FLAGS.learning_policy, FLAGS.base_learning_rate, FLAGS.learning_rate_decay_step, FLAGS.learning_rate_decay_factor, FLAGS.training_number_of_steps, FLAGS.learning_power, FLAGS.slow_start_step, FLAGS.slow_start_learning_rate) summaries.append(tf.summary.scalar('learning_rate', learning_rate)) optimizer = tf.train.MomentumOptimizer(learning_rate, FLAGS.momentum) tower_grads = [] tower_summaries = None for i in range(FLAGS.num_clones): with tf.device('/gpu:%d' % i): with tf.name_scope('clone_%d' % i) as scope: loss = _tower_loss(iterator=iterator, num_of_classes=num_of_classes, ignore_label=ignore_label, scope=scope, reuse_variable=(i != 0)) grads = optimizer.compute_gradients(loss) tower_grads.append(grads) # Retain the summaries from the first tower. if not i: tower_summaries = tf.summary.merge_all(scope=scope) with tf.device('/cpu:0'): grads_and_vars = _average_gradients(tower_grads) if tower_summaries is not None: summaries.append(tower_summaries) # Modify the gradients for biases and last layer variables. last_layers = model.get_extra_layer_scopes( FLAGS.last_layers_contain_logits_only) grad_mult = train_utils.get_model_gradient_multipliers( last_layers, FLAGS.last_layer_gradient_multiplier) if grad_mult: grads_and_vars = tf.contrib.training.multiply_gradients( grads_and_vars, grad_mult) # Create gradient update op. grad_updates = optimizer.apply_gradients(grads_and_vars, global_step=global_step) # Gather update_ops. These contain, for example, # the updates for the batch_norm variables created by model_fn. update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) update_ops.append(grad_updates) update_op = tf.group(*update_ops) total_loss = tf.losses.get_total_loss(add_regularization_losses=True) # Print total loss to the terminal. # This implementation is mirrored from tf.slim.summaries. should_log = math_ops.equal(math_ops.mod(global_step, FLAGS.log_steps), 0) total_loss = tf.cond( should_log, lambda: tf.Print(total_loss, [total_loss], 'Total loss is :'), lambda: total_loss) summaries.append(tf.summary.scalar('total_loss', total_loss)) with tf.control_dependencies([update_op]): train_tensor = tf.identity(total_loss, name='train_op') summary_op = tf.summary.merge(summaries) return train_tensor, summary_op
def _maybe_expand_weights(): expand_weights = lambda: array_ops.expand_dims(sample_weight, [-1]) return control_flow_ops.cond(math_ops.equal(rank_diff, -1), expand_weights, lambda: sample_weight)
def squeeze_or_expand_dimensions(y_pred, y_true=None, sample_weight=None): """Squeeze or expand last dimension if needed. 1. Squeezes last dim of `y_pred` or `y_true` if their rank differs by 1 (using `remove_squeezable_dimensions`). 2. Squeezes or expands last dim of `sample_weight` if its rank differs by 1 from the new rank of `y_pred`. If `sample_weight` is scalar, it is kept scalar. This will use static shape if available. Otherwise, it will add graph operations, which could result in a performance hit. Args: y_pred: Predicted values, a `Tensor` of arbitrary dimensions. y_true: Optional label `Tensor` whose dimensions match `y_pred`. sample_weight: Optional weight scalar or `Tensor` whose dimensions match `y_pred`. Returns: Tuple of `y_pred`, `y_true` and `sample_weight`. Each of them possibly has the last dimension squeezed, `sample_weight` could be extended by one dimension. If `sample_weight` is None, (y_pred, y_true) is returned. """ y_pred_shape = y_pred.shape y_pred_rank = y_pred_shape.ndims if y_true is not None: # If sparse matrix is provided as `y_true`, the last dimension in `y_pred` # may be > 1. Eg: y_true = [0, 1, 2] (shape=(3,)), # y_pred = [[.9, .05, .05], [.5, .89, .6], [.05, .01, .94]] (shape=(3, 3)) # In this case, we should not try to remove squeezable dimension. y_true_shape = y_true.shape y_true_rank = y_true_shape.ndims if (y_true_rank is not None) and (y_pred_rank is not None): # Use static rank for `y_true` and `y_pred`. if (y_pred_rank - y_true_rank != 1) or y_pred_shape[-1] == 1: y_true, y_pred = remove_squeezable_dimensions(y_true, y_pred) else: # Use dynamic rank. rank_diff = array_ops.rank(y_pred) - array_ops.rank(y_true) squeeze_dims = lambda: remove_squeezable_dimensions( # pylint: disable=g-long-lambda y_true, y_pred) is_last_dim_1 = math_ops.equal(1, array_ops.shape(y_pred)[-1]) maybe_squeeze_dims = lambda: control_flow_ops.cond( # pylint: disable=g-long-lambda is_last_dim_1, squeeze_dims, lambda: (y_true, y_pred)) y_true, y_pred = control_flow_ops.cond( math_ops.equal(1, rank_diff), maybe_squeeze_dims, squeeze_dims) if sample_weight is None: return y_pred, y_true weights_shape = sample_weight.shape weights_rank = weights_shape.ndims if weights_rank == 0: # If weights is scalar, do nothing. return y_pred, y_true, sample_weight if (y_pred_rank is not None) and (weights_rank is not None): # Use static rank. if weights_rank - y_pred_rank == 1: sample_weight = array_ops.squeeze(sample_weight, [-1]) elif y_pred_rank - weights_rank == 1: sample_weight = array_ops.expand_dims(sample_weight, [-1]) return y_pred, y_true, sample_weight # Use dynamic rank. weights_rank_tensor = array_ops.rank(sample_weight) rank_diff = weights_rank_tensor - array_ops.rank(y_pred) maybe_squeeze_weights = lambda: array_ops.squeeze(sample_weight, [-1]) def _maybe_expand_weights(): expand_weights = lambda: array_ops.expand_dims(sample_weight, [-1]) return control_flow_ops.cond(math_ops.equal(rank_diff, -1), expand_weights, lambda: sample_weight) def _maybe_adjust_weights(): return control_flow_ops.cond(math_ops.equal(rank_diff, 1), maybe_squeeze_weights, _maybe_expand_weights) # squeeze or expand last dim of `sample_weight` if its rank differs by 1 # from the new rank of `y_pred`. sample_weight = control_flow_ops.cond( math_ops.equal(weights_rank_tensor, 0), lambda: sample_weight, _maybe_adjust_weights) return y_pred, y_true, sample_weight
def lifted_struct_loss(labels, embeddings, margin=1.0): """Computes the lifted structured loss. The loss encourages the positive distances (between a pair of embeddings with the same labels) to be smaller than any negative distances (between a pair of embeddings with different labels) in the mini-batch in a way that is differentiable with respect to the embedding vectors. See: https://arxiv.org/abs/1511.06452. Args: labels: 1-D tf.int32 `Tensor` with shape [batch_size] of multiclass integer labels. embeddings: 2-D float `Tensor` of embedding vectors. Embeddings should not be l2 normalized. margin: Float, margin term in the loss definition. Returns: lifted_loss: tf.float32 scalar. """ # Reshape [batch_size] label tensor to a [batch_size, 1] label tensor. lshape = array_ops.shape(labels) assert lshape.shape == 1 labels = array_ops.reshape(labels, [lshape[0], 1]) # Build pairwise squared distance matrix. pairwise_distances = pairwise_distance(embeddings) # Build pairwise binary adjacency matrix. adjacency = math_ops.equal(labels, array_ops.transpose(labels)) # Invert so we can select negatives only. adjacency_not = math_ops.logical_not(adjacency) batch_size = array_ops.size(labels) diff = margin - pairwise_distances mask = math_ops.cast(adjacency_not, dtype=dtypes.float32) # Safe maximum: Temporarily shift negative distances # above zero before taking max. # this is to take the max only among negatives. row_minimums = math_ops.reduce_min(diff, 1, keep_dims=True) row_negative_maximums = math_ops.reduce_max( math_ops.multiply(diff - row_minimums, mask), 1, keep_dims=True) + row_minimums max_elements = math_ops.maximum(row_negative_maximums, array_ops.transpose(row_negative_maximums)) diff_tiled = array_ops.tile(diff, [batch_size, 1]) mask_tiled = array_ops.tile(mask, [batch_size, 1]) max_elements_vect = array_ops.reshape(array_ops.transpose(max_elements), [-1, 1]) loss_exp_left = array_ops.reshape( math_ops.reduce_sum(math_ops.multiply( math_ops.exp(diff_tiled - max_elements_vect), mask_tiled), 1, keep_dims=True), [batch_size, batch_size]) loss_mat = max_elements + math_ops.log(loss_exp_left + array_ops.transpose(loss_exp_left)) # Add the positive distance. loss_mat += pairwise_distances mask_positives = math_ops.cast(adjacency, dtype=dtypes.float32) - array_ops.diag( array_ops.ones([batch_size])) # *0.5 for upper triangular, and another *0.5 for 1/2 factor for loss^2. num_positives = math_ops.reduce_sum(mask_positives) / 2.0 lifted_loss = math_ops.truediv(0.25 * math_ops.reduce_sum( math_ops.square( math_ops.maximum(math_ops.multiply(loss_mat, mask_positives), 0.0))), num_positives, name='liftedstruct_loss') return lifted_loss
def broadcast_dimension(self, axis, lengths): """Returns a shape that is broadcast-compatible with self & lengths. * If dimension[axis] is uniform and lengths is a scalar, the check that either lengths==1 or axis==1 or lengths==axis, and tile dimension[axis] with tf.where(lengths==axis, 1, axis) repeats. * If dimension[axis] is uniform and lengths is a vector, then check that dimension[axis]==1, and raggedly tile dimension[axis] with lengths repeats. (we can skip tiling if we statically know that slice_lengths == 1??) * If dimension[axis] is ragged and lengths is a scalar, then check that lengths==1. * If dimension[axis] is ragged and lengths is a vector, then check that self.dimension_size(axis) == lengths. Args: axis: `int`. The dimension to broadcast. lengths: 0-D or 1-D integer `Tensor`. Returns: A `RaggedTensorDynamicShape`. """ lengths = ragged_util.convert_to_int_tensor(lengths, name='lengths', dtype=dtypes.int64) # Check whether lengths is a scalar (for uniform dimensions) or # vector (for ragged dimensions). if lengths.shape.ndims is None: raise ValueError('lengths must have a known rank.') elif lengths.shape.ndims > 1: raise ValueError('lengths must be a scalar or vector') else: lengths_is_scalar = (lengths.shape.ndims == 0) # Verify that the shapes are compatible. if self.is_ragged(axis): if lengths_is_scalar: condition = math_ops.equal(lengths, 1) else: condition = math_ops.reduce_all( math_ops.equal(lengths, self.dimension_size(axis))) else: axis_dim_size = self.dimension_size(axis) if lengths_is_scalar: condition = (math_ops.equal(lengths, 1) | math_ops.equal(axis_dim_size, 1) | math_ops.equal(axis_dim_size, lengths)) else: condition = math_ops.equal(axis_dim_size, 1) broadcast_err = [ 'Unable to broadcast: dimension size mismatch in dimension', axis, 'lengths=', lengths, 'dim_size=', self.dimension_size(axis) ] broadcast_check = control_flow_ops.Assert(condition, data=broadcast_err, summarize=10) with ops.control_dependencies([broadcast_check]): # Partitioned dimensions: if axis < self.num_partitioned_dimensions: if self.is_ragged(axis): # Use an identity op to make sure the check actually gets run. return RaggedTensorDynamicShape( self._partitioned_dim_sizes, array_ops.identity(self.inner_dim_sizes)) else: return self._broadcast_uniform_partitioned_dimension( axis, lengths) # Inner dimensions: else: if lengths_is_scalar: return self._broadcast_inner_dimension_to_uniform( axis, lengths) else: if axis == 0: raise ValueError( 'Unable to broadcast: ' 'outermost dimension must be uniform.') return self._broadcast_inner_dimension_to_ragged( axis, lengths)
def _broadcast_to_ragged_shape(rt_input, dst_shape, broadcast_inner_dimensions): """Broadcasts rt_input to the ragged shape `dst_shape`.""" # dst_shape's rank and ragged_rank must be greater than or equal to rt_input's if rt_input.shape.ndims is None or dst_shape.rank is None: raise ValueError('Unable to broadcast: unknown rank') if rt_input.shape.ndims > dst_shape.rank: raise ValueError('Incompatible with shape: rank mismatch') if (isinstance(rt_input, ragged_tensor.RaggedTensor) and rt_input.ragged_rank >= dst_shape.num_partitioned_dimensions): raise ValueError('Incompatible with shape: ragged rank mismatch') src_shape = RaggedTensorDynamicShape.from_tensor(rt_input) src_shape = src_shape.broadcast_to_rank(dst_shape.rank) # Add dimensions to rt_input so its rank and ragged_rank matches dst_shape. if dst_shape.rank > rt_input.shape.ndims: if rt_input.shape.ndims < dst_shape.num_inner_dimensions + 1: rt_input = array_ops.reshape( rt_input, array_ops.concat([[-1], dst_shape.inner_dim_sizes], axis=0)) for _ in range(dst_shape.rank - rt_input.shape.ndims): rt_input = ragged_factory_ops.from_row_lengths( rt_input, [ragged_array_ops.nrows(rt_input)]) # Add ragged dimensions to match dst_shape. if ragged_tensor.is_ragged(rt_input): inner_rank_diff = (rt_input.inner_values.shape.ndims - 1 - dst_shape.num_inner_dimensions) if inner_rank_diff > 0: rt_input = rt_input.with_inner_values( ragged_conversion_ops.from_tensor(rt_input.inner_values, ragged_rank=inner_rank_diff)) else: rt_input = ragged_conversion_ops.from_tensor( rt_input, ragged_rank=dst_shape.num_partitioned_dimensions - 1) # Do broadcasting for any dimensions that will remain uniform. We can do # these all at once, since they're independent of one another. multiples = [1] * dst_shape.rank for axis in range(dst_shape.num_partitioned_dimensions): if not src_shape.is_ragged(axis) and not dst_shape.is_ragged(axis): src_size = src_shape.dimension_size(axis) dst_size = dst_shape.dimension_size(axis) if ((tensor_util.constant_value(src_size) in (1, None)) and (tensor_util.constant_value(dst_size) != 1)): multiples[axis] = array_ops.where(math_ops.equal(src_size, 1), dst_size, 1) if not all(isinstance(v, int) and v == 1 for v in multiples): multiples = array_ops.stack(multiples, axis=0) rt_input = ragged_array_ops.tile(rt_input, multiples) if broadcast_inner_dimensions: rt_input = rt_input.with_inner_values( array_ops.reshape( rt_input.inner_values, array_ops.concat([[-1], dst_shape.inner_dim_sizes], axis=0))) # Do broadcasting for dimensions that become ragged. We must do these from # outermost to innermost. for axis in range(dst_shape.num_partitioned_dimensions): if not src_shape.is_ragged(axis) and dst_shape.is_ragged(axis): dst_size = dst_shape.dimension_size(axis) rt_input = _ragged_tile_axis(rt_input, axis, dst_size) return rt_input
def assert_equal(x, y, data=None, summarize=None, message=None, name=None): """Assert the condition `x == y` holds element-wise. Example of adding a dependency to an operation: ```python with tf.control_dependencies([tf.assert_equal(x, y)]): output = tf.reduce_sum(x) ``` This condition holds if for every pair of (possibly broadcast) elements `x[i]`, `y[i]`, we have `x[i] == y[i]`. If both `x` and `y` are empty, this is trivially satisfied. Args: x: Numeric `Tensor`. y: Numeric `Tensor`, same dtype as and broadcastable to `x`. data: The tensors to print out if the condition is False. Defaults to error message and first few entries of `x`, `y`. summarize: Print this many entries of each tensor. message: A string to prefix to the default message. name: A name for this operation (optional). Defaults to "assert_equal". Returns: Op that raises `InvalidArgumentError` if `x == y` is False. @compatibility{eager} returns None Raises: InvalidArgumentError: if the check can be performed immediately and `x == y` is False. The check can be performed immediately during eager execution or if `x` and `y` are statically known. """ message = message or '' with ops.name_scope(name, 'assert_equal', [x, y, data]): x = ops.convert_to_tensor(x, name='x') y = ops.convert_to_tensor(y, name='y') if context.in_eager_mode(): eq = math_ops.equal(x, y) condition = math_ops.reduce_all(eq) if not condition: # Prepare a message with first elements of x and y. summary_msg = '' # Default to printing 3 elements like control_flow_ops.Assert (used # by graph mode) does. summarize = 3 if summarize is None else summarize if summarize: # reshape((-1,)) is the fastest way to get a flat array view. x_np = x.numpy().reshape((-1, )) y_np = y.numpy().reshape((-1, )) x_sum = min(x_np.size, summarize) y_sum = min(y_np.size, summarize) summary_msg = ('First %d elements of x:\n%s\n' 'First %d elements of y:\n%s\n' % (x_sum, x_np[:x_sum], y_sum, y_np[:y_sum])) # Get the values that actually differed and their indices. mask = math_ops.logical_not(eq) indices = array_ops.where(mask) indices_np = indices.numpy() x_vals = array_ops.boolean_mask(x, mask) y_vals = array_ops.boolean_mask(y, mask) summarize = min(summarize, indices_np.shape[0]) raise errors.InvalidArgumentError( node_def=None, op=None, message=('%s\nCondition x == y did not hold.\n' 'Indices of first %s different values:\n%s\n' 'Corresponding x values:\n%s\n' 'Corresponding y values:\n%s\n' '%s' % (message or '', summarize, indices_np[:summarize], x_vals.numpy().reshape( (-1, ))[:summarize], y_vals.numpy().reshape( (-1, ))[:summarize], summary_msg))) return if data is None: data = [ message, 'Condition x == y did not hold element-wise:', 'x (%s) = ' % x.name, x, 'y (%s) = ' % y.name, y ] condition = math_ops.reduce_all(math_ops.equal(x, y)) x_static = tensor_util.constant_value(x) y_static = tensor_util.constant_value(y) if x_static is not None and y_static is not None: condition_static = (x_static == y_static).all() _assert_static(condition_static, data) return control_flow_ops.Assert(condition, data, summarize=summarize)
def build_collective_gather_indexed_slices(input_slices_list, devices, group_size, collective_keys, communication_hint='AUTO', control_inputs=None, timeout=None): """Build a subgraph that all-gathers IndexedSlices using the collective Op. This method must be called in graph mode or inside a tf.function. Args: input_slices_list: a list of IndexedSlices within a single worker graph that are to be gathered together; must be one per device. devices: a list of device strings to run the collective on. group_size: total number of devices globally that will be doing this same gathering. The gathering will actually include the corresponding tensors at all these workers. collective_keys: a CollectiveKeys object. communication_hint: string providing hint to runtime for choosing collective implementation. control_inputs: if not None, add control edges between control_inputs and (index-wise) corresponding collective_reduce tensors timeout: a float or None. The timeout in seconds. Returns: An array of final IndexedSlices, one per device, computed by the full gather. Raises: ValueError: if control_inputs is not None and doesn't match the length and devices of inputs. """ assert not context.executing_eagerly(), ( 'build_collective_gather_indexed_slices can only be called in graph mode' ' or inside tf.function') if len(input_slices_list) != len(devices): raise ValueError( 'collective requires one input IndexedSlice for each device, %d != %d' % (len(input_slices_list), len(devices))) if group_size < 2: return input_slices_list group_key = collective_keys.get_group_key(devices) gather_length_key = collective_keys.get_op_instance_key() gather_indices_key = collective_keys.get_op_instance_key() gather_values_key = collective_keys.get_op_instance_key() reduce_densified_key = collective_keys.get_op_instance_key() # Current CollectiveAllGather implementations require input IndexedSlices to # have consistent length across the board, we handle the reduction of # IndexedSlices as follows: # 1. Gather the lengths of IndexedSlices from all participants. # 2. If they have consistent length, apply all_gather. # 3. Otherwise convert IndexedSlices to dense tensors and apply # all_reduce. out_slices_list = [] for idx, input_slices in enumerate(input_slices_list): # pylint: disable = cell-var-from-loop with ops.device(devices[idx]): def all_gather(): """Use all_gather to aggregate `IndexedSlices`.""" all_values = collective_ops.all_gather(input_slices.values, group_size, group_key, gather_values_key, communication_hint, timeout=timeout) # Add control dependency to order the all-gather. control = [all_values] if communication_hint == 'NCCL' else [] with ops.control_dependencies(control): all_indices = collective_ops.all_gather( input_slices.indices, group_size, group_key, gather_indices_key, communication_hint, timeout=timeout) return ops.IndexedSlices(values=all_values, indices=all_indices, dense_shape=input_slices.dense_shape) def densify_and_all_reduce(): """Use all_reduce to aggregate `IndexedSlices`.""" densified = ops.convert_to_tensor(input_slices) reduced = collective_ops.all_reduce(densified, group_size, group_key, reduce_densified_key, 'Add', 'Id', [0], communication_hint, timeout=timeout) # We have to convert dense grad to IndexedSlice because all_reduce() # and all_gather() must have the same return type as required by # control_flow_ops.cond. return ops.IndexedSlices(values=reduced, indices=math_ops.range( array_ops.shape(reduced)[0]), dense_shape=input_slices.dense_shape) length = array_ops.shape(input_slices.indices) with ops.control_dependencies( _control_input(input_slices, control_inputs, idx)): all_lengths = collective_ops.all_gather(length, group_size, group_key, gather_length_key, communication_hint, timeout=timeout) out_slices = control_flow_ops.cond( math_ops.equal(math_ops.reduce_max(all_lengths), math_ops.reduce_min(all_lengths)), all_gather, densify_and_all_reduce) out_slices_list.append(out_slices) # pylint: enable=cell-var-from-loop return out_slices_list
def stratified_sample(tensors, labels, target_probs, batch_size, init_probs=None, enqueue_many=False, queue_capacity=16, threads_per_queue=1, name=None): """Stochastically creates batches based on per-class probabilities. This method discards examples. Internally, it creates one queue to amortize the cost of disk reads, and one queue to hold the properly-proportioned batch. Args: tensors: List of tensors for data. All tensors are either one item or a batch, according to enqueue_many. labels: Tensor for label of data. Label is a single integer or a batch, depending on `enqueue_many`. It is not a one-hot vector. target_probs: Target class proportions in batch. An object whose type has a registered Tensor conversion function. batch_size: Size of batch to be returned. init_probs: Class proportions in the data. An object whose type has a registered Tensor conversion function, or `None` for estimating the initial distribution. enqueue_many: Bool. If true, interpret input tensors as having a batch dimension. queue_capacity: Capacity of the large queue that holds input examples. threads_per_queue: Number of threads for the large queue that holds input examples and for the final queue with the proper class proportions. name: Optional prefix for ops created by this function. Raises: ValueError: If `tensors` isn't iterable. ValueError: `enqueue_many` is True and labels doesn't have a batch dimension, or if `enqueue_many` is False and labels isn't a scalar. ValueError: `enqueue_many` is True, and batch dimension on data and labels don't match. ValueError: if probs don't sum to one. ValueError: if a zero initial probability class has a nonzero target probability. TFAssertion: if labels aren't integers in [0, num classes). Returns: (data_batch, label_batch), where data_batch is a list of tensors of the same length as `tensors` Example: # Get tensor for a single data and label example. data, label = data_provider.Get(['data', 'label']) # Get stratified batch according to per-class probabilities. target_probs = [...distribution you want...] [data_batch], labels = tf.contrib.training.stratified_sample( [data], label, target_probs) # Run batch through network. ... """ with ops.name_scope(name, 'stratified_sample', list(tensors) + [labels]): tensor_list = ops.convert_n_to_tensor_or_indexed_slices(tensors) labels = ops.convert_to_tensor(labels) target_probs = ops.convert_to_tensor(target_probs, dtype=dtypes.float32) # Reduce the case of a single example to that of a batch of size 1. if not enqueue_many: tensor_list = [array_ops.expand_dims(tensor, 0) for tensor in tensor_list] labels = array_ops.expand_dims(labels, 0) # If `init_probs` is `None`, set up online estimation of data distribution. if init_probs is None: # We use `target_probs` to get the number of classes, so its shape must be # fully defined at graph construction time. target_probs.get_shape().assert_is_fully_defined() init_probs = _estimate_data_distribution( labels, target_probs.get_shape().num_elements()) else: init_probs = ops.convert_to_tensor(init_probs, dtype=dtypes.float32) # Validate that input is consistent. tensor_list, labels, [init_probs, target_probs] = _verify_input( tensor_list, labels, [init_probs, target_probs]) # Check that all zero initial probabilities also have zero target # probabilities. assert_op = control_flow_ops.Assert( math_ops.reduce_all( math_ops.logical_or( math_ops.not_equal(init_probs, 0), math_ops.equal(target_probs, 0))), ['All classes with zero initial probability must also have zero target ' 'probability: ', init_probs, target_probs ]) init_probs = control_flow_ops.with_dependencies([assert_op], init_probs) # Calculate acceptance sampling probabilities. accept_probs = _calculate_acceptance_probabilities(init_probs, target_probs) proportion_rejected = math_ops.reduce_sum((1 - accept_probs) * init_probs) accept_probs = control_flow_ops.cond( math_ops.less(proportion_rejected, .5), lambda: accept_probs, lambda: logging_ops.Print( # pylint: disable=g-long-lambda accept_probs, [accept_probs], message='Proportion of examples rejected by sampler is high.', first_n=10)) # Make a single queue to hold input examples. Reshape output so examples # don't have singleton batch dimension. batched = input_ops.batch( tensor_list + [labels], batch_size=1, num_threads=threads_per_queue, capacity=queue_capacity, enqueue_many=True) val_list = [array_ops.squeeze(x, [0]) for x in batched[:-1]] label = array_ops.squeeze(batched[-1], [0]) # Set up second queue containing batches that have the desired class # proportions. cur_prob = array_ops.gather(accept_probs, label) batched = input_ops.maybe_batch( val_list + [label], keep_input=random_ops.random_uniform([]) < cur_prob, batch_size=batch_size, num_threads=threads_per_queue) return batched[:-1], batched[-1]
def assert_broadcastable(weights, values): """Asserts `weights` can be broadcast to `values`. In `tf.losses` and `tf.metrics`, we support limited weight broadcasting. We let weights be either scalar, or the same rank as the target values, with each dimension either 1, or the same as the corresponding values dimension. Args: weights: `Tensor` of weights. values: `Tensor` of values to which weights are applied. Returns: `Operation` raising `InvalidArgumentError` if `weights` has incorrect shape. `no_op` if static checks determine `weights` has correct shape. Raises: ValueError: If static checks determine `weights` has incorrect shape. """ with ops.name_scope(None, "assert_broadcastable", (weights, values)) as scope: with ops.name_scope(None, "weights", (weights, )) as weights_scope: weights = ops.convert_to_tensor(weights, name=weights_scope) weights_shape = array_ops.shape(weights, name="shape") weights_rank = array_ops.rank(weights, name="rank") weights_rank_static = tensor_util.constant_value(weights_rank) with ops.name_scope(None, "values", (values, )) as values_scope: values = ops.convert_to_tensor(values, name=values_scope) values_shape = array_ops.shape(values, name="shape") values_rank = array_ops.rank(values, name="rank") values_rank_static = tensor_util.constant_value(values_rank) # Try static checks. if weights_rank_static is not None and values_rank_static is not None: if weights_rank_static == 0: return control_flow_ops.no_op( name="static_scalar_check_success") if weights_rank_static != values_rank_static: raise ValueError( "%s values.rank=%s. weights.rank=%s." " values.shape=%s. weights.shape=%s." % (_ASSERT_BROADCASTABLE_ERROR_PREFIX, values_rank_static, weights_rank_static, values.shape, weights.shape)) weights_shape_static = tensor_util.constant_value(weights_shape) values_shape_static = tensor_util.constant_value(values_shape) if weights_shape_static is not None and values_shape_static is not None: # Sanity check, this should always be true since we checked rank above. ndims = len(values_shape_static) assert ndims == len(weights_shape_static) for i in range(ndims): if weights_shape_static[i] not in (1, values_shape_static[i]): raise ValueError( "%s Mismatch at dim %s. values.shape=%s weights.shape=%s." % (_ASSERT_BROADCASTABLE_ERROR_PREFIX, i, values_shape_static, weights_shape_static)) return control_flow_ops.no_op(name="static_dims_check_success") # Dynamic checks. is_scalar = math_ops.equal(0, weights_rank, name="is_scalar") data = ( _ASSERT_BROADCASTABLE_ERROR_PREFIX, "weights.shape=", weights.name, weights_shape, "values.shape=", values.name, values_shape, "is_scalar=", is_scalar, ) is_valid_shape = control_flow_ops.cond( is_scalar, lambda: is_scalar, lambda: _has_valid_nonscalar_shape( # pylint: disable=g-long-lambda weights_rank, weights_shape, values_rank, values_shape), name="is_valid_shape") return control_flow_ops.Assert(is_valid_shape, data, name=scope)
def tfassert_eq(_): x = array_ops.placeholder(dtypes.int32, name='x_hold') y = array_ops.placeholder(dtypes.int32, name='y_hold') control_flow_ops.Assert(math_ops.equal(x, y), ['Expected x == y.'], name='assert_eq') math_ops.add(x, math_ops.negative(y), name='x_y_diff')
def npairs_loss(labels, embeddings_anchor, embeddings_positive, reg_lambda=3e-3, print_losses=False): """Computes the npairs loss. Npairs loss expects paired data where a pair is composed of samples from the same labels and each pairs in the minibatch have different labels. The loss has two components. The first component is the L2 regularizer on the embedding vectors. The second component is the sum of cross entropy loss which takes each row of the pair-wise similarity matrix as logits and the remapped one-hot labels as labels. See: http://www.nec-labs.com/uploads/images/Department-Images/MediaAnalytics/papers/nips16_npairmetriclearning.pdf Args: labels: 1-D tf.int32 `Tensor` of shape [batch_size/2]. embeddings_anchor: 2-D Tensor of shape [batch_size/2, embedding_dim] for the embedding vectors for the anchor images. Embeddings should not be l2 normalized. embeddings_positive: 2-D Tensor of shape [batch_size/2, embedding_dim] for the embedding vectors for the positive images. Embeddings should not be l2 normalized. reg_lambda: Float. L2 regularization term on the embedding vectors. print_losses: Boolean. Option to print the xent and l2loss. Returns: npairs_loss: tf.float32 scalar. """ # pylint: enable=line-too-long # Add the regularizer on the embedding. reg_anchor = math_ops.reduce_mean( math_ops.reduce_sum(math_ops.square(embeddings_anchor), 1)) reg_positive = math_ops.reduce_mean( math_ops.reduce_sum(math_ops.square(embeddings_positive), 1)) l2loss = math_ops.multiply(0.25 * reg_lambda, reg_anchor + reg_positive, name='l2loss') # Get per pair similarities. similarity_matrix = math_ops.matmul(embeddings_anchor, embeddings_positive, transpose_a=False, transpose_b=True) # Reshape [batch_size] label tensor to a [batch_size, 1] label tensor. lshape = array_ops.shape(labels) assert lshape.shape == 1 labels = array_ops.reshape(labels, [lshape[0], 1]) labels_remapped = math_ops.to_float( math_ops.equal(labels, array_ops.transpose(labels))) labels_remapped /= math_ops.reduce_sum(labels_remapped, 1, keep_dims=True) # Add the softmax loss. xent_loss = nn.softmax_cross_entropy_with_logits(logits=similarity_matrix, labels=labels_remapped) xent_loss = math_ops.reduce_mean(xent_loss, name='xentropy') if print_losses: xent_loss = logging_ops.Print( xent_loss, ['cross entropy:', xent_loss, 'l2loss:', l2loss]) return l2loss + xent_loss
def _maybe_adjust_weights(): return control_flow_ops.cond(math_ops.equal(rank_diff, 1), maybe_squeeze_weights, _maybe_expand_weights)
def triplet_semihard_loss(labels, embeddings, margin=1.0): """Computes the triplet loss with semi-hard negative mining. The loss encourages the positive distances (between a pair of embeddings with the same labels) to be smaller than the minimum negative distance among which are at least greater than the positive distance plus the margin constant (called semi-hard negative) in the mini-batch. If no such negative exists, uses the largest negative distance instead. See: https://arxiv.org/abs/1503.03832. Args: labels: 1-D tf.int32 `Tensor` with shape [batch_size] of multiclass integer labels. embeddings: 2-D float `Tensor` of embedding vectors. Embeddings should be l2 normalized. margin: Float, margin term in the loss definition. Returns: triplet_loss: tf.float32 scalar. """ # Reshape [batch_size] label tensor to a [batch_size, 1] label tensor. lshape = array_ops.shape(labels) assert lshape.shape == 1 labels = array_ops.reshape(labels, [lshape[0], 1]) # Build pairwise squared distance matrix. pdist_matrix = pairwise_distance(embeddings, squared=True) # Build pairwise binary adjacency matrix. adjacency = math_ops.equal(labels, array_ops.transpose(labels)) # Invert so we can select negatives only. adjacency_not = math_ops.logical_not(adjacency) batch_size = array_ops.size(labels) # Compute the mask. pdist_matrix_tile = array_ops.tile(pdist_matrix, [batch_size, 1]) mask = math_ops.logical_and( array_ops.tile(adjacency_not, [batch_size, 1]), math_ops.greater( pdist_matrix_tile, array_ops.reshape(array_ops.transpose(pdist_matrix), [-1, 1]))) mask_final = array_ops.reshape( math_ops.greater( math_ops.reduce_sum(math_ops.cast(mask, dtype=dtypes.float32), 1, keep_dims=True), 0.0), [batch_size, batch_size]) mask_final = array_ops.transpose(mask_final) adjacency_not = math_ops.cast(adjacency_not, dtype=dtypes.float32) mask = math_ops.cast(mask, dtype=dtypes.float32) # negatives_outside: smallest D_an where D_an > D_ap. negatives_outside = array_ops.reshape( masked_minimum(pdist_matrix_tile, mask), [batch_size, batch_size]) negatives_outside = array_ops.transpose(negatives_outside) # negatives_inside: largest D_an. negatives_inside = array_ops.tile( masked_maximum(pdist_matrix, adjacency_not), [1, batch_size]) semi_hard_negatives = array_ops.where(mask_final, negatives_outside, negatives_inside) loss_mat = math_ops.add(margin, pdist_matrix - semi_hard_negatives) mask_positives = math_ops.cast(adjacency, dtype=dtypes.float32) - array_ops.diag( array_ops.ones([batch_size])) # In lifted-struct, the authors multiply 0.5 for upper triangular # in semihard, they take all positive pairs except the diagonal. num_positives = math_ops.reduce_sum(mask_positives) _triplet_loss = math_ops.truediv(math_ops.reduce_sum( math_ops.maximum(math_ops.multiply(loss_mat, mask_positives), 0.0)), num_positives, name='triplet_semihard_loss') return _triplet_loss
def equal_(x, y): if _is_tensor(x) or _is_tensor(y): return math_ops.equal(x, y) else: return x == y
def posterior_from_prior_state(self, prior_state, prior_state_var, observation, observation_model, predicted_observations, observation_noise): """Compute a posterior over states given an observation. Args: prior_state: Prior state mean [batch size x state dimension] prior_state_var: Prior state covariance [batch size x state dimension x state dimension] observation: The observed value corresponding to the predictions given [batch size x observation dimension] observation_model: The [batch size x observation dimension x model state dimension] Tensor indicating how a particular state is mapped to (pre-noise) observations for each part of the batch. predicted_observations: An (observation mean, observation variance) tuple computed based on the current state, usually the output of observed_from_state. observation_noise: A [batch size x observation dimension x observation dimension] or [observation dimension x observation dimension] Tensor with covariance matrices to use for each part of the batch (a two-dimensional input will be broadcast). Returns: Posterior mean and covariance (dimensions matching the first two arguments). """ observed_mean, observed_var = predicted_observations residual = observation - observed_mean # TODO(allenl): Can more of this be done using matrix_solve_ls? kalman_solve_rhs = math_ops.matmul(observation_model, prior_state_var, adjoint_b=True) # This matrix_solve adjoint doesn't make a difference symbolically (since # observed_var is a covariance matrix, and should be symmetric), but # filtering on multivariate series is unstable without it. See # test_multivariate_symmetric_covariance_float64 in kalman_filter_test.py # for an example of the instability (fails with adjoint=False). kalman_gain_transposed = linalg_ops.matrix_solve(matrix=observed_var, rhs=kalman_solve_rhs, adjoint=True) posterior_state = prior_state + array_ops.squeeze(math_ops.matmul( kalman_gain_transposed, array_ops.expand_dims(residual, -1), adjoint_a=True), squeeze_dims=[-1]) gain_obs = math_ops.matmul(kalman_gain_transposed, observation_model, adjoint_a=True) identity_extradim = linalg_ops.eye(array_ops.shape(gain_obs)[1], dtype=gain_obs.dtype)[None] identity_minus_factor = identity_extradim - gain_obs if self._simplified_posterior_covariance_computation: # posterior covariance = # (I - kalman_gain * observation_model) * prior_state_var posterior_state_var = math_ops.matmul(identity_minus_factor, prior_state_var) else: observation_noise = ops.convert_to_tensor(observation_noise) # A Joseph form update, which provides better numeric stability than the # simplified optimal Kalman gain update, at the cost of a few extra # operations. Joseph form updates are valid for any gain (not just the # optimal Kalman gain), and so are more forgiving of numerical errors in # computing the optimal Kalman gain. # # posterior covariance = # (I - kalman_gain * observation_model) * prior_state_var # * (I - kalman_gain * observation_model)^T # + kalman_gain * observation_noise * kalman_gain^T left_multiplied_state_var = math_ops.matmul( identity_minus_factor, prior_state_var) multiplied_state_var = math_ops.matmul(identity_minus_factor, left_multiplied_state_var, adjoint_b=True) def _batch_observation_noise_update(): return (multiplied_state_var + math_ops.matmul( math_ops.matmul(kalman_gain_transposed, observation_noise, adjoint_a=True), kalman_gain_transposed)) def _matrix_observation_noise_update(): return (multiplied_state_var + math_ops.matmul( math_utils.batch_times_matrix( kalman_gain_transposed, observation_noise, adj_x=True), kalman_gain_transposed)) if observation_noise.get_shape().ndims is None: posterior_state_var = control_flow_ops.cond( math_ops.equal(array_ops.rank(observation_noise), 2), _matrix_observation_noise_update, _batch_observation_noise_update) else: # If static shape information exists, it gets checked in each cond() # branch, so we need a special case to avoid graph-build-time # exceptions. if observation_noise.get_shape().ndims == 2: posterior_state_var = _matrix_observation_noise_update() else: posterior_state_var = _batch_observation_noise_update() return posterior_state, posterior_state_var
def lifted_struct_loss(y_true, y_preds, margin=1.0): """Computes the lifted structured loss. The loss encourages the positive distances (between a pair of embeddings with the same labels) to be smaller than any negative distances (between a pair of embeddings with different labels) in the mini-batch in a way that is differentiable with respect to the embedding vectors. See: https://arxiv.org/abs/1511.06452. Args: labels: 1-D tf.int32 `Tensor` with shape [batch_size] of multiclass integer labels. embeddings: 2-D float `Tensor` of embedding vectors. Embeddings should not be l2 normalized. margin: Float, margin term in the loss definition. Returns: lifted_loss: tf.float32 scalar. """ labels = y_true embeddings = y_preds # Reshape [batch_size] label tensor to a [batch_size, 1] label tensor. lshape = array_ops.shape(labels) # assert lshape.shape == 1 labels = array_ops.reshape(labels, [lshape[0], 1]) # Build pairwise squared distance matrix. pairwise_distances = pairwise_distance(embeddings) # Build pairwise binary adjacency matrix. adjacency = math_ops.equal(labels, array_ops.transpose(labels)) # Invert so we can select negatives only. adjacency_not = math_ops.logical_not(adjacency) batch_size = array_ops.size(labels) diff = margin - pairwise_distances mask = math_ops.cast(adjacency_not, dtype=dtypes.float32) # Safe maximum: Temporarily shift negative distances # above zero before taking max. # this is to take the max only among negatives. row_minimums = math_ops.reduce_min(diff, 1, keepdims=True) row_negative_maximums = (math_ops.reduce_max( math_ops.multiply(diff - row_minimums, mask), 1, keepdims=True) + row_minimums) # Compute the loss. # Keep track of matrix of maximums where M_ij = max(m_i, m_j) # where m_i is the max of alpha - negative D_i's. # This matches the Caffe loss layer implementation at: # https://github.com/rksltnl/Caffe-Deep-Metric-Learning-CVPR16/blob/0efd7544a9846f58df923c8b992198ba5c355454/src/caffe/layers/lifted_struct_similarity_softmax_layer.cpp # pylint: disable=line-too-long max_elements = math_ops.maximum(row_negative_maximums, array_ops.transpose(row_negative_maximums)) diff_tiled = array_ops.tile(diff, [batch_size, 1]) mask_tiled = array_ops.tile(mask, [batch_size, 1]) max_elements_vect = array_ops.reshape(array_ops.transpose(max_elements), [-1, 1]) loss_exp_left = array_ops.reshape( math_ops.reduce_sum( math_ops.multiply(math_ops.exp(diff_tiled - max_elements_vect), mask_tiled), 1, keepdims=True, ), [batch_size, batch_size], ) loss_mat = max_elements + math_ops.log(loss_exp_left + array_ops.transpose(loss_exp_left)) # Add the positive distance. loss_mat += pairwise_distances mask_positives = math_ops.cast(adjacency, dtype=dtypes.float32) - array_ops.diag( array_ops.ones([batch_size])) # *0.5 for upper triangular, and another *0.5 for 1/2 factor for loss^2. num_positives = math_ops.reduce_sum(mask_positives) / 2.0 lifted_loss = math_ops.truediv( 0.25 * math_ops.reduce_sum( math_ops.square( math_ops.maximum(math_ops.multiply(loss_mat, mask_positives), 0.0))), num_positives, name='liftedstruct_loss', ) return lifted_loss
def indicators_to_sparse_ids(indicators, ignore_value=None, dtype=dtypes.int64): """Convert a dense indicator tensor to sparse IDs. This is commonly used for converting a dense classification label to sparse. In the following example, we have an input of shape (2, 2, num_classes), where num_classes=4. ```python indicators = [ [ [0, 0, 1, 0], [0, 0, 0, 0] ], [ [1, 0, 1, 1], [0, 0, 1, 0] ] ] sparse_ids = indicator_to_sparse_ids(indicators) ``` `sparse_ids` in "jagged" format: [ [ [2], [] ], [ [0, 2, 3], [2] ] ] `sparse_ids` in `SparseTensor` format: ```python { indices: [[0, 0, 1], [1, 0, 0], [1, 0, 1], [1, 0, 2], [1, 1, 0]], values: [2, 0, 2, 3, 2], dense_shape: [2, 2, 3] } ``` Args: indicators: Dense `Tensor` of shape `(d0, ..., dn, num_classes)`. `ignore_value` values are ignored. For other values (typically, ones), the index along the last dimension is returned. ignore_value: Entries in `indicators` equal to this value will be absent from the returned `SparseTensor`. If `None`, default value of `indicators` dtype will be used (e.g. '' for `str`, 0 for `int`). dtype: Type of result, must be integer type. Returns: `SparseTensor` of type `dtype` and shape `(d0, ..., dn, max_num_labels)`, where `max_num_labels` is the maximum number of non-zero values in any row (in the example above, row (1, 1) has 3 non-zero values, so the result shape is (2, 2, 3)). The values of this `SparseTensor` are in the range `[0, num_classes)` and correspond to the index of non-ignore values along the last dimension of `indicators`. Raises: ValueError: if `dtype` is not integer. """ if not dtype.is_integer: raise ValueError("Invalid dtype {} not integer.".format(dtype)) with ops.name_scope( None, "indicators_to_sparse_ids", (indicators, ignore_value)): # Convert indicators to binary ones and zeros. We use int64 since # SparseTensor requires int64 indices. indicators = ops.convert_to_tensor(indicators, name="indicators") missing_indicators = math_ops.equal( indicators, _ignore_value_tensor(indicators.dtype, ignore_value), name="missing") zeros_like_indicators = array_ops.zeros_like( indicators, dtype=dtypes.int64, name="zeros") binary_indicators = array_ops.where( missing_indicators, zeros_like_indicators, array_ops.ones_like(indicators, dtype=dtypes.int64, name="ones"), name="binary_indicators") # Use cumsum along the last dimension to generate per-row indexes. # Note that these are 1-based (since 0 indicates missing values), so they're # off-by-1 from the actual indices. We'll subtract 1 below. Since they're # off-by-one, the max value is the size of the last dimension (i.e., # last_index + 1). row_index_indicators = array_ops.where( missing_indicators, zeros_like_indicators, math_ops.cumsum(binary_indicators, axis=-1), "row_index_indicators") result_last_dim = array_ops.reshape( math_ops.reduce_max(row_index_indicators), shape=(1,), name="result_last_dim") # Convert to a SparseTensor. The values of this SparseTensor are the last # indices of our result, and the last indices of this SparseTensor (i.e., # the class IDs indicated by `indicators`) are the values of our result, so # we use tensor slicing and concat to swap them. sparse_row_index_indicators = dense_to_sparse_tensor( row_index_indicators, ignore_value=0) return sparse_tensor.SparseTensor( indices=array_ops.concat(( sparse_row_index_indicators.indices[:, :-1], array_ops.reshape(sparse_row_index_indicators.values - 1, (-1, 1)) ), axis=1, name="indices"), values=math_ops.cast( sparse_row_index_indicators.indices[:, -1], dtype=dtype, name="values"), dense_shape=array_ops.concat( (sparse_row_index_indicators.dense_shape[0:-1], result_last_dim), axis=0, name="dense_shape"))
def batch_matrix_pow(matrices, powers): """Compute powers of matrices, e.g. A^3 = matmul(matmul(A, A), A). Uses exponentiation by squaring, with O(log(p)) matrix multiplications to compute A^p. Args: matrices: [batch size x N x N] powers: Which integer power to raise each matrix to [batch size] Returns: The matrices raised to their respective powers, same dimensions as the "matrices" argument. """ def terminate_when_all_zero(current_argument, residual_powers, accumulator): del current_argument, accumulator # not used for condition do_exit = math_ops.reduce_any( math_ops.greater(residual_powers, array_ops.ones_like(residual_powers))) return do_exit def do_iteration(current_argument, residual_powers, accumulator): """Compute one step of iterative exponentiation by squaring. The recursive form is: power(A, p) = { power(matmul(A, A), p / 2) for even p { matmul(A, power(matmul(A, A), (p - 1) / 2)) for odd p power(A, 0) = I The power(A, 0) = I case is handled by starting with accumulator set to the identity matrix; matrices with zero residual powers are passed through unchanged. Args: current_argument: On this step, what is the first argument (A^2..^2) to the (unrolled) recursive function? [batch size x N x N] residual_powers: On this step, what is the second argument (residual p)? [batch_size] accumulator: Accumulates the exterior multiplications from the odd powers (initially the identity matrix). [batch_size x N x N] Returns: Updated versions of each argument for one step of the unrolled computation. Does not change parts of the batch which have a residual power of zero. """ is_even = math_ops.equal( residual_powers % 2, array_ops.zeros(array_ops.shape(residual_powers), dtype=dtypes.int32)) new_accumulator = array_ops.where( is_even, accumulator, math_ops.matmul(accumulator, current_argument)) new_argument = math_ops.matmul(current_argument, current_argument) do_update = math_ops.greater(residual_powers, 1) new_residual_powers = residual_powers - residual_powers % 2 new_residual_powers //= 2 # Stop updating if we've reached our base case; some batch elements may # finish sooner than others accumulator = array_ops.where(do_update, new_accumulator, accumulator) current_argument = array_ops.where(do_update, new_argument, current_argument) residual_powers = array_ops.where(do_update, new_residual_powers, residual_powers) return (current_argument, residual_powers, accumulator) matrices = ops.convert_to_tensor(matrices) powers = math_ops.cast(powers, dtype=dtypes.int32) ident = array_ops.expand_dims( array_ops.diag( array_ops.ones([array_ops.shape(matrices)[1]], dtype=matrices.dtype)), 0) ident_tiled = array_ops.tile(ident, [array_ops.shape(matrices)[0], 1, 1]) (final_argument, final_residual_power, final_accumulator) = control_flow_ops.while_loop( terminate_when_all_zero, do_iteration, [matrices, powers, ident_tiled]) return array_ops.where( math_ops.equal( final_residual_power, array_ops.zeros_like(final_residual_power, dtype=dtypes.int32)), ident_tiled, math_ops.matmul(final_argument, final_accumulator))
def op(self): """Returns the cluster initializer op.""" return control_flow_ops.cond( math_ops.equal(self._num_remaining, 0), lambda: check_ops.assert_equal(self._cluster_centers_initialized, True), self._initialize)
def _introspect_ndims(self, ndims): """Helper to establish some properties of input ndims args.""" if self._is_all_constant_helper(ndims): return (tensor_util.constant_value(ndims), tensor_util.constant_value(ndims) == 0) return None, math_ops.equal(ndims, 0)
def update_confusion_matrix_variables(variables_to_update, y_true, y_pred, thresholds, top_k=None, class_id=None, sample_weight=None, multi_label=False, label_weights=None): """Returns op to update the given confusion matrix variables. For every pair of values in y_true and y_pred: true_positive: y_true == True and y_pred > thresholds false_negatives: y_true == True and y_pred <= thresholds true_negatives: y_true == False and y_pred <= thresholds false_positive: y_true == False and y_pred > thresholds The results will be weighted and added together. When multiple thresholds are provided, we will repeat the same for every threshold. For estimation of these metrics over a stream of data, the function creates an `update_op` operation that updates the given variables. If `sample_weight` is `None`, weights default to 1. Use weights of 0 to mask values. Args: variables_to_update: Dictionary with 'tp', 'fn', 'tn', 'fp' as valid keys and corresponding variables to update as values. y_true: A `Tensor` whose shape matches `y_pred`. Will be cast to `bool`. y_pred: A floating point `Tensor` of arbitrary shape and whose values are in the range `[0, 1]`. thresholds: A float value or a python list or tuple of float thresholds in `[0, 1]`, or NEG_INF (used when top_k is set). top_k: Optional int, indicates that the positive labels should be limited to the top k predictions. class_id: Optional int, limits the prediction and labels to the class specified by this argument. sample_weight: Optional `Tensor` whose rank is either 0, or the same rank as `y_true`, and must be broadcastable to `y_true` (i.e., all dimensions must be either `1`, or the same as the corresponding `y_true` dimension). multi_label: Optional boolean indicating whether multidimensional prediction/labels should be treated as multilabel responses, or flattened into a single label. When True, the valus of `variables_to_update` must have a second dimension equal to the number of labels in y_true and y_pred, and those tensors must not be RaggedTensors. label_weights: (optional) tensor of non-negative weights for multilabel data. The weights are applied when calculating TP, FP, FN, and TN without explicit multilabel handling (i.e. when the data is to be flattened). Returns: Update op. Raises: ValueError: If `y_pred` and `y_true` have mismatched shapes, or if `sample_weight` is not `None` and its shape doesn't match `y_pred`, or if `variables_to_update` contains invalid keys. """ if multi_label and label_weights is not None: raise ValueError( '`label_weights` for multilabel data should be handled ' 'outside of `update_confusion_matrix_variables` when ' '`multi_label` is True.') if variables_to_update is None: return y_true = math_ops.cast(y_true, dtype=dtypes.float32) y_pred = math_ops.cast(y_pred, dtype=dtypes.float32) if multi_label: thresh_shape = array_ops.shape(thresholds) num_thresholds = thresh_shape[0] one_thresh = math_ops.equal(math_ops.cast(1, dtype=dtypes.int32), array_ops.rank(thresholds), name='one_set_of_thresholds_cond') else: [y_pred, y_true ], _ = ragged_assert_compatible_and_get_flat_values([y_pred, y_true], sample_weight) num_thresholds = len(to_list(thresholds)) one_thresh = math_ops.cast(True, dtype=dtypes.bool) y_pred.shape.assert_is_compatible_with(y_true.shape) if not any(key for key in variables_to_update if key in list(ConfusionMatrix)): raise ValueError( 'Please provide at least one valid confusion matrix ' 'variable to update. Valid variable key options are: "{}". ' 'Received: "{}"'.format(list(ConfusionMatrix), variables_to_update.keys())) invalid_keys = [ key for key in variables_to_update if key not in list(ConfusionMatrix) ] if invalid_keys: raise ValueError( 'Invalid keys: {}. Valid variable key options are: "{}"'.format( invalid_keys, list(ConfusionMatrix))) with ops.control_dependencies([ check_ops.assert_greater_equal(y_pred, math_ops.cast(0.0, dtype=y_pred.dtype), message='predictions must be >= 0'), check_ops.assert_less_equal(y_pred, math_ops.cast(1.0, dtype=y_pred.dtype), message='predictions must be <= 1') ]): if sample_weight is None: y_pred, y_true = tf_losses_utils.squeeze_or_expand_dimensions( y_pred, y_true) else: y_pred, y_true, sample_weight = ( tf_losses_utils.squeeze_or_expand_dimensions( y_pred, y_true, sample_weight=sample_weight)) if top_k is not None: y_pred = _filter_top_k(y_pred, top_k) if class_id is not None: y_true = y_true[..., class_id] y_pred = y_pred[..., class_id] pred_shape = array_ops.shape(y_pred) num_predictions = pred_shape[0] if y_pred.shape.ndims == 1: num_labels = 1 else: num_labels = gen_math_ops.Prod(input=pred_shape[1:], axis=0) thresh_label_tile = control_flow_ops.cond( one_thresh, lambda: num_labels, lambda: math_ops.cast(1, dtype=dtypes.int32)) # Reshape predictions and labels, adding a dim for thresholding. if multi_label: predictions_extra_dim = array_ops.expand_dims(y_pred, 0) labels_extra_dim = array_ops.expand_dims( math_ops.cast(y_true, dtype=dtypes.bool), 0) else: # Flatten predictions and labels when not multilabel. predictions_extra_dim = array_ops.reshape(y_pred, [1, -1]) labels_extra_dim = array_ops.reshape( math_ops.cast(y_true, dtype=dtypes.bool), [1, -1]) # Tile the thresholds for every prediction. if multi_label: thresh_pretile_shape = [num_thresholds, 1, -1] thresh_tiles = [1, num_predictions, thresh_label_tile] data_tiles = [num_thresholds, 1, 1] else: thresh_pretile_shape = [num_thresholds, -1] thresh_tiles = [1, num_predictions * num_labels] data_tiles = [num_thresholds, 1] thresh_tiled = array_ops.tile( array_ops.reshape(array_ops.constant(thresholds, dtype=dtypes.float32), thresh_pretile_shape), array_ops.stack(thresh_tiles)) # Tile the predictions for every threshold. preds_tiled = array_ops.tile(predictions_extra_dim, data_tiles) # Compare predictions and threshold. pred_is_pos = math_ops.greater(preds_tiled, thresh_tiled) # Tile labels by number of thresholds label_is_pos = array_ops.tile(labels_extra_dim, data_tiles) if sample_weight is not None: sample_weight = weights_broadcast_ops.broadcast_weights( math_ops.cast(sample_weight, dtype=dtypes.float32), y_pred) weights_tiled = array_ops.tile( array_ops.reshape(sample_weight, thresh_tiles), data_tiles) else: weights_tiled = None if label_weights is not None and not multi_label: label_weights = array_ops.expand_dims(label_weights, 0) label_weights = weights_broadcast_ops.broadcast_weights( label_weights, y_pred) label_weights_tiled = array_ops.tile( array_ops.reshape(label_weights, thresh_tiles), data_tiles) if weights_tiled is None: weights_tiled = label_weights_tiled else: weights_tiled = math_ops.multiply(weights_tiled, label_weights_tiled) update_ops = [] def weighted_assign_add(label, pred, weights, var): label_and_pred = math_ops.cast(math_ops.logical_and(label, pred), dtype=dtypes.float32) if weights is not None: label_and_pred *= weights return var.assign_add(math_ops.reduce_sum(label_and_pred, 1)) loop_vars = { ConfusionMatrix.TRUE_POSITIVES: (label_is_pos, pred_is_pos), } update_tn = ConfusionMatrix.TRUE_NEGATIVES in variables_to_update update_fp = ConfusionMatrix.FALSE_POSITIVES in variables_to_update update_fn = ConfusionMatrix.FALSE_NEGATIVES in variables_to_update if update_fn or update_tn: pred_is_neg = math_ops.logical_not(pred_is_pos) loop_vars[ConfusionMatrix.FALSE_NEGATIVES] = (label_is_pos, pred_is_neg) if update_fp or update_tn: label_is_neg = math_ops.logical_not(label_is_pos) loop_vars[ConfusionMatrix.FALSE_POSITIVES] = (label_is_neg, pred_is_pos) if update_tn: loop_vars[ConfusionMatrix.TRUE_NEGATIVES] = (label_is_neg, pred_is_neg) for matrix_cond, (label, pred) in loop_vars.items(): if matrix_cond in variables_to_update: update_ops.append( weighted_assign_add(label, pred, weights_tiled, variables_to_update[matrix_cond])) return control_flow_ops.group(update_ops)
def train(train_op, logdir, train_step_fn=train_step, train_step_kwargs=_USE_DEFAULT, log_every_n_steps=1, graph=None, master='', is_chief=True, global_step=None, number_of_steps=None, init_op=_USE_DEFAULT, init_feed_dict=None, local_init_op=_USE_DEFAULT, init_fn=None, ready_op=_USE_DEFAULT, summary_op=_USE_DEFAULT, save_summaries_secs=600, startup_delay_steps=0, saver=None, save_interval_secs=600, sync_optimizer=None, session_config=None): """Runs a training loop using a TensorFlow supervisor. When the sync_optimizer is supplied, gradient updates are applied synchronously. Otherwise, gradient updates are applied asynchronous. Args: train_op: A `Tensor` that, when executed, will apply the gradients and return the loss value. logdir: The directory where training logs are written to. If None, model checkpoints and summaries will not be written. train_step_fn: The function to call in order to execute a single gradient step. The function must have take exactly four arguments: the current session, the `train_op` `Tensor`, a global step `Tensor` and a dictionary. train_step_kwargs: A dictionary which is passed to the `train_step_fn`. By default, two `Boolean`, scalar ops called "should_stop" and "should_log" are provided. log_every_n_steps: The frequency, in terms of global steps, that the loss and global step and logged. graph: The graph to pass to the supervisor. If no graph is supplied the default graph is used. master: The BNS name of the tensorflow master. is_chief: Specifies whether or not the training is being run by the primary replica during replica training. global_step: The `Tensor` representing the global step. If left as `None`, then slim.variables.get_or_create_global_step() is used. number_of_steps: The max number of gradient steps to take during training. If the value is left as None, training proceeds indefinitely. init_op: The initialization operation. If left to its default value, then the session is initialized by calling `tf.initialize_all_variables()`. init_feed_dict: A feed dictionary to use when executing the `init_op`. local_init_op: The local initialization operation. If left to its default value, then the session is initialized by calling `tf.initialize_local_variables()` and `tf.initialize_all_tables()`. init_fn: An optional callable to be executed after `init_op` is called. The callable must accept one argument, the session being initialized. ready_op: Operation to check if the model is ready to use. If left to its default value, then the session checks for readiness by calling `tf.report_uninitialized_variables()`. summary_op: The summary operation. save_summaries_secs: How often, in seconds, to save summaries. startup_delay_steps: The number of steps to wait for before beginning. Note that this must be 0 if a sync_optimizer is supplied. saver: Saver to save checkpoints. If None, a default one will be created and used. save_interval_secs: How often, in seconds, to save the model to `logdir`. sync_optimizer: an instance of tf.train.SyncReplicasOptimizer. If the argument is supplied, gradient updates will be synchronous. If left as `None`, gradient updates will be asynchronous. session_config: An instance of `tf.ConfigProto` that will be used to configure the `Session`. If left as `None`, the default will be used. Returns: the value of the loss function after training. Raises: ValueError: if `train_op` is empty or if `startup_delay_steps` is non-zero when `sync_optimizer` is supplied, or if `number_of_steps` is negative. """ if train_op is None: raise ValueError('train_op cannot be None.') if logdir is None: if summary_op != _USE_DEFAULT: raise ValueError('Cannot provide summary_op because logdir=None') if saver is not None: raise ValueError('Cannot provide saver because logdir=None') if sync_optimizer and startup_delay_steps > 0: raise ValueError( 'startup_delay_steps must be zero when sync_optimizer is supplied.' ) if number_of_steps is not None and number_of_steps <= 0: raise ValueError( '`number_of_steps` must be either None or a positive number.') graph = graph or ops.get_default_graph() with graph.as_default(): if global_step is None: global_step = variables.get_or_create_global_step() saver = saver or tf_saver.Saver() if init_op == _USE_DEFAULT: init_op = tf_variables.initialize_all_variables() if ready_op == _USE_DEFAULT: ready_op = tf_variables.report_uninitialized_variables() if summary_op == _USE_DEFAULT: summary_op = logging_ops.merge_all_summaries() if local_init_op == _USE_DEFAULT: local_init_op = control_flow_ops.group( tf_variables.initialize_local_variables(), data_flow_ops.initialize_all_tables()) cleanup_op = None if is_chief and sync_optimizer: if not isinstance(sync_optimizer, sync_replicas_optimizer.SyncReplicasOptimizer): raise ValueError( '`sync_optimizer` must be a tf.train.SyncReplicasOptimizer' ) # Need to create these BEFORE the supervisor finalizes the graph: with ops.control_dependencies([init_op]): init_tokens_op = sync_optimizer.get_init_tokens_op() init_op = init_tokens_op chief_queue_runner = sync_optimizer.get_chief_queue_runner() cleanup_op = sync_optimizer.get_clean_up_op() if train_step_kwargs == _USE_DEFAULT: train_step_kwargs = {} if number_of_steps: should_stop_op = math_ops.greater_equal( global_step, number_of_steps) else: should_stop_op = constant_op.constant(False) train_step_kwargs['should_stop'] = should_stop_op train_step_kwargs['should_log'] = math_ops.equal( math_ops.mod(global_step, log_every_n_steps), 0) sv = supervisor.Supervisor(graph=graph, is_chief=is_chief, logdir=logdir, init_op=init_op, init_feed_dict=init_feed_dict, local_init_op=local_init_op, ready_op=ready_op, summary_op=summary_op, global_step=global_step, saver=saver, save_summaries_secs=save_summaries_secs, save_model_secs=save_interval_secs, init_fn=init_fn) should_retry = True while should_retry: try: should_retry = False with sv.managed_session(master, start_standard_services=False, config=session_config) as sess: logging.info('Starting Session.') if is_chief: if logdir: sv.start_standard_services(sess) elif startup_delay_steps > 0: _wait_for_step( sess, global_step, min(startup_delay_steps, number_of_steps or sys.maxint)) sv.start_queue_runners(sess) logging.info('Starting Queues.') if is_chief and sync_optimizer: sv.start_queue_runners(sess, [chief_queue_runner]) try: while not sv.should_stop(): total_loss, should_stop = train_step_fn( sess, train_op, global_step, train_step_kwargs) if should_stop: logging.info('Stopping Training.') break if logdir and sv.is_chief: logging.info( 'Finished training! Saving model to disk.') sv.saver.save(sess, sv.save_path, global_step=sv.global_step) except: if sv.is_chief and cleanup_op is not None: logging.info('About to execute sync_clean_up_op!') sess.run(cleanup_op) raise except errors.AbortedError: # Always re-run on AbortedError as it indicates a restart of one of the # distributed tensorflow servers. logging.info('Retrying training!') should_retry = True return total_loss
def training_graph(self, input_data, input_labels, random_seed, data_spec, input_weights=None): """Constructs a TF graph for training a random tree. Args: input_data: A tensor or SparseTensor or placeholder for input data. input_labels: A tensor or placeholder for labels associated with input_data. random_seed: The random number generator seed to use for this tree. 0 means use the current time as the seed. data_spec: A list of tf.dtype values specifying the original types of each column. input_weights: A float tensor or placeholder holding per-input weights, or None if all inputs are to be weighted equally. Returns: The last op in the random tree training graph. """ epoch = math_ops.to_int32(get_epoch_variable()) if input_weights is None: input_weights = [] sparse_indices = [] sparse_values = [] sparse_shape = [] if isinstance(input_data, sparse_tensor.SparseTensor): sparse_indices = input_data.indices sparse_values = input_data.values sparse_shape = input_data.dense_shape input_data = [] # Count extremely random stats. (node_sums, node_squares, splits_indices, splits_sums, splits_squares, totals_indices, totals_sums, totals_squares, input_leaves) = (tensor_forest_ops.count_extremely_random_stats( input_data, sparse_indices, sparse_values, sparse_shape, data_spec, input_labels, input_weights, self.variables.tree, self.variables.tree_thresholds, self.variables.node_to_accumulator_map, self.variables.candidate_split_features, self.variables.candidate_split_thresholds, self.variables.start_epoch, epoch, num_classes=self.params.num_output_columns, regression=self.params.regression)) node_update_ops = [] node_update_ops.append( state_ops.assign_add(self.variables.node_sums, node_sums)) splits_update_ops = [] splits_update_ops.append( tensor_forest_ops.scatter_add_ndim(self.variables.candidate_split_sums, splits_indices, splits_sums)) splits_update_ops.append( tensor_forest_ops.scatter_add_ndim(self.variables.accumulator_sums, totals_indices, totals_sums)) if self.params.regression: node_update_ops.append(state_ops.assign_add(self.variables.node_squares, node_squares)) splits_update_ops.append( tensor_forest_ops.scatter_add_ndim( self.variables.candidate_split_squares, splits_indices, splits_squares)) splits_update_ops.append( tensor_forest_ops.scatter_add_ndim(self.variables.accumulator_squares, totals_indices, totals_squares)) # Sample inputs. update_indices, feature_updates, threshold_updates = ( tensor_forest_ops.sample_inputs( input_data, sparse_indices, sparse_values, sparse_shape, input_weights, self.variables.node_to_accumulator_map, input_leaves, self.variables.candidate_split_features, self.variables.candidate_split_thresholds, split_initializations_per_input=( self.params.split_initializations_per_input), split_sampling_random_seed=random_seed)) update_features_op = state_ops.scatter_update( self.variables.candidate_split_features, update_indices, feature_updates) update_thresholds_op = state_ops.scatter_update( self.variables.candidate_split_thresholds, update_indices, threshold_updates) # Calculate finished nodes. with ops.control_dependencies(splits_update_ops): # Passing input_leaves to finished nodes here means that nodes that # have become stale won't be deallocated until an input reaches them, # because we're trying to avoid considering every fertile node for # performance reasons. finished, stale = tensor_forest_ops.finished_nodes( input_leaves, self.variables.node_to_accumulator_map, self.variables.candidate_split_sums, self.variables.candidate_split_squares, self.variables.accumulator_sums, self.variables.accumulator_squares, self.variables.start_epoch, epoch, num_split_after_samples=self.params.split_after_samples, min_split_samples=self.params.min_split_samples, dominate_method=self.params.dominate_method, dominate_fraction=self.params.dominate_fraction) # Update leaf scores. # TODO(thomaswc): Store the leaf scores in a TopN and only update the # scores of the leaves that were touched by this batch of input. children = array_ops.squeeze( array_ops.slice(self.variables.tree, [0, 0], [-1, 1]), squeeze_dims=[1]) is_leaf = math_ops.equal(constants.LEAF_NODE, children) leaves = math_ops.to_int32( array_ops.squeeze( array_ops.where(is_leaf), squeeze_dims=[1])) non_fertile_leaves = array_ops.boolean_mask( leaves, math_ops.less(array_ops.gather( self.variables.node_to_accumulator_map, leaves), 0)) # TODO(gilberth): It should be possible to limit the number of non # fertile leaves we calculate scores for, especially since we can only take # at most array_ops.shape(finished)[0] of them. with ops.control_dependencies(node_update_ops): sums = array_ops.gather(self.variables.node_sums, non_fertile_leaves) if self.params.regression: squares = array_ops.gather(self.variables.node_squares, non_fertile_leaves) non_fertile_leaf_scores = self._variance(sums, squares) else: non_fertile_leaf_scores = self._weighted_gini(sums) # Calculate best splits. with ops.control_dependencies(splits_update_ops): split_indices = tensor_forest_ops.best_splits( finished, self.variables.node_to_accumulator_map, self.variables.candidate_split_sums, self.variables.candidate_split_squares, self.variables.accumulator_sums, self.variables.accumulator_squares, regression=self.params.regression) # Grow tree. with ops.control_dependencies([update_features_op, update_thresholds_op]): (tree_update_indices, tree_children_updates, tree_threshold_updates, new_eot) = (tensor_forest_ops.grow_tree( self.variables.end_of_tree, self.variables.node_to_accumulator_map, finished, split_indices, self.variables.candidate_split_features, self.variables.candidate_split_thresholds)) tree_update_op = state_ops.scatter_update( self.variables.tree, tree_update_indices, tree_children_updates) thresholds_update_op = state_ops.scatter_update( self.variables.tree_thresholds, tree_update_indices, tree_threshold_updates) # TODO(thomaswc): Only update the epoch on the new leaves. new_epoch_updates = epoch * array_ops.ones_like(tree_threshold_updates, dtype=dtypes.int32) epoch_update_op = state_ops.scatter_update( self.variables.start_epoch, tree_update_indices, new_epoch_updates) # Update fertile slots. with ops.control_dependencies([tree_update_op]): (n2a_map_updates, a2n_map_updates, accumulators_cleared, accumulators_allocated) = (tensor_forest_ops.update_fertile_slots( finished, non_fertile_leaves, non_fertile_leaf_scores, self.variables.end_of_tree, self.variables.accumulator_sums, self.variables.node_to_accumulator_map, stale, self.variables.node_sums, regression=self.params.regression)) # Ensure end_of_tree doesn't get updated until UpdateFertileSlots has # used it to calculate new leaves. gated_new_eot, = control_flow_ops.tuple( [new_eot], control_inputs=[n2a_map_updates]) eot_update_op = state_ops.assign(self.variables.end_of_tree, gated_new_eot) updates = [] updates.append(eot_update_op) updates.append(tree_update_op) updates.append(thresholds_update_op) updates.append(epoch_update_op) updates.append( state_ops.scatter_update(self.variables.node_to_accumulator_map, n2a_map_updates[0], n2a_map_updates[1])) updates.append( state_ops.scatter_update(self.variables.accumulator_to_node_map, a2n_map_updates[0], a2n_map_updates[1])) cleared_and_allocated_accumulators = array_ops.concat_v2( [accumulators_cleared, accumulators_allocated], 0) # Calculate values to put into scatter update for candidate counts. # Candidate split counts are always reset back to 0 for both cleared # and allocated accumulators. This means some accumulators might be doubly # reset to 0 if the were released and not allocated, then later allocated. split_values = array_ops.tile( array_ops.expand_dims(array_ops.expand_dims( array_ops.zeros_like(cleared_and_allocated_accumulators, dtype=dtypes.float32), 1), 2), [1, self.params.num_splits_to_consider, self.params.num_output_columns]) updates.append(state_ops.scatter_update( self.variables.candidate_split_sums, cleared_and_allocated_accumulators, split_values)) if self.params.regression: updates.append(state_ops.scatter_update( self.variables.candidate_split_squares, cleared_and_allocated_accumulators, split_values)) # Calculate values to put into scatter update for total counts. total_cleared = array_ops.tile( array_ops.expand_dims( math_ops.neg(array_ops.ones_like(accumulators_cleared, dtype=dtypes.float32)), 1), [1, self.params.num_output_columns]) total_reset = array_ops.tile( array_ops.expand_dims( array_ops.zeros_like(accumulators_allocated, dtype=dtypes.float32), 1), [1, self.params.num_output_columns]) accumulator_updates = array_ops.concat_v2([total_cleared, total_reset], 0) updates.append(state_ops.scatter_update( self.variables.accumulator_sums, cleared_and_allocated_accumulators, accumulator_updates)) if self.params.regression: updates.append(state_ops.scatter_update( self.variables.accumulator_squares, cleared_and_allocated_accumulators, accumulator_updates)) # Calculate values to put into scatter update for candidate splits. split_features_updates = array_ops.tile( array_ops.expand_dims( math_ops.neg(array_ops.ones_like( cleared_and_allocated_accumulators)), 1), [1, self.params.num_splits_to_consider]) updates.append(state_ops.scatter_update( self.variables.candidate_split_features, cleared_and_allocated_accumulators, split_features_updates)) updates += self.finish_iteration() return control_flow_ops.group(*updates)
def err(labels, predictions, weights=None, metrics_collections=None, updates_collections=None, name=None): """Calculates how often `predictions` matches `labels`. The `accuracy` function creates two local variables, `total` and `count` that are used to compute the frequency with which `predictions` matches `labels`. This frequency is ultimately returned as `accuracy`: an idempotent operation that simply divides `total` by `count`. For estimation of the metric over a stream of data, the function creates an `update_op` operation that updates these variables and returns the `accuracy`. Internally, an `is_correct` operation computes a `Tensor` with elements 1.0 where the corresponding elements of `predictions` and `labels` match and 0.0 otherwise. Then `update_op` increments `total` with the reduced sum of the product of `weights` and `is_correct`, and it increments `count` with the reduced sum of `weights`. If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. Args: labels: The ground truth values, a `Tensor` whose shape matches `predictions`. predictions: The predicted values, a `Tensor` of any shape. weights: Optional `Tensor` whose rank is either 0, or the same rank as `labels`, and must be broadcastable to `labels` (i.e., all dimensions must be either `1`, or the same as the corresponding `labels` dimension). metrics_collections: An optional list of collections that `accuracy` should be added to. updates_collections: An optional list of collections that `update_op` should be added to. name: An optional variable_scope name. Returns: accuracy: A `Tensor` representing the accuracy, the value of `total` divided by `count`. update_op: An operation that increments the `total` and `count` variables appropriately and whose value matches `accuracy`. Raises: ValueError: If `predictions` and `labels` have mismatched shapes, or if `weights` is not `None` and its shape doesn't match `predictions`, or if either `metrics_collections` or `updates_collections` are not a list or tuple. RuntimeError: If eager execution is enabled. """ if context.executing_eagerly(): raise RuntimeError('tf.metrics.err is not supported when eager ' 'execution is enabled.') predictions, labels, weights = _remove_squeezable_dimensions( predictions=predictions, labels=labels, weights=weights) predictions.get_shape().assert_is_compatible_with(labels.get_shape()) if labels.dtype != predictions.dtype: predictions = math_ops.cast(predictions, labels.dtype) is_correct = 1 - math_ops.cast(math_ops.equal(predictions, labels), dtypes.float32) return tf.metrics.mean(is_correct, weights, metrics_collections, updates_collections, name or 'err')
def _process_input_helper(self, update_row_factors, sp_input=None, transpose_input=False, row_weights=None): """Creates the graph for processing a sparse slice of input. Args: update_row_factors: if True, update or project the row_factors, else update or project the column factors. sp_input: Please refer to comments for update_row_factors, update_col_factors, project_row_factors, and project_col_factors for restrictions. transpose_input: If True, the input is logically transposed and then the corresponding rows/columns of the transposed input are updated. row_weights: If not None, this is the row/column weights to be used for the update or projection. If None, use the corresponding weights from the model. Note that the feature (column/row) weights will be determined by the model. When not None, it can either be a scalar or a rank-1 tensor with the same number of elements as the number of rows of columns to be updated/projected. Returns: A tuple consisting of the following three elements: new_values: New values for the row/column factors. update_op: An op that assigns the newly computed values to the row/column factors. loss: A tensor (scalar) that contains the normalized minibatch loss, corresponding to sp_input. """ assert isinstance(sp_input, sparse_tensor.SparseTensor) if update_row_factors: left = self._row_factors right_factors = self._col_factors_cache row_wt = self._row_wt_cache col_wt = self._col_wt_cache total_rows = self._input_rows sharding_func = WALSModel._get_sharding_func( self._input_rows, self._num_row_shards) gramian = self._col_gramian_cache else: left = self._col_factors right_factors = self._row_factors_cache row_wt = self._col_wt_cache col_wt = self._row_wt_cache total_rows = self._input_cols sharding_func = WALSModel._get_sharding_func( self._input_cols, self._num_col_shards) gramian = self._row_gramian_cache transpose_input = not transpose_input # Note that the row indices of sp_input are based on the original full input # Here we reindex the rows and give them contiguous ids starting at 0. # We use tf.unique to achieve this reindexing. Note that this is done so # that the downstream kernel can assume that the input is "dense" along the # row dimension. row_ids, col_ids = array_ops.split(value=sp_input.indices, num_or_size_splits=2, axis=1) update_row_indices, all_row_ids = array_ops.unique(row_ids[:, 0]) update_col_indices, all_col_ids = array_ops.unique(col_ids[:, 0]) col_ids = array_ops.expand_dims( math_ops.cast(all_col_ids, dtypes.int64), 1) row_ids = array_ops.expand_dims( math_ops.cast(all_row_ids, dtypes.int64), 1) if transpose_input: update_indices = update_col_indices row_shape = [ math_ops.cast( array_ops.shape(update_row_indices)[0], dtypes.int64) ] gather_indices = update_row_indices else: update_indices = update_row_indices row_shape = [ math_ops.cast( array_ops.shape(update_col_indices)[0], dtypes.int64) ] gather_indices = update_col_indices num_rows = math_ops.cast( array_ops.shape(update_indices)[0], dtypes.int64) col_shape = [num_rows] right = embedding_ops.embedding_lookup(right_factors, gather_indices, partition_strategy="div") new_sp_indices = array_ops.concat([row_ids, col_ids], 1) new_sp_shape = (array_ops.concat([row_shape, col_shape], 0) if transpose_input else array_ops.concat( [col_shape, row_shape], 0)) new_sp_input = sparse_tensor.SparseTensor(indices=new_sp_indices, values=sp_input.values, dense_shape=new_sp_shape) # Compute lhs and rhs of the normal equations total_lhs = (self._unobserved_weight * gramian) if self._regularization_matrix is not None: total_lhs += self._regularization_matrix if self._row_weights is None: # Special case of ALS. Use a much simpler update rule. total_rhs = (self._unobserved_weight * sparse_ops.sparse_tensor_dense_matmul( new_sp_input, right, adjoint_a=transpose_input)) # TODO(rmlarsen): handle transposing in tf.matrix_solve instead of # transposing explicitly. # TODO(rmlarsen): multi-thread tf.matrix_solve. new_left_values = array_ops.transpose( linalg_ops.matrix_solve(total_lhs, array_ops.transpose(total_rhs))) else: if row_weights is None: # TODO(yifanchen): Add special handling for single shard without using # embedding_lookup and perform benchmarks for those cases. Same for # col_weights lookup below. row_weights_slice = embedding_ops.embedding_lookup( row_wt, update_indices, partition_strategy="div") else: num_indices = array_ops.shape(update_indices)[0] with ops.control_dependencies([ check_ops.assert_less_equal( array_ops.rank(row_weights), 1) ]): row_weights_slice = control_flow_ops.cond( math_ops.equal(array_ops.rank(row_weights), 0), lambda: (array_ops.ones([num_indices]) * row_weights), lambda: math_ops.cast(row_weights, dtypes.float32)) col_weights = embedding_ops.embedding_lookup( col_wt, gather_indices, partition_strategy="div") partial_lhs, total_rhs = ( gen_factorization_ops.wals_compute_partial_lhs_and_rhs( right, col_weights, self._unobserved_weight, row_weights_slice, new_sp_input.indices, new_sp_input.values, num_rows, transpose_input, name="wals_compute_partial_lhs_rhs")) total_lhs = array_ops.expand_dims(total_lhs, 0) + partial_lhs total_rhs = array_ops.expand_dims(total_rhs, -1) new_left_values = array_ops.squeeze( linalg_ops.matrix_solve(total_lhs, total_rhs), [2]) update_op_name = "row_update" if update_row_factors else "col_update" update_op = self.scatter_update(left, update_indices, new_left_values, sharding_func, name=update_op_name) # Create the loss subgraph loss_sp_input = (sparse_ops.sparse_transpose(new_sp_input) if transpose_input else new_sp_input) # sp_approx is the low rank estimate of the input matrix, formed by # computing the product <u_i, v_j> for (i, j) in loss_sp_input.indices. sp_approx_vals = gen_factorization_ops.masked_matmul( new_left_values, right, loss_sp_input.indices, transpose_a=False, transpose_b=True) sp_approx = sparse_tensor.SparseTensor(loss_sp_input.indices, sp_approx_vals, loss_sp_input.dense_shape) sp_approx_sq = math_ops.square(sp_approx) sp_residual = sparse_ops.sparse_add(loss_sp_input, sp_approx * (-1)) sp_residual_sq = math_ops.square(sp_residual) row_wt_mat = (constant_op.constant(0.) if self._row_weights is None else array_ops.expand_dims(row_weights_slice, 1)) col_wt_mat = (constant_op.constant(0.) if self._col_weights is None else array_ops.expand_dims(col_weights, 0)) # We return the normalized loss partial_row_gramian = math_ops.matmul(new_left_values, new_left_values, transpose_a=True) normalization_factor = total_rows / math_ops.cast( num_rows, dtypes.float32) loss = (self._unobserved_weight * (sparse_ops.sparse_reduce_sum(sp_residual_sq) - sparse_ops.sparse_reduce_sum(sp_approx_sq) + math_ops.trace( math_ops.matmul(partial_row_gramian, gramian))) + sparse_ops.sparse_reduce_sum( row_wt_mat * (sp_residual_sq * col_wt_mat))) * normalization_factor if self._regularization is not None: loss += self._regularization * ( math_ops.trace(partial_row_gramian) * normalization_factor + math_ops.trace(gramian)) return (new_left_values, update_op, loss)