def testAssertDivideByZero(self): with self.test_session() as sess: epsilon = ops.convert_to_tensor(1e-20) x = ops.convert_to_tensor(0.0) y = ops.convert_to_tensor(1.0) z = ops.convert_to_tensor(2.0) # assert(epsilon < y) # z / y with sess.graph.control_dependencies([ control_flow_ops.Assert( math_ops.less(epsilon, y), ["Divide-by-zero"]) ]): out = math_ops.div(z, y) self.assertAllEqual(2.0, out.eval()) # assert(epsilon < x) # z / x # # This tests printing out multiple tensors with sess.graph.control_dependencies([ control_flow_ops.Assert( math_ops.less(epsilon, x), ["Divide-by-zero", "less than x"]) ]): out = math_ops.div(z, x) with self.assertRaisesOpError("less than x"): out.eval()
def _safe_div(numerator, denominator, name="value"): """Computes a safe divide which returns 0 if the denominator is zero. Note that the function contains an additional conditional check that is necessary for avoiding situations where the loss is zero causing NaNs to creep into the gradient computation. Args: numerator: An arbitrary `Tensor`. denominator: `Tensor` whose shape matches `numerator` and whose values are assumed to be non-negative. name: An optional name for the returned op. Returns: The element-wise value of the numerator divided by the denominator. """ if isinstance(denominator, float): if math_ops.equal(denominator, 0.0): return ops.convert_to_tensor(0.0, dtype=numerator.dtype) return math_ops.div(numerator, denominator) if context.in_eager_mode() and denominator._rank() == 0: # pylint: disable=protected-access if math_ops.equal(denominator, 0.0): return ops.convert_to_tensor(0.0, dtype=numerator.dtype) return math_ops.div(numerator, denominator) return array_ops.where( math_ops.greater(denominator, 0), math_ops.div(numerator, array_ops.where( math_ops.equal(denominator, 0), array_ops.ones_like(denominator), denominator)), array_ops.zeros_like(numerator), name=name)
def compute_best_f1_score(tp, fp, fn, name):
  precision_at_t = math_ops.div(tp, epsilon + tp + fp,
                                name='precision_' + name)
  recall_at_t = math_ops.div(tp, epsilon + tp + fn, name='recall_' + name)
  # Compute F1 score.
  f1_at_thresholds = (
      2.0 * precision_at_t * recall_at_t /
      (precision_at_t + recall_at_t + epsilon))
  return math_ops.reduce_max(f1_at_thresholds)
def inference_graph(self, input_data, **inference_args): """Constructs a TF graph for evaluating a random forest. Args: input_data: A tensor or dict of string->Tensor for the input data. This input_data must generate the same spec as the input_data used in training_graph: the dict must have the same keys, for example, and all tensors must have the same size in their first dimension. **inference_args: Keyword arguments to pass through to each tree. Returns: A tuple of (probabilities, tree_paths, variance), where variance is the variance over all the trees for regression problems only. Raises: NotImplementedError: If trying to use feature bagging with sparse features. """ processed_dense_features, processed_sparse_features, data_spec = ( data_ops.ParseDataTensorOrDict(input_data)) probabilities = [] paths = [] for i in range(self.params.num_trees): with ops.device(self.variables.device_dummies[i].device): tree_data = processed_dense_features if self.params.bagged_features: if processed_sparse_features is not None: raise NotImplementedError( 'Feature bagging not supported with sparse features.') tree_data = self._bag_features(i, tree_data) probs, path = self.trees[i].inference_graph( tree_data, data_spec, sparse_features=processed_sparse_features, **inference_args) probabilities.append(probs) paths.append(path) with ops.device(self.variables.device_dummies[0].device): # shape of all_predict should be [batch_size, num_trees, num_outputs] all_predict = array_ops.stack(probabilities, axis=1) average_values = math_ops.div( math_ops.reduce_sum(all_predict, 1), self.params.num_trees, name='probabilities') tree_paths = array_ops.stack(paths, axis=1) regression_variance = None if self.params.regression: expected_squares = math_ops.div( math_ops.reduce_sum(all_predict * all_predict, 1), self.params.num_trees) regression_variance = math_ops.maximum( 0., expected_squares - average_values * average_values) return average_values, tree_paths, regression_variance
def _DivGrad(op, grad):
  """The gradient for the Div operator."""
  x = op.inputs[0]
  y = op.inputs[1]
  sx = array_ops.shape(x)
  sy = array_ops.shape(y)
  rx, ry = gen_array_ops.broadcast_gradient_args(sx, sy)
  x = math_ops.conj(x)
  y = math_ops.conj(y)
  return (array_ops.reshape(math_ops.reduce_sum(math_ops.div(grad, y), rx), sx),
          array_ops.reshape(
              math_ops.reduce_sum(grad * math_ops.div(math_ops.div(-x, y), y),
                                  ry), sy))
def _update_mask(self, weights, threshold): """Updates the mask for a given weight tensor. This functions first computes the cdf of the weight tensor, and estimates the threshold value such that 'desired_sparsity' fraction of weights have magnitude less than the threshold. Args: weights: The weight tensor that needs to be masked. threshold: The current threshold value. The function will compute a new threshold and return the exponential moving average using the current value of threshold Returns: new_threshold: The new value of the threshold based on weights, and sparsity at the current global_step new_mask: A numpy array of the same size and shape as weights containing 0 or 1 to indicate which of the values in weights falls below the threshold Raises: ValueError: if sparsity is not defined """ if self._sparsity is None: raise ValueError('Sparsity variable undefined') with ops.name_scope(weights.op.name + '_pruning_ops'): abs_weights = math_ops.abs(weights) max_value = math_ops.reduce_max(abs_weights) histogram = _histogram( abs_weights, [0.0, max_value], nbins=self._spec.nbins, dtype=np.float32) cdf = math_ops.cumsum(histogram) norm_cdf = math_ops.div(cdf, math_ops.reduce_sum(histogram)) current_threshold = math_ops.multiply( math_ops.div( math_ops.reduce_sum( math_ops.cast( math_ops.less(norm_cdf, self._sparsity), np.float32)), float(self._spec.nbins)), max_value) smoothed_threshold = math_ops.add_n([ math_ops.multiply(current_threshold, 1 - self._spec.threshold_decay), math_ops.multiply(threshold, self._spec.threshold_decay) ]) new_mask = math_ops.cast( math_ops.greater(abs_weights, smoothed_threshold), np.float32) return smoothed_threshold, new_mask
def _DivGrad(op, grad):
  """The gradient for the Div operator."""
  x = op.inputs[0]
  y = op.inputs[1]
  sx = array_ops.shape(x)
  sy = array_ops.shape(y)
  # pylint: disable=protected-access
  rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy)
  # pylint: enable=protected-access
  x = math_ops.conj(x)
  y = math_ops.conj(y)
  return (array_ops.reshape(math_ops.reduce_sum(math_ops.div(grad, y), rx), sx),
          array_ops.reshape(math_ops.reduce_sum(
              grad * math_ops.div(-x, math_ops.square(y)), ry), sy))
def GetParams(self): """Create a graph containing multiple segment.""" # TODO(aaroey): test graph with different dtypes. dtype = dtypes.float32 input_name = "input" input_dims = [100, 24, 24, 2] g = ops.Graph() with g.as_default(): inp = array_ops.placeholder( dtype=dtype, shape=[None] + input_dims[1:], name=input_name) with g.device("/GPU:0"): conv_filter = constant_op.constant( [[[[1., 0.5, 4., 6., 0.5, 1.], [1., 0.5, 1., 1., 0.5, 1.]]]], name="weights", dtype=dtype) conv = nn.conv2d( input=inp, filter=conv_filter, strides=[1, 2, 2, 1], padding="SAME", name="conv") c1 = constant_op.constant( np.random.randn(input_dims[0], 12, 12, 6), dtype=dtype, name="c1") p = math_ops.mul(conv, c1, name="mul") c2 = constant_op.constant( np.random.randn(input_dims[0], 12, 12, 6), dtype=dtype, name="c2") q = math_ops.div(conv, c2, name="div") edge = self.trt_incompatible_op(q, name="incompatible") edge = math_ops.div(edge, edge, name="div1") r = math_ops.add(edge, edge, name="add") p = math_ops.sub(p, edge, name="sub") q = math_ops.mul(q, edge, name="mul1") s = math_ops.add(p, q, name="add1") s = math_ops.sub(s, r, name="sub1") array_ops.squeeze(s, name=self.output_name) return trt_test.TfTrtIntegrationTestParams( gdef=g.as_graph_def(), input_names=[input_name], input_dims=[input_dims], # TODO(aaroey): LayoutOptimizer adds additional nodes to the graph which # breaks the connection check, fix it. # - my_trt_op_0 should have ["mul", "sub", "div1", "mul1", "add1", # "add", "sub1"]; # - my_trt_op_1 should have ["weights","conv", "div"] expected_engines=["my_trt_op_0", "my_trt_op_1"], expected_output_dims=(100, 12, 12, 6), allclose_atol=1.e-03, allclose_rtol=1.e-03)
def accuracy(predictions, labels, weights=None):
  """Computes the percentage of times that predictions matches labels.

  Args:
    predictions: the predicted values, a `Tensor` whose dtype and shape
      matches 'labels'.
    labels: the ground truth values, a `Tensor` of any shape and
      bool, integer, or string dtype.
    weights: None or `Tensor` of float values to reweight the accuracy.

  Returns:
    Accuracy `Tensor`.

  Raises:
    ValueError: if dtypes don't match or
      if dtype is not bool, integer, or string.
  """
  if not (labels.dtype.is_integer or
          labels.dtype in (dtypes.bool, dtypes.string)):
    raise ValueError(
        'Labels should have bool, integer, or string dtype, not %r' %
        labels.dtype)
  if not labels.dtype.is_compatible_with(predictions.dtype):
    raise ValueError('Dtypes of predictions and labels should match. '
                     'Given: predictions (%r) and labels (%r)' %
                     (predictions.dtype, labels.dtype))
  with ops.name_scope('accuracy', values=[predictions, labels]):
    is_correct = math_ops.cast(
        math_ops.equal(predictions, labels), dtypes.float32)
    if weights is not None:
      is_correct = math_ops.mul(is_correct, weights)
      num_values = math_ops.mul(weights, array_ops.ones_like(is_correct))
      return math_ops.div(math_ops.reduce_sum(is_correct),
                          math_ops.reduce_sum(num_values))
    return math_ops.reduce_mean(is_correct)
def _setup_sparsity(self):
  begin_step = self._spec.sparsity_function_begin_step
  end_step = self._spec.sparsity_function_end_step
  initial_sparsity = self._spec.initial_sparsity
  target_sparsity = self._spec.target_sparsity
  exponent = self._spec.sparsity_function_exponent

  if begin_step >= end_step:
    raise ValueError(
        'Pruning must begin before it can end. begin_step=%d, end_step=%d' %
        (begin_step, end_step))

  with ops.name_scope(self._spec.name):
    p = math_ops.minimum(1.0,
                         math_ops.maximum(
                             0.0,
                             math_ops.div(
                                 math_ops.cast(self._global_step - begin_step,
                                               np.float32),
                                 end_step - begin_step)))
    sparsity = math_ops.add(
        math_ops.multiply(initial_sparsity - target_sparsity,
                          math_ops.pow(1 - p, exponent)),
        target_sparsity,
        name='sparsity')

  return sparsity
def squared_loss(predicted, target, name=None):
  """Computes and returns the per-example squared loss, divided by 2.

  Computes the per-example squared difference between the target and
  predicted tensors. The tensors must have the same shape.

  Args:
    predicted: A `Tensor` of shape `[batch_size, dim_1, ..., dim_n]`
      of predicted values.
    target: A `Tensor` of shape `[batch_size, dim_1, ..., dim_n]` of
      target values. The shape of the target tensor should match the
      `predicted` tensor.
    name: A name for the operation (optional).

  Returns:
    A `[batch_size, dim_1, ..., dim_n]` tensor of per-example squared losses.

  Raises:
    ValueError: If `predicted` and `target` shapes do not match.
  """
  with ops.op_scope([predicted, target], name, "squared_loss") as scope:
    predicted = ops.convert_to_tensor(predicted, name="predicted")
    target = ops.convert_to_tensor(target, name="target")
    _validate_predicted_and_target(predicted, target)
    return math_ops.div(math_ops.square(target - predicted), 2.0, name=scope)
def _NthElementGrad(op, grad):
  """Return the gradients for NthElement.

  Args:
    op: The NthElementOp for which we need to generate gradients.
    grad: Tensor. The gradients passed to the NthElementOp.

  Returns:
    A list of two tensors, the first being the gradient w.r.t. the input,
    the second being the gradient w.r.t. the N (None).
  """
  input = op.inputs[0]
  output = op.outputs[0]

  # Compute the number of elements which equal to output in each reduction
  # dimension. If there are multiple elements then the gradient will be
  # divided between them.
  indicators = math_ops.cast(
      math_ops.equal(array_ops.expand_dims(output, -1), input), grad.dtype)

  grad = array_ops.expand_dims(grad, -1)
  num_selected = array_ops.expand_dims(
      math_ops.reduce_sum(indicators, -1), -1)

  return [math_ops.div(indicators, num_selected) * grad, None]
def inference_graph(self, input_data, data_spec=None, **inference_args): """Constructs a TF graph for evaluating a random forest. Args: input_data: A tensor or SparseTensor or placeholder for input data. data_spec: A list of tf.dtype values specifying the original types of each column. **inference_args: Keyword arguments to pass through to each tree. Returns: The last op in the random forest inference graph. """ data_spec = [constants.DATA_FLOAT] if data_spec is None else data_spec probabilities = [] for i in range(self.params.num_trees): with ops.device(self.device_assigner.get_device(i)): tree_data = input_data if self.params.bagged_features: tree_data = self._bag_features(i, input_data) probabilities.append(self.trees[i].inference_graph( tree_data, data_spec, **inference_args)) with ops.device(self.device_assigner.get_device(0)): all_predict = array_ops.pack(probabilities) return math_ops.div( math_ops.reduce_sum(all_predict, 0), self.params.num_trees, name='probabilities')
def __call__(self, step): with ops.name_scope( self.name, "PolynomialDecay", [self.initial_learning_rate, step, self.decay_steps, self.end_learning_rate, self.power] ) as name: initial_learning_rate = ops.convert_to_tensor( self.initial_learning_rate, name="initial_learning_rate") dtype = initial_learning_rate.dtype end_learning_rate = math_ops.cast(self.end_learning_rate, dtype) power = math_ops.cast(self.power, dtype) global_step_recomp = math_ops.cast(step, dtype) decay_steps_recomp = math_ops.cast(self.decay_steps, dtype) if self.cycle: # Find the first multiple of decay_steps that is bigger than # global_step. If global_step is zero set the multiplier to 1 multiplier = control_flow_ops.cond( math_ops.equal(global_step_recomp, 0), lambda: 1.0, lambda: math_ops.ceil(global_step_recomp / self.decay_steps)) decay_steps_recomp = math_ops.multiply(decay_steps_recomp, multiplier) else: # Make sure that the global_step used is not bigger than decay_steps. global_step_recomp = math_ops.minimum(global_step_recomp, self.decay_steps) p = math_ops.div(global_step_recomp, decay_steps_recomp) return math_ops.add( math_ops.multiply(initial_learning_rate - end_learning_rate, math_ops.pow(1 - p, power)), end_learning_rate, name=name)
def _safe_div(numerator, denominator, name="value"):
  """Computes a safe divide which returns 0 if the denominator is zero.

  Note that the function contains an additional conditional check that is
  necessary for avoiding situations where the loss is zero causing NaNs to
  creep into the gradient computation.

  Args:
    numerator: An arbitrary `Tensor`.
    denominator: A `Tensor` whose shape matches `numerator` and whose values
      are assumed to be non-negative.
    name: An optional name for the returned op.

  Returns:
    The element-wise value of the numerator divided by the denominator.
  """
  if compat.forward_compatible(2018, 11, 1):
    return math_ops.div_no_nan(numerator, denominator, name=name)
  return array_ops.where(
      math_ops.greater(denominator, 0),
      math_ops.div(
          numerator,
          array_ops.where(
              math_ops.equal(denominator, 0),
              array_ops.ones_like(denominator), denominator)),
      array_ops.zeros_like(numerator),
      name=name)
def setUp(self):
  self.a = variables.VariableV1(2.0, name="a")
  self.b = variables.VariableV1(3.0, name="b")

  self.c = math_ops.multiply(self.a, self.b, name="c")  # Should be 6.0.
  self.d = math_ops.multiply(self.a, self.a, name="d")  # Should be 4.0.

  self.e = math_ops.multiply(self.d, self.c, name="e")  # Should be 24.0.

  self.f_y = constant_op.constant(0.30, name="f_y")
  self.f = math_ops.div(self.b, self.f_y, name="f")  # Should be 10.0.

  # The three nodes x, y and z form a graph with "cross-links" in it. I.e., x
  # and y are both direct inputs to z, but x is also a direct input to y.
  self.x = variables.VariableV1(2.0, name="x")  # Should be 2.0
  self.y = math_ops.negative(self.x, name="y")  # Should be -2.0.
  self.z = math_ops.multiply(self.x, self.y, name="z")  # Should be -4.0.

  rewriter_config = rewriter_config_pb2.RewriterConfig(
      disable_model_pruning=True,
      arithmetic_optimization=rewriter_config_pb2.RewriterConfig.OFF,
      constant_folding=rewriter_config_pb2.RewriterConfig.OFF)
  graph_options = config_pb2.GraphOptions(rewrite_options=rewriter_config)
  config = config_pb2.ConfigProto(graph_options=graph_options)
  self.sess = session.Session(config=config)
  self.sess.run(variables.global_variables_initializer())
def loss(self, logits, target, features):
  """Returns loss tensor for this head.

  Args:
    logits: logits, a float tensor.
    target: either a tensor for labels or in multihead case, a dict of string
      to target tensor.
    features: features dict.

  Returns:
    Loss tensor.
  """
  target = target[self.name] if isinstance(target, dict) else target
  loss_unweighted = self._loss_fn(logits, target)

  weight_tensor = self.get_weight_tensor(features)
  if weight_tensor is None:
    return math_ops.reduce_mean(loss_unweighted, name="loss")
  else:
    loss_unweighted = array_ops.reshape(loss_unweighted, shape=(-1,))
    loss_weighted = math_ops.mul(
        loss_unweighted, array_ops.reshape(weight_tensor, shape=(-1,)))
    return math_ops.div(
        math_ops.reduce_sum(loss_weighted),
        math_ops.to_float(math_ops.reduce_sum(weight_tensor)),
        name="loss")
def loss(self, logits, target, features):
  """Returns loss tensor for this head.

  The loss returned is the weighted average.

    L = sum_{i} w_{i} * l_{i} / sum_{i} w_{i}

  Args:
    logits: logits, a float tensor.
    target: either a tensor for labels or in multihead case, a dict of string
      to target tensor.
    features: features dict.

  Returns:
    Loss tensor.
  """
  target = target[self.name] if isinstance(target, dict) else target
  loss_unweighted = self._loss_fn(logits, target)

  weight_tensor = self.get_weight_tensor(features)
  if weight_tensor is None:
    return math_ops.reduce_mean(loss_unweighted, name="loss")
  loss_weighted = self._weighted_loss(loss_unweighted, weight_tensor)
  return math_ops.div(
      math_ops.reduce_sum(loss_weighted),
      math_ops.cast(math_ops.reduce_sum(weight_tensor), dtypes.float32),
      name="loss")
def _length_penalty(sequence_lengths, penalty_factor):
  """Calculates the length penalty. See https://arxiv.org/abs/1609.08144.

  Returns the length penalty tensor:
  ```
  [(5+sequence_lengths)/6]**penalty_factor
  ```
  where all operations are performed element-wise.

  Args:
    sequence_lengths: `Tensor`, the sequence lengths of each hypotheses.
    penalty_factor: A scalar that weights the length penalty.

  Returns:
    If the penalty is `0`, returns the scalar `1.0`. Otherwise returns
    the length penalty factor, a tensor with the same shape as
    `sequence_lengths`.
  """
  penalty_factor = ops.convert_to_tensor(penalty_factor, name="penalty_factor")
  penalty_factor.set_shape(())  # penalty should be a scalar.
  static_penalty = tensor_util.constant_value(penalty_factor)
  if static_penalty is not None and static_penalty == 0:
    return 1.0
  return math_ops.div((5. + math_ops.to_float(sequence_lengths))
                      **penalty_factor, (5. + 1.)**penalty_factor)
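# Illustrative sketch (not from the source): a quick NumPy check of the
# [(5 + length) / 6] ** penalty_factor formula above, using made-up lengths
# and penalty_factor=0.6.
import numpy as np

def length_penalty_np(sequence_lengths, penalty_factor):
  return ((5. + np.asarray(sequence_lengths, dtype=np.float64)) / 6.) ** penalty_factor

print(length_penalty_np([1, 7, 20], 0.6))  # -> [1.0, ~1.52, ~2.35]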
def weighted_moving_average(value, decay, weight, truediv=True, collections=None, name=None): """Compute the weighted moving average of `value`. Conceptually, the weighted moving average is: `moving_average(value * weight) / moving_average(weight)`, where a moving average updates by the rule `new_value = decay * old_value + (1 - decay) * update` Internally, this Op keeps moving average variables of both `value * weight` and `weight`. Args: value: A numeric `Tensor`. decay: A float `Tensor` or float value. The moving average decay. weight: `Tensor` that keeps the current value of a weight. Shape should be able to multiply `value`. truediv: Boolean, if `True`, dividing by `moving_average(weight)` is floating point division. If `False`, use division implied by dtypes. collections: List of graph collections keys to add the internal variables `value * weight` and `weight` to. Defaults to `[GraphKeys.GLOBAL_VARIABLES]`. name: Optional name of the returned operation. Defaults to "WeightedMovingAvg". Returns: An Operation that updates and returns the weighted moving average. """ # Unlike assign_moving_average, the weighted moving average doesn't modify # user-visible variables. It is the ratio of two internal variables, which are # moving averages of the updates. Thus, the signature of this function is # quite different than assign_moving_average. if collections is None: collections = [ops.GraphKeys.GLOBAL_VARIABLES] with variable_scope.variable_scope(name, "WeightedMovingAvg", [value, weight, decay]) as scope: value_x_weight_var = variable_scope.get_variable( "value_x_weight", initializer=init_ops.zeros_initializer(value.get_shape(), dtype=value.dtype), trainable=False, collections=collections) weight_var = variable_scope.get_variable( "weight", initializer=init_ops.zeros_initializer(weight.get_shape(), dtype=weight.dtype), trainable=False, collections=collections) numerator = assign_moving_average( value_x_weight_var, value * weight, decay, zero_debias=False) denominator = assign_moving_average( weight_var, weight, decay, zero_debias=False) if truediv: return math_ops.truediv(numerator, denominator, name=scope.name) else: return math_ops.div(numerator, denominator, name=scope.name)
def _sample_n(self, n, seed=None): sample_shape = array_ops.concat(([n], array_ops.shape(self.logits)), 0) logits = self.logits * array_ops.ones(sample_shape) logits_2d = array_ops.reshape(logits, [-1, self.event_size]) np_dtype = self.dtype.as_numpy_dtype # Uniform variates must be sampled from the interval (0,1] rather than # [0,1], as they are passed through log() to compute Gumbel variates. # We need to use np.finfo(np_dtype).tiny because it is the smallest, # positive, "normal" number. A "normal" number is such that the mantissa # has an implicit leading 1. Normal, positive numbers x, y have the # reasonable property that: x + y >= max(x, y). # minval=np.nextafter(np.float32(0),1)) can cause # tf.random_uniform(dtype=tf.float32) to sample 0. uniform = random_ops.random_uniform(shape=array_ops.shape(logits_2d), minval=np.finfo(np_dtype).tiny, maxval=1, dtype=self.dtype, seed=seed) gumbel = -math_ops.log(-math_ops.log(uniform)) noisy_logits = math_ops.div(gumbel + logits_2d, self._temperature_2d) samples = nn_ops.log_softmax(noisy_logits) ret = array_ops.reshape(samples, sample_shape) return ret
def decayed_lr(learning_rate, global_step, decay_steps, end_learning_rate, power, cycle, name): """Helper to recompute learning rate; most helpful in eager-mode.""" with ops.name_scope( name, "PolynomialDecay", [learning_rate, global_step, decay_steps, end_learning_rate, power] ) as name: learning_rate = ops.convert_to_tensor(learning_rate, name="learning_rate") dtype = learning_rate.dtype end_learning_rate = math_ops.cast(end_learning_rate, dtype) power = math_ops.cast(power, dtype) global_step_recomp = math_ops.cast(global_step, dtype) decay_steps_recomp = math_ops.cast(decay_steps, dtype) if cycle: # Find the first multiple of decay_steps that is bigger than # global_step. If global_step is zero set the multiplier to 1 multiplier = control_flow_ops.cond( math_ops.equal(global_step_recomp, 0), lambda: 1.0, lambda: math_ops.ceil(global_step_recomp / decay_steps)) decay_steps_recomp = math_ops.multiply(decay_steps_recomp, multiplier) else: # Make sure that the global_step used is not bigger than decay_steps. global_step_recomp = math_ops.minimum(global_step_recomp, decay_steps) p = math_ops.div(global_step_recomp, decay_steps_recomp) return math_ops.add( math_ops.multiply(learning_rate - end_learning_rate, math_ops.pow(1 - p, power)), end_learning_rate, name=name)
def central_crop(image, central_fraction):
  """Crop the central region of the image.

  Remove the outer parts of an image but retain the central region of the
  image along each dimension. If we specify central_fraction = 0.5, this
  function returns the region marked with "X" in the below diagram.

       --------
      |        |
      |  XXXX  |
      |  XXXX  |
      |        |   where "X" is the central 50% of the image.
       --------

  Args:
    image: 3-D float Tensor of shape [height, width, depth]
    central_fraction: float (0, 1], fraction of size to crop

  Raises:
    ValueError: if central_fraction is not within (0, 1].

  Returns:
    3-D float Tensor
  """
  image = ops.convert_to_tensor(image, name='image')
  _Check3DImage(image, require_static=False)
  if central_fraction <= 0.0 or central_fraction > 1.0:
    raise ValueError('central_fraction must be within (0, 1]')
  if central_fraction == 1.0:
    return image

  img_shape = array_ops.shape(image)
  depth = image.get_shape()[2]
  fraction_offset = int(1 / ((1 - central_fraction) / 2.0))
  bbox_h_start = math_ops.div(img_shape[0], fraction_offset)
  bbox_w_start = math_ops.div(img_shape[1], fraction_offset)

  bbox_h_size = img_shape[0] - bbox_h_start * 2
  bbox_w_size = img_shape[1] - bbox_w_start * 2

  bbox_begin = array_ops.pack([bbox_h_start, bbox_w_start, 0])
  bbox_size = array_ops.pack([bbox_h_size, bbox_w_size, -1])
  image = array_ops.slice(image, bbox_begin, bbox_size)

  # The first two dimensions are dynamic and unknown.
  image.set_shape([None, None, depth])
  return image
def convert_image_dtype(image, dtype, name=None):
  """Convert `image` to `dtype`, scaling its values if needed.

  Images that are represented using floating point values are expected to have
  values in the range [0,1). Image data stored in integer data types are
  expected to have values in the range `[0,MAX]`, where `MAX` is the largest
  positive representable number for the data type.

  This op converts between data types, scaling the values appropriately before
  casting.

  Note that for floating point inputs, this op expects values to lie in [0,1).
  Conversion of an image containing values outside that range may lead to
  overflow errors when converted to integer `Dtype`s.

  Args:
    image: An image.
    dtype: A `DType` to convert `image` to.
    name: A name for this operation (optional).

  Returns:
    `image`, converted to `dtype`.
  """
  if dtype == image.dtype:
    return image

  with ops.op_scope([image], name, 'convert_image') as name:
    # Both integer: use integer multiplication in the larger range
    if image.dtype.is_integer and dtype.is_integer:
      scale_in = image.dtype.max
      scale_out = dtype.max
      if scale_in > scale_out:
        # Scaling down, scale first, then cast. The scaling factor will
        # cause in.max to be mapped to above out.max but below out.max+1,
        # so that the output is safely in the supported range.
        scale = (scale_in + 1) // (scale_out + 1)
        scaled = math_ops.div(image, scale)
        return math_ops.cast(scaled, dtype)
      else:
        # Scaling up, cast first, then scale. The scale will not map in.max to
        # out.max, but converting back and forth should result in no change.
        cast = math_ops.cast(image, dtype)
        scale = (scale_out + 1) // (scale_in + 1)
        return math_ops.mul(cast, scale)
    elif image.dtype.is_floating and dtype.is_floating:
      # Both float: Just cast, no possible overflows in the allowed ranges.
      return math_ops.cast(image, dtype)
    else:
      if image.dtype.is_integer:
        # Converting to float: first cast, then scale
        cast = math_ops.cast(image, dtype)
        scale = 1. / image.dtype.max
        return math_ops.mul(cast, scale)
      else:
        # Converting from float: first scale, then cast
        scale = dtype.max + 0.5  # avoid rounding problems in the cast
        scaled = math_ops.mul(image, scale)
        return math_ops.cast(scaled, dtype)
def inverse_time_decay(learning_rate,
                       global_step,
                       decay_steps,
                       decay_rate,
                       staircase=False,
                       name=None):
  """Applies inverse time decay to the initial learning rate.

  When training a model, it is often recommended to lower the learning rate as
  the training progresses. This function applies an inverse decay function
  to a provided initial learning rate. It requires a `global_step` value to
  compute the decayed learning rate. You can just pass a TensorFlow variable
  that you increment at each training step.

  The function returns the decayed learning rate. It is computed as:

  ```python
  decayed_learning_rate = learning_rate / (1 + decay_rate * t)
  ```

  Example: decay 1/t with a rate of 0.5:

  ```python
  ...
  global_step = tf.Variable(0, trainable=False)
  learning_rate = 0.1
  k = 0.5
  learning_rate = tf.train.inverse_time_decay(learning_rate, global_step, k)

  # Passing global_step to minimize() will increment it at each step.
  learning_step = (
      tf.train.GradientDescentOptimizer(learning_rate)
      .minimize(...my loss..., global_step=global_step)
  )
  ```

  Args:
    learning_rate: A scalar `float32` or `float64` `Tensor` or a
      Python number. The initial learning rate.
    global_step: A Python number. Global step to use for the decay computation.
      Must not be negative.
    decay_steps: How often to apply decay.
    decay_rate: A Python number. The decay rate.
    staircase: Whether to apply decay in a discrete staircase, as opposed to
      continuous, fashion.
    name: String. Optional name of the operation. Defaults to
      'InverseTimeDecay'.

  Returns:
    A scalar `Tensor` of the same type as `learning_rate`. The decayed
    learning rate.
  """
  with ops.name_scope(name, "InverseTimeDecay",
                      [learning_rate, global_step, decay_rate]) as name:
    learning_rate = ops.convert_to_tensor(learning_rate, name="learning_rate")
    dtype = learning_rate.dtype
    global_step = math_ops.cast(global_step, dtype)
    decay_steps = math_ops.cast(decay_steps, dtype)
    decay_rate = math_ops.cast(decay_rate, dtype)
    p = global_step / decay_steps
    if staircase:
      p = math_ops.floor(p)
    const = math_ops.cast(constant_op.constant(1), learning_rate.dtype)
    denom = math_ops.add(const, math_ops.mul(decay_rate, p))
    return math_ops.div(learning_rate, denom, name=name)
def _weighted_loss(loss, weight_tensor):
  unweighted_loss = array_ops.reshape(loss, shape=(-1,))
  weighted_loss = math_ops.mul(
      unweighted_loss, array_ops.reshape(weight_tensor, shape=(-1,)))
  return math_ops.div(
      math_ops.reduce_sum(weighted_loss),
      math_ops.to_float(math_ops.reduce_sum(weight_tensor)),
      name="loss")
def _ProdGrad(op, grad):
  """Gradient for Prod."""
  # TODO(kearnes): this gives NaNs for 0s in the input tensor
  _, new_output_shape, input_shape = _ReductionGradAssist(op)
  tile_scaling = input_shape // new_output_shape
  grad = array_ops.reshape(grad * op.outputs[0], new_output_shape)
  grad = math_ops.div(array_ops.tile(grad, tile_scaling), op.inputs[0])
  return grad, None
def testFindNodesWithBadTensorValues(self): with session.Session() as sess: u_name = "testFindNodesWithBadTensorValues/u" v_name = "testFindNodesWithBadTensorValues/v" w_name = "testFindNodesWithBadTensorValues/w" x_name = "testFindNodesWithBadTensorValues/x" y_name = "testFindNodesWithBadTensorValues/y" z_name = "testFindNodesWithBadTensorValues/z" u_init = constant_op.constant([2.0, 4.0]) u = variables.Variable(u_init, name=u_name) v_init = constant_op.constant([2.0, 1.0]) v = variables.Variable(v_init, name=v_name) # Expected output: [0.0, 3.0] w = math_ops.sub(u, v, name=w_name) # Expected output: [inf, 1.3333] x = math_ops.div(u, w, name=x_name) # Expected output: [nan, 4.0] y = math_ops.mul(w, x, name=y_name) z = math_ops.mul(y, y, name=z_name) u.initializer.run() v.initializer.run() run_options = config_pb2.RunOptions() debug_utils.watch_graph( run_options, sess.graph, debug_ops=["DebugIdentity"], debug_urls="file://%s" % self._dump_root) run_metadata = config_pb2.RunMetadata() sess.run(z, options=run_options, run_metadata=run_metadata) dump = debug_data.DebugDumpDir(self._dump_root) def has_bad_value(_, tensor): return np.any(np.isnan(tensor)) or np.any(np.isinf(tensor)) # Find all "offending tensors". bad_data = dump.find(has_bad_value) # Verify that the nodes with bad values are caught through running find # on the debug dump. self.assertEqual(3, len(bad_data)) self.assertEqual(x_name, bad_data[0].node_name) self.assertEqual(y_name, bad_data[1].node_name) self.assertEqual(z_name, bad_data[2].node_name) # Test first_n kwarg of find(): Find the first offending tensor. first_bad_datum = dump.find(has_bad_value, first_n=1) self.assertEqual(1, len(first_bad_datum)) self.assertEqual(x_name, first_bad_datum[0].node_name)
def testBoth(self):
  with self.session(graph=ops.Graph()):
    t1 = constant_op.constant([1.0, 0.0])
    t2 = constant_op.constant([0.0, 0.0])
    a = math_ops.div(t1, t2)
    check = numerics.add_check_numerics_ops()
    a = control_flow_ops.with_dependencies([check], a)
    with self.assertRaisesOpError("Inf and NaN"):
      self.evaluate(a)
def _ProdGrad(op, grad):
  """Gradient for Prod."""
  # TODO(kearnes): this gives NaNs for 0s in the input tensor
  input_shape = array_ops.shape(op.inputs[0])
  output_shape_kept_dims = math_ops.reduced_shape(input_shape, op.inputs[1])
  tile_scaling = _safe_shape_div(input_shape, output_shape_kept_dims)
  grad = array_ops.reshape(grad * op.outputs[0], output_shape_kept_dims)
  grad = math_ops.div(array_ops.tile(grad, tile_scaling), op.inputs[0])
  return grad, None
def _rescale_eval_loss(loss, weights):
  """Rescales evaluation loss according to the given weights.

  The rescaling is needed because in the training loss weights are not
  considered in the denominator, whereas for the evaluation loss we should
  divide by the sum of weights.

  The rescaling factor is:
    R = sum_{i} 1 / sum_{i} w_{i}

  Args:
    loss: the scalar weighted loss.
    weights: weight coefficients. Either a scalar, or a `Tensor` of shape
      [batch_size].

  Returns:
    The given loss multiplied by the rescaling factor.
  """
  rescaling_factor = math_ops.reduce_mean(weights)
  return math_ops.div(loss, rescaling_factor)
def _loss(self, logits, target, weight_tensor):
  if self._n_classes < 2:
    loss_vec = math_ops.square(logits - math_ops.to_float(target))
  elif self._n_classes == 2:
    loss_vec = nn.sigmoid_cross_entropy_with_logits(
        logits, array_ops.reshape(math_ops.to_float(target), [-1, 1]))
  else:
    loss_vec = nn.sparse_softmax_cross_entropy_with_logits(
        logits, array_ops.reshape(target, [-1]))

  if weight_tensor is None:
    return math_ops.reduce_mean(loss_vec, name="loss")
  else:
    loss_vec = array_ops.reshape(loss_vec, shape=(-1,))
    loss_vec = math_ops.mul(
        loss_vec, array_ops.reshape(weight_tensor, shape=(-1,)))
    return math_ops.div(
        math_ops.reduce_sum(loss_vec),
        math_ops.to_float(math_ops.reduce_sum(weight_tensor)),
        name="loss")
def _length_penalty(sequence_lengths, penalty_factor):
  """Calculates the length penalty. See https://arxiv.org/abs/1609.08144.

  Args:
    sequence_lengths: The sequence length of all hypotheses, a tensor
      of shape [beam_size, vocab_size].
    penalty_factor: A scalar that weights the length penalty.

  Returns:
    The length penalty factor, a tensor of shape [beam_size].
  """
  penalty_factor = ops.convert_to_tensor(penalty_factor, name="penalty_factor")
  penalty_factor.set_shape(())  # penalty should be a scalar.
  static_penalty = tensor_util.constant_value(penalty_factor)
  if static_penalty is not None and static_penalty == 0:
    return 1.0
  return math_ops.div(
      (5. + math_ops.to_float(sequence_lengths))**penalty_factor,
      (5. + 1.)**penalty_factor)
def sig_smooth_sqr(images, batch_size, cube_len, label_num, is_loss=False):
  sum_axis = [1, 2, 3]
  if is_loss:
    smooth_limit = 2.89
    images = my_sigmoid(images)
  else:
    smooth_limit = 3
    images_max = tf.argmax(images, axis=4)
    images = tf.one_hot(images_max, depth=label_num)

  chair_voxels = tf.reshape(
      tf.reduce_sum(images[:, :, :, :, 1:], axis=4),
      (batch_size, cube_len, cube_len, cube_len, 1))
  background_voxels = tf.reshape(
      images[:, :, :, :, 0], (batch_size, cube_len, cube_len, cube_len, 1))
  images = tf.concat((background_voxels, chair_voxels), 4)

  pixel_dif1 = math_ops.abs(images[:, 1:, :, :, :] - images[:, :-1, :, :, :])
  pixel_dif2 = math_ops.abs(images[:, :, 1:, :, :] - images[:, :, :-1, :, :])
  pixel_dif3 = math_ops.abs(images[:, :, :, 1:, :] - images[:, :, :, :-1, :])

  shape = [batch_size, 1, cube_len, cube_len, 2]
  padding = tf.zeros(shape, tf.float32)
  tv_ax_1 = tf.concat((pixel_dif1, padding), axis=1) + tf.concat(
      (padding, pixel_dif1), axis=1)

  shape = [batch_size, cube_len, 1, cube_len, 2]
  padding = tf.zeros(shape, tf.float32)
  tv_ax_2 = tf.concat((pixel_dif2, padding), axis=2) + tf.concat(
      (padding, pixel_dif2), axis=2)

  shape = [batch_size, cube_len, cube_len, 1, 2]
  padding = tf.zeros(shape, tf.float32)
  tv_ax_3 = tf.concat((pixel_dif3, padding), axis=3) + tf.concat(
      (padding, pixel_dif3), axis=3)

  smooth = math_ops.reduce_sum(tv_ax_1 + tv_ax_2 + tv_ax_3, axis=4)
  smooth_matrix = tf.square(
      tf.nn.relu(
          math_ops.div(smooth, 2) - tf.ones(smooth.shape) * smooth_limit))
  smooth_sqr = math_ops.reduce_sum(smooth_matrix, axis=sum_axis)
  return smooth_sqr
def decayed_lr():
  """Helper to recompute learning rate; most helpful in eager-mode."""
  global_step_recomp = math_ops.cast(global_step, dtype)
  decay_steps_recomp = math_ops.cast(decay_steps, dtype)
  if cycle:
    # Find the first multiple of decay_steps that is bigger than
    # global_step. If global_step is zero set the multiplier to 1
    multiplier = control_flow_ops.cond(
        math_ops.equal(global_step_recomp, 0), lambda: 1.0,
        lambda: math_ops.ceil(global_step_recomp / decay_steps))
    decay_steps_recomp = math_ops.multiply(decay_steps_recomp, multiplier)
  else:
    # Make sure that the global_step used is not bigger than decay_steps.
    global_step_recomp = math_ops.minimum(global_step_recomp, decay_steps)

  p = math_ops.div(global_step_recomp, decay_steps_recomp)
  return math_ops.add(
      math_ops.multiply(learning_rate - end_learning_rate,
                        math_ops.pow(1 - p, power)),
      end_learning_rate,
      name=name)
def setUp(self):
  self.a = variables.Variable(2.0, name="a")
  self.b = variables.Variable(3.0, name="b")

  self.c = math_ops.multiply(self.a, self.b, name="c")  # Should be 6.0.
  self.d = math_ops.multiply(self.a, self.a, name="d")  # Should be 4.0.

  self.e = math_ops.multiply(self.d, self.c, name="e")  # Should be 24.0.

  self.f_y = constant_op.constant(0.30, name="f_y")
  self.f = math_ops.div(self.b, self.f_y, name="f")  # Should be 10.0.

  # The three nodes x, y and z form a graph with "cross-links" in it. I.e., x
  # and y are both direct inputs to z, but x is also a direct input to y.
  self.x = variables.Variable(2.0, name="x")  # Should be 2.0
  self.y = math_ops.negative(self.x, name="y")  # Should be -2.0.
  self.z = math_ops.multiply(self.x, self.y, name="z")  # Should be -4.0.

  self.sess = session.Session()
  self.sess.run(variables.global_variables_initializer())
def rescale_image(image):
  """Rescale `image` to the range $[0, 1]$.

  It does this by dividing by the largest value in the image.

  Args:
    image: 3-D tensor of shape `[height, width, channels]`.

  Returns:
    The rescaled image with the same shape as `image`.

  Raises:
    ValueError: if the shape of 'image' is incompatible with this function.
  """
  _Check3DImage(image)
  image = math_ops.cast(image, dtype=dtypes.float32)
  image_max = tf.reduce_max(image)
  image = math_ops.div(image, image_max)
  return image
def tf_safe_div(numerator, denominator):
  """Computes a safe division which returns 0 if the denominator is zero.

  Note that the function contains an additional conditional check that is
  necessary for avoiding situations where the loss is zero causing NaNs to
  creep into the gradient computation.

  Args:
    numerator: tf.tensor
    denominator: tf.tensor

  Returns:
    tf.tensor
  """
  return array_ops.where(
      math_ops.greater(denominator, 0),
      math_ops.div(
          numerator,
          array_ops.where(math_ops.equal(denominator, 0),
                          array_ops.ones_like(denominator), denominator)),
      array_ops.zeros_like(numerator))
def compute_cdf_from_histogram(values, value_range, **kwargs):
  """Returns the normalized cumulative distribution of the given values tensor.

  Computes the histogram and uses tf.cumsum to arrive at cdf.

  Args:
    values: Numeric `Tensor`.
    value_range: Shape [2] `Tensor` of same `dtype` as `values`.
    **kwargs: keyword arguments: nbins, name

  Returns:
    A 1-D `Tensor` holding normalized cdf of values.
  """
  nbins = kwargs.get('nbins', _NBINS)
  name = kwargs.get('name', None)
  with ops.name_scope(name, 'cdf', [values, value_range, nbins]):
    histogram = _histogram(
        values, value_range, dtype=dtypes.float32, nbins=nbins)
    cdf = math_ops.cumsum(histogram)
    return math_ops.div(cdf, math_ops.reduce_max(cdf))
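# Illustrative sketch (not from the source): the same cumsum-then-normalize
# idea in plain NumPy, with made-up values and a hypothetical 4-bin histogram
# over the range (0, 1).
import numpy as np

values = np.array([0.1, 0.3, 0.6, 0.9])
hist, _ = np.histogram(values, bins=4, range=(0.0, 1.0))  # -> [1, 1, 1, 1]
cdf = np.cumsum(hist)
print(cdf / cdf.max())  # -> [0.25, 0.5, 0.75, 1.0]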
def _safe_div(numerator, denominator, name="value"):
  """Computes a safe divide which returns 0 if the denominator is zero.

  Note that the function contains an additional conditional check that is
  necessary for avoiding situations where the loss is zero causing NaNs to
  creep into the gradient computation.

  Args:
    numerator: An arbitrary `Tensor`.
    denominator: `Tensor` whose shape matches `numerator` and whose values are
      assumed to be non-negative.
    name: An optional name for the returned op.

  Returns:
    The element-wise value of the numerator divided by the denominator.
  """
  return array_ops.where(
      math_ops.greater(denominator, 0),
      math_ops.div(
          numerator,
          array_ops.where(
              math_ops.equal(denominator, 0),
              array_ops.ones_like(denominator), denominator)),
      array_ops.zeros_like(numerator),
      name=name)
def per_image_standardization(image): """Linearly scales `image` to have zero mean and unit norm. This op computes `(x - mean) / adjusted_stddev`, where `mean` is the average of all values in image, and `adjusted_stddev = max(stddev, 1.0/sqrt(image.NumElements()))`. `stddev` is the standard deviation of all values in `image`. It is capped away from zero to protect against division by 0 when handling uniform images. Args: image: 3-D tensor of shape `[height, width, channels]`. Returns: The standardized image with same shape as `image`. Raises: ValueError: if the shape of 'image' is incompatible with this function. """ image = ops.convert_to_tensor(image, name='image') _Check3DImage(image, require_static=False) num_pixels = math_ops.reduce_prod(array_ops.shape(image)) image = math_ops.cast(image, dtype=dtypes.float32) image_mean = math_ops.reduce_mean(image) variance = (math_ops.reduce_mean(math_ops.square(image)) - math_ops.square(image_mean)) variance = gen_nn_ops.relu(variance) stddev = math_ops.sqrt(variance) # Apply a minimum normalization that protects us against uniform images. min_stddev = math_ops.rsqrt(math_ops.cast(num_pixels, dtypes.float32)) pixel_value_scale = math_ops.maximum(stddev, min_stddev) pixel_value_offset = image_mean image = math_ops.sub(image, pixel_value_offset) image = math_ops.div(image, pixel_value_scale) return image
def _SparseReduceMinOrMaxGrad(op, out_grad): sp_indices = op.inputs[0] sp_values = op.inputs[1] sp_shape = op.inputs[2] reduction_axes = op.inputs[3] output = op.outputs[0] # Handle keepdims output_shape_kept_dims = math_ops.reduced_shape(sp_shape, op.inputs[3]) out_grad = array_ops.reshape(out_grad, output_shape_kept_dims) output = array_ops.reshape(output, output_shape_kept_dims) # Map input and output coefficients scale = sp_shape // math_ops.to_int64(output_shape_kept_dims) scaled_indices = sp_indices // scale # Map pooled values with corresponding max/min values sp_max_val = array_ops.gather_nd(output, scaled_indices) indicators = math_ops.cast(math_ops.equal(sp_values, sp_max_val), out_grad.dtype) grad_values = array_ops.gather_nd(out_grad, scaled_indices) # Compute the number of selected (maximum or minimum) elements in each # reduction dimension. If there are multiple minimum or maximum elements # then the gradient will be divided between them. # (same as for MaxGrad) sp_indicators = sparse_tensor.SparseTensor(sp_indices, indicators, sp_shape) num_selected = array_ops.gather_nd( sparse_ops.sparse_reduce_sum(sp_indicators, axis=reduction_axes, keep_dims=True), scaled_indices) # (input_indices, input_values, input_shape, reduction_axes) return [ None, math_ops.div(indicators, math_ops.maximum(num_selected, 1)) * grad_values, None, None ]
def _setup_sparsity(self):
  begin_step = self._spec.sparsity_function_begin_step
  end_step = self._spec.sparsity_function_end_step
  initial_sparsity = self._spec.initial_sparsity
  target_sparsity = self._spec.target_sparsity
  exponent = self._spec.sparsity_function_exponent

  with ops.name_scope(self._spec.name):
    p = math_ops.minimum(
        1.0,
        math_ops.maximum(
            0.0,
            math_ops.div(
                math_ops.cast(self._global_step - begin_step, dtypes.float32),
                end_step - begin_step)))
    sparsity = math_ops.add(
        math_ops.multiply(initial_sparsity - target_sparsity,
                          math_ops.pow(1 - p, exponent)),
        target_sparsity,
        name='sparsity')

  return sparsity
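# Illustrative sketch (not from the source): a plain-Python version of the
# polynomial sparsity schedule above, with made-up hyperparameters. Sparsity
# ramps from initial_sparsity to target_sparsity as (1 - p)**exponent decays.
def sparsity_at(step, begin=0, end=100, initial=0.0, target=0.9, exponent=3):
  p = min(max((step - begin) / float(end - begin), 0.0), 1.0)
  return target + (initial - target) * (1.0 - p) ** exponent

print(sparsity_at(0), sparsity_at(50), sparsity_at(100))  # -> 0.0 0.7875 0.9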
def _sample_n(self, n, seed=None): sample_shape = array_ops.concat([[n], array_ops.shape(self.logits)], 0) logits = self.logits * array_ops.ones(sample_shape) logits_2d = array_ops.reshape(logits, [-1, self.event_size]) # Uniform variates must be sampled from the open-interval `(0, 1)` rather # than `[0, 1)`. To do so, we use `np.finfo(self.dtype.as_numpy_dtype).tiny` # because it is the smallest, positive, "normal" number. A "normal" number # is such that the mantissa has an implicit leading 1. Normal, positive # numbers x, y have the reasonable property that, `x + y >= max(x, y)`. In # this case, a subnormal number (i.e., np.nextafter) can cause us to sample # 0. uniform = random_ops.random_uniform( shape=array_ops.shape(logits_2d), minval=np.finfo(self.dtype.as_numpy_dtype).tiny, maxval=1., dtype=self.dtype, seed=seed) gumbel = -math_ops.log(-math_ops.log(uniform)) noisy_logits = math_ops.div(gumbel + logits_2d, self._temperature_2d) samples = nn_ops.log_softmax(noisy_logits) ret = array_ops.reshape(samples, sample_shape) return ret
def sum_squared_loss(predicted, target, name=None): # pylint: disable=line-too-long """Calculates 1/2 the sum of the squared loss across batches. Computes the squared difference between the target and predicted tensors, sums across all dimensions except dimension 0, and divides by 2: losses = reduce_batch_sum(squared_loss(predicted, target)) / 2.0 where `losses` is a tensor with dimensions [batch_size]. The tensors must have the same shape. This function is equivalent to typical formulations of L2 loss, and similar to TensorFlow's l2_loss function. It differs from the l2_loss function by allowing the caller to specify both the predicted and target tensors. Args: predicted: A `Tensor` of shape `[batch_size, dim_1, ..., dim_n]` of predicted values. target: A `Tensor` of shape `[batch_size, dim_1, ..., dim_n]` of target values. The shape of the target tensor should match the `predicted` tensor. name: A name for the operation (optional). Returns: A `[batch_size]` tensor of squared losses summed across all dimensions except dimension 0, divided by 2. Raises: ValueError: If `predicted` and `target` shapes do not match. """ # pylint: enable=line-too-long with ops.op_scope([predicted, target], name, "sum_squared_loss") as scope: return math_ops.div(reduce_batch_sum(squared_loss(predicted, target)), 2.0, name=scope)
def __call__(self, inputs, state, scope=None): """Gated recurrent unit (GRU) with nunits cells.""" with vs.variable_scope(scope or type(self).__name__): # "GRUCell" with vs.variable_scope("Gates"): # Reset gate and update gate. # We start with bias of 1.0 to not reset and not update. r, u, g = array_ops.split(_linear([inputs, state],3 * self._num_units, True, 1.0), 3, 1) r, u, g = sigmoid(r), sigmoid(u), sigmoid(g) with vs.variable_scope("Candidate"): c = self._activation(_linear([inputs, r * state], self._num_units, True)) new_h = u * state + (1 - u) * c eps = 1e-13 temp = math_ops.div(math_ops.reduce_sum(math_ops.multiply(new_h, state),1), \ math_ops.reduce_sum(math_ops.multiply(state,state),1) + eps) m = array_ops.transpose(g) t1 = math_ops.multiply(m , temp) t1 = array_ops.transpose(t1) distract_h = new_h - state * t1 return distract_h, distract_h
def accuracy(predictions, labels, weights=None, name=None): """Computes the percentage of times that predictions matches labels. Args: predictions: the predicted values, a `Tensor` whose dtype and shape matches 'labels'. labels: the ground truth values, a `Tensor` of any shape and bool, integer, or string dtype. weights: None or `Tensor` of float values to reweight the accuracy. name: A name for the operation (optional). Returns: Accuracy `Tensor`. Raises: ValueError: if dtypes don't match or if dtype is not bool, integer, or string. """ if not (labels.dtype.is_integer or labels.dtype in (dtypes.bool, dtypes.string)): raise ValueError( 'Labels should have bool, integer, or string dtype, not %r' % labels.dtype) if not labels.dtype.is_compatible_with(predictions.dtype): raise ValueError('Dtypes of predictions and labels should match. ' 'Given: predictions (%r) and labels (%r)' % (predictions.dtype, labels.dtype)) with ops.name_scope(name, 'accuracy', values=[predictions, labels]): is_correct = math_ops.cast(math_ops.equal(predictions, labels), dtypes.float32) if weights is not None: is_correct = math_ops.multiply(is_correct, weights) num_values = math_ops.multiply(weights, array_ops.ones_like(is_correct)) return math_ops.div(math_ops.reduce_sum(is_correct), math_ops.reduce_sum(num_values)) return math_ops.reduce_mean(is_correct)
def inference_graph(self, input_data, **inference_args): """Constructs a TF graph for evaluating a random forest. Args: input_data: A tensor or dict of string->Tensor for input data. **inference_args: Keyword arguments to pass through to each tree. Returns: The last op in the random forest inference graph. Raises: NotImplementedError: If trying to use feature bagging with sparse features. """ processed_dense_features, processed_sparse_features, data_spec = ( data_ops.ParseDataTensorOrDict(input_data)) probabilities = [] for i in range(self.params.num_trees): with ops.device(self.device_assigner.get_device(i)): tree_data = processed_dense_features if self.params.bagged_features: if processed_sparse_features is not None: raise NotImplementedError( 'Feature bagging not supported with sparse features.' ) tree_data = self._bag_features(i, input_data) probabilities.append(self.trees[i].inference_graph( tree_data, data_spec, sparse_features=processed_sparse_features, **inference_args)) with ops.device(self.device_assigner.get_device(0)): all_predict = array_ops.stack(probabilities) return math_ops.div(math_ops.reduce_sum(all_predict, 0), self.params.num_trees, name='probabilities')
def per_image_standardization(image): """Linearly scales `image` to have zero mean and unit variance. This op computes `(x - mean) / adjusted_stddev`, where `mean` is the average of all values in image, and `adjusted_stddev = max(stddev, 1.0/sqrt(image.NumElements()))`. `stddev` is the standard deviation of all values in `image`. It is capped away from zero to protect against division by 0 when handling uniform images. Args: image: An n-D Tensor where the last 3 dimensions are `[height, width, channels]`. Returns: The standardized image with same shape as `image`. Raises: ValueError: if the shape of 'image' is incompatible with this function. """ with ops.name_scope(None, 'per_image_standardization', [image]) as scope: image = ops.convert_to_tensor(image, name='image') num_pixels = math_ops.reduce_prod(array_ops.shape(image)[1:4]) image = math_ops.cast(image, dtype=dtypes.float32) image_mean = math_ops.reduce_mean(image, axis=[-1, -2, -3], keepdims=True) variance = (math_ops.reduce_mean( math_ops.square(image), axis=[-1, -2, -3], keepdims=True) - math_ops.square(image_mean)) variance = gen_nn_ops.relu(variance) stddev = math_ops.sqrt(variance) # Apply a minimum normalization that protects us against uniform images. min_stddev = math_ops.rsqrt(math_ops.cast(num_pixels, dtypes.float32)) pixel_value_scale = math_ops.maximum(stddev, min_stddev) pixel_value_offset = image_mean image = math_ops.subtract(image, pixel_value_offset) image = math_ops.div(image, pixel_value_scale, name=scope) return image
def _SegmentMinOrMaxGrad(op, grad, is_sorted): """Gradient for SegmentMin and (unsorted) SegmentMax. They share similar code.""" zeros = array_ops.zeros(array_ops.shape(op.inputs[0]), dtype=op.inputs[0].dtype) # Get the number of selected (minimum or maximum) elements in each segment. gathered_outputs = array_ops.gather(op.outputs[0], op.inputs[1]) is_selected = math_ops.equal(op.inputs[0], gathered_outputs) if is_sorted: num_selected = math_ops.segment_sum( math_ops.cast(is_selected, grad.dtype), op.inputs[1]) else: num_selected = math_ops.unsorted_segment_sum( math_ops.cast(is_selected, grad.dtype), op.inputs[1], op.inputs[2]) # Compute the gradient for each segment. The gradient for the ith segment is # divided evenly among the selected elements in that segment. weighted_grads = math_ops.div(grad, num_selected) gathered_grads = array_ops.gather(weighted_grads, op.inputs[1]) if is_sorted: return array_ops.where(is_selected, gathered_grads, zeros), None else: return array_ops.where(is_selected, gathered_grads, zeros), None, None
def _NthElementGrad(op, grad):
  """Return the gradients for NthElement.

  Args:
    op: The NthElementOp for which we need to generate gradients.
    grad: Tensor. The gradients passed to the NthElementOp.

  Returns:
    A list of two tensors, the first being the gradient w.r.t. the input,
    the second being the gradient w.r.t. the N (None).
  """
  input = op.inputs[0]  # pylint: disable=redefined-builtin
  output = op.outputs[0]

  # Compute the number of elements which equal to output in each reduction
  # dimension. If there are multiple elements then the gradient will be
  # divided between them.
  indicators = math_ops.cast(
      math_ops.equal(array_ops.expand_dims(output, -1), input), grad.dtype)

  grad = array_ops.expand_dims(grad, -1)
  num_selected = array_ops.expand_dims(math_ops.reduce_sum(indicators, -1), -1)

  return [math_ops.div(indicators, num_selected) * grad, None]
def _length_penalty(sequence_lengths, penalty_factor):
  """Calculates the length penalty. See https://arxiv.org/abs/1609.08144.

  Returns the length penalty tensor:
  ```
  [(5+sequence_lengths)/6]**penalty_factor
  ```
  where all operations are performed element-wise.

  Args:
    sequence_lengths: `Tensor`, the sequence lengths of each hypotheses.
    penalty_factor: A scalar that weights the length penalty.

  Returns:
    If the penalty is `0`, returns the scalar `1.0`. Otherwise returns
    the length penalty factor, a tensor with the same shape as
    `sequence_lengths`.
  """
  penalty_factor = ops.convert_to_tensor(penalty_factor, name="penalty_factor")
  penalty_factor.set_shape(())  # penalty should be a scalar.
  static_penalty = tensor_util.constant_value(penalty_factor)
  if static_penalty is not None and static_penalty == 0:
    return 1.0
  return math_ops.div(
      (5. + math_ops.to_float(sequence_lengths))**penalty_factor,
      (5. + 1.)**penalty_factor)
def safe_div(numerator, denominator):
  """Computes a safe divide which returns 0 if the denominator is zero.

  Note that the function contains an additional conditional check that is
  necessary for avoiding situations where the loss is zero causing NaNs to
  creep into the gradient computation.

  Args:
    numerator: An arbitrary `Tensor`.
    denominator: A `Tensor` whose shape matches `numerator` and whose values
      are assumed to be non-negative.

  Returns:
    The element-wise value of the numerator divided by the denominator.
  """
  if compat.forward_compatible(2018, 11, 1):
    return math_ops.div_no_nan(numerator, denominator)
  return array_ops.where(
      math_ops.greater(denominator, 0),
      math_ops.div(
          numerator,
          array_ops.where(math_ops.equal(denominator, 0),
                          array_ops.ones_like(denominator), denominator)),
      array_ops.zeros_like(numerator))
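# Illustrative sketch (not from the source): modern TensorFlow exposes this
# zero-safe behavior directly as tf.math.divide_no_nan; made-up values below.
import tensorflow as tf

num = tf.constant([1.0, 2.0, 3.0])
den = tf.constant([2.0, 0.0, 4.0])
# 0 where the denominator is 0, ordinary division elsewhere.
print(tf.math.divide_no_nan(num, den).numpy())  # -> [0.5, 0.0, 0.75]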
def __call__(self, inputs, state, reuse=False): scope = self.scope with vs.variable_scope(scope, reuse=reuse): # "GRUCell" with vs.variable_scope("Gates"): # Reset gate and update gate. # We start with bias of 1.0 to not reset and not update. r = linear(tf.concat((inputs, state), 1), self._num_units, activation_fn=None) g = linear(tf.concat((inputs, state), 1), self._num_units, activation_fn=None) u = linear(tf.concat((inputs, state), 1), self._num_units, activation_fn=None) r, u, g = tf.cast(sigmoid(r), tf.float32), tf.cast( sigmoid(u), tf.float32), tf.cast(sigmoid(g), tf.float32) #print ("R SHAPE: ", r) #print ("STATE: ", state) #print ("INPUTS: ", inputs) #print ("CONCAT: ", tf.concat((inputs, r * state), 1)) with vs.variable_scope("Candidate"): c = self._activation( linear(tf.concat((inputs, r * state), 1), self._num_units, activation_fn=None)) new_h = u * state + (1 - u) * c eps = 1e-13 temp = math_ops.div( math_ops.reduce_sum(math_ops.mul(new_h, state), 1), math_ops.reduce_sum(math_ops.mul(state, state), 1) + eps) m = array_ops.transpose(g) t1 = math_ops.mul(m, temp) t1 = array_ops.transpose(t1) distract_h = new_h - state * t1 return distract_h, distract_h
def polynomial_decay(learning_rate, global_step, decay_steps, end_learning_rate=0.0001, power=1.0, cycle=False, name=None): """Applies a polynomial decay to the learning rate. It is commonly observed that a monotonically decreasing learning rate, whose degree of change is carefully chosen, results in a better performing model. This function applies a polynomial decay function to a provided initial `learning_rate` to reach an `end_learning_rate` in the given `decay_steps`. It requires a `global_step` value to compute the decayed learning rate. You can just pass a TensorFlow variable that you increment at each training step. The function returns the decayed learning rate. It is computed as: ```python global_step = min(global_step, decay_steps) decayed_learning_rate = (learning_rate - end_learning_rate) * (1 - global_step / decay_steps) ^ (power) + end_learning_rate ``` If `cycle` is True then a multiple of `decay_steps` is used, the first one that is bigger than `global_steps`. ```python decay_steps = decay_steps * ceil(global_step / decay_steps) decayed_learning_rate = (learning_rate - end_learning_rate) * (1 - global_step / decay_steps) ^ (power) + end_learning_rate ``` Example: decay from 0.1 to 0.01 in 10000 steps using sqrt (i.e. power=0.5): ```python ... global_step = tf.Variable(0, trainable=False) starter_learning_rate = 0.1 end_learning_rate = 0.01 decay_steps = 10000 learning_rate = tf.train.polynomial_decay(starter_learning_rate, global_step, decay_steps, end_learning_rate, power=0.5) # Passing global_step to minimize() will increment it at each step. learning_step = ( tf.train.GradientDescentOptimizer(learning_rate) .minimize(...my loss..., global_step=global_step) ) ``` Args: learning_rate: A scalar `float32` or `float64` `Tensor` or a Python number. The initial learning rate. global_step: A scalar `int32` or `int64` `Tensor` or a Python number. Global step to use for the decay computation. Must not be negative. decay_steps: A scalar `int32` or `int64` `Tensor` or a Python number. Must be positive. See the decay computation above. end_learning_rate: A scalar `float32` or `float64` `Tensor` or a Python number. The minimal end learning rate. power: A scalar `float32` or `float64` `Tensor` or a Python number. The power of the polynomial. Defaults to linear, 1.0. cycle: A boolean, whether or not it should cycle beyond decay_steps. name: String. Optional name of the operation. Defaults to 'PolynomialDecay'. Returns: A scalar `Tensor` of the same type as `learning_rate`. The decayed learning rate. Raises: ValueError: if `global_step` is not supplied. """ if global_step is None: raise ValueError("global_step is required for polynomial_decay.") with ops.name_scope( name, "PolynomialDecay", [learning_rate, global_step, decay_steps, end_learning_rate, power ]) as name: learning_rate = ops.convert_to_tensor(learning_rate, name="learning_rate") dtype = learning_rate.dtype global_step = math_ops.cast(global_step, dtype) decay_steps = math_ops.cast(decay_steps, dtype) end_learning_rate = math_ops.cast(end_learning_rate, dtype) power = math_ops.cast(power, dtype) if cycle: # Find the first multiple of decay_steps that is bigger than global_step. # If global_step is zero set the multiplier to 1 multiplier = control_flow_ops.cond( math_ops.equal(global_step, 0), lambda: 1.0, lambda: math_ops.ceil(global_step / decay_steps)) decay_steps = math_ops.multiply(decay_steps, multiplier) else: # Make sure that the global_step used is not bigger than decay_steps. 
global_step = math_ops.minimum(global_step, decay_steps) p = math_ops.div(global_step, decay_steps) return math_ops.add(math_ops.multiply( learning_rate - end_learning_rate, math_ops.pow(1 - p, power)), end_learning_rate, name=name)
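# A small pure-Python check (an illustration, not part of the library) of the
# polynomial decay formula above, including the cycle behaviour.
import math

def poly_decay(lr, step, decay_steps, end_lr=0.0001, power=1.0, cycle=False):
  if cycle:
    # Use the first multiple of decay_steps that is >= step (at least one).
    decay_steps = decay_steps * max(1.0, math.ceil(step / decay_steps))
  else:
    step = min(step, decay_steps)
  p = step / decay_steps
  return (lr - end_lr) * (1 - p) ** power + end_lr

# Linear decay from 0.1 to 0.01 over 10000 steps.
print(poly_decay(0.1, 0, 10000, end_lr=0.01))      # 0.1
print(poly_decay(0.1, 5000, 10000, end_lr=0.01))   # 0.055
print(poly_decay(0.1, 20000, 10000, end_lr=0.01))  # 0.01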
def embedding_lookup_sparse_with_distributed_aggregation( params, sp_ids, sp_weights, partition_strategy="mod", name=None, combiner=None, max_norm=None): """Computes embeddings for the given ids and weights. Embeddings belonging to same param are aggregated on that device first. This op is intended to decrease data transmission and improve parallelism. See `tf.nn.embedding_lookup_sparse` for the functionality and example of this op. Args: params: A single tensor representing the complete embedding tensor, or a list of P tensors all of same shape except for the first dimension, representing sharded embedding tensors. Alternatively, a `PartitionedVariable`, created by partitioning along dimension 0. Each element must be appropriately sized for the given `partition_strategy`. sp_ids: N x M SparseTensor of int64 ids (typically from FeatureValueToId), where N is typically batch size and M is arbitrary. sp_weights: either a SparseTensor of float / double weights, or None to indicate all weights should be taken to be 1. If specified, sp_weights must have exactly the same shape and indices as sp_ids. partition_strategy: A string specifying the partitioning strategy, relevant if `len(params) > 1`. Currently `"div"` and `"mod"` are supported. Default is `"mod"`. See `tf.nn.embedding_lookup` for more details. name: Optional name for the op. combiner: A string specifying the reduction op. Currently "mean", "sqrtn" and "sum" are supported. "sum" computes the weighted sum of the embedding results for each row. "mean" is the weighted sum divided by the total weight. "sqrtn" is the weighted sum divided by the square root of the sum of the squares of the weights. max_norm: If not None, each embedding is normalized to have l2 norm equal to max_norm before combining. Returns: A dense tensor representing the combined embeddings for the sparse ids. For each row in the dense tensor represented by sp_ids, the op looks up the embeddings for all ids in that row, multiplies them by the corresponding weight, and combines these embeddings as specified. Raises: TypeError: If sp_ids is not a SparseTensor, or if sp_weights is neither None nor SparseTensor. ValueError: If combiner is not one of {"mean", "sqrtn", "sum"}. """ if combiner is None: logging.warn("The default value of combiner will change from \"mean\" " "to \"sqrtn\" after 2016/11/01.") combiner = "mean" if combiner not in ("mean", "sqrtn", "sum"): raise ValueError("combiner must be one of 'mean', 'sqrtn' or 'sum'") if isinstance(params, variables.PartitionedVariable): params = list(params) # Iterate to get the underlying Variables. if not isinstance(params, list): params = [params] if not isinstance(sp_ids, sparse_tensor.SparseTensor): raise TypeError("sp_ids must be SparseTensor") ignore_weights = sp_weights is None if not ignore_weights: if not isinstance(sp_weights, sparse_tensor.SparseTensor): raise TypeError("sp_weights must be either None or SparseTensor") sp_ids.values.get_shape().assert_is_compatible_with( sp_weights.values.get_shape()) sp_ids.indices.get_shape().assert_is_compatible_with( sp_weights.indices.get_shape()) sp_ids.dense_shape.get_shape().assert_is_compatible_with( sp_weights.dense_shape.get_shape()) # TODO(yleon): Add enhanced node assertions to verify that sp_ids and # sp_weights have equal indices and shapes. 
with ops.name_scope(name, "embedding_lookup_sparse", params + [sp_ids]) as name: segment_ids = sp_ids.indices[:, 0] if segment_ids.dtype != dtypes.int32: segment_ids = math_ops.cast(segment_ids, dtypes.int32) ids = sp_ids.values if ignore_weights: ids, idx = array_ops.unique(ids) else: idx = None weights = None if ignore_weights else sp_weights.values embeddings = _embedding_lookup_with_distributed_aggregation( params, ids, partition_strategy=partition_strategy, max_norm=max_norm, weights=weights, idx=idx, segment_ids=segment_ids) # Set weights to all one if ignore weights. if ignore_weights: weights = array_ops.fill([array_ops.shape(segment_ids)[0]], 1) if weights.dtype != embeddings.dtype: weights = math_ops.cast(weights, embeddings.dtype) # Reshape weights. ones = array_ops.fill( array_ops.expand_dims(array_ops.rank(embeddings) - 1, 0), 1) bcast_weights_shape = array_ops.concat([array_ops.shape(weights), ones], 0) orig_weights_shape = weights.get_shape() weights = array_ops.reshape(weights, bcast_weights_shape) if embeddings.get_shape().ndims is not None: weights.set_shape( orig_weights_shape.concatenate( [1 for _ in range(embeddings.get_shape().ndims - 1)])) if combiner == "mean": weight_sum = math_ops.segment_sum(weights, segment_ids) embeddings = math_ops.div(embeddings, weight_sum) elif combiner == "sqrtn": weights_squared = math_ops.pow(weights, 2) weight_sum = math_ops.segment_sum(weights_squared, segment_ids) weight_sum_sqrt = math_ops.sqrt(weight_sum) embeddings = math_ops.div(embeddings, weight_sum_sqrt) elif combiner != "sum": assert False, "Unrecognized combiner" return embeddings
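# Illustrative NumPy sketch (assumed shapes, not library code) of the weight
# reshape above: per-id weights of shape [nnz] are reshaped to [nnz, 1, ..., 1]
# so they broadcast against embeddings of shape [nnz, d1, ..., dk].
import numpy as np

embeddings = np.random.randn(5, 3, 2)          # 5 looked-up ids, embedding shape (3, 2)
weights = np.array([2.0, 0.5, 1.0, 3.0, 1.0])  # one weight per looked-up id

bcast_shape = list(weights.shape) + [1] * (embeddings.ndim - 1)
weighted = embeddings * weights.reshape(bcast_shape)
print(weighted.shape)  # (5, 3, 2)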
def embedding_lookup_sparse(params, sp_ids, sp_weights, partition_strategy="mod", name=None, combiner=None, max_norm=None): """Computes embeddings for the given ids and weights. This op assumes that there is at least one id for each row in the dense tensor represented by sp_ids (i.e. there are no rows with empty features), and that all the indices of sp_ids are in canonical row-major order. It also assumes that all id values lie in the range [0, p0), where p0 is the sum of the size of params along dimension 0. Args: params: A single tensor representing the complete embedding tensor, or a list of P tensors all of same shape except for the first dimension, representing sharded embedding tensors. Alternatively, a `PartitionedVariable`, created by partitioning along dimension 0. Each element must be appropriately sized for the given `partition_strategy`. sp_ids: N x M SparseTensor of int64 ids (typically from FeatureValueToId), where N is typically batch size and M is arbitrary. sp_weights: either a SparseTensor of float / double weights, or None to indicate all weights should be taken to be 1. If specified, sp_weights must have exactly the same shape and indices as sp_ids. partition_strategy: A string specifying the partitioning strategy, relevant if `len(params) > 1`. Currently `"div"` and `"mod"` are supported. Default is `"mod"`. See `tf.nn.embedding_lookup` for more details. name: Optional name for the op. combiner: A string specifying the reduction op. Currently "mean", "sqrtn" and "sum" are supported. "sum" computes the weighted sum of the embedding results for each row. "mean" is the weighted sum divided by the total weight. "sqrtn" is the weighted sum divided by the square root of the sum of the squares of the weights. max_norm: If not None, each embedding is normalized to have l2 norm equal to max_norm before combining. Returns: A dense tensor representing the combined embeddings for the sparse ids. For each row in the dense tensor represented by sp_ids, the op looks up the embeddings for all ids in that row, multiplies them by the corresponding weight, and combines these embeddings as specified. In other words, if shape(combined params) = [p0, p1, ..., pm] and shape(sp_ids) = shape(sp_weights) = [d0, d1, ..., dn] then shape(output) = [d0, d1, ..., dn-1, p1, ..., pm]. For instance, if params is a 10x20 matrix, and sp_ids / sp_weights are [0, 0]: id 1, weight 2.0 [0, 1]: id 3, weight 0.5 [1, 0]: id 0, weight 1.0 [2, 3]: id 1, weight 3.0 with `combiner`="mean", then the output will be a 3x20 matrix where output[0, :] = (params[1, :] * 2.0 + params[3, :] * 0.5) / (2.0 + 0.5) output[1, :] = params[0, :] * 1.0 output[2, :] = params[1, :] * 3.0 Raises: TypeError: If sp_ids is not a SparseTensor, or if sp_weights is neither None nor SparseTensor. ValueError: If combiner is not one of {"mean", "sqrtn", "sum"}. """ if combiner is None: logging.warn("The default value of combiner will change from \"mean\" " "to \"sqrtn\" after 2016/11/01.") combiner = "mean" if combiner not in ("mean", "sqrtn", "sum"): raise ValueError("combiner must be one of 'mean', 'sqrtn' or 'sum'") if isinstance(params, variables.PartitionedVariable): params = list(params) # Iterate to get the underlying Variables. 
if not isinstance(params, list): params = [params] if not isinstance(sp_ids, sparse_tensor.SparseTensor): raise TypeError("sp_ids must be SparseTensor") ignore_weights = sp_weights is None if not ignore_weights: if not isinstance(sp_weights, sparse_tensor.SparseTensor): raise TypeError("sp_weights must be either None or SparseTensor") sp_ids.values.get_shape().assert_is_compatible_with( sp_weights.values.get_shape()) sp_ids.indices.get_shape().assert_is_compatible_with( sp_weights.indices.get_shape()) sp_ids.dense_shape.get_shape().assert_is_compatible_with( sp_weights.dense_shape.get_shape()) # TODO(yleon): Add enhanced node assertions to verify that sp_ids and # sp_weights have equal indices and shapes. with ops.name_scope(name, "embedding_lookup_sparse", params + [sp_ids]) as name: segment_ids = sp_ids.indices[:, 0] if segment_ids.dtype != dtypes.int32: segment_ids = math_ops.cast(segment_ids, dtypes.int32) ids = sp_ids.values if ignore_weights: ids, idx = array_ops.unique(ids) else: idx = None embeddings = embedding_lookup(params, ids, partition_strategy=partition_strategy, max_norm=max_norm) if not ignore_weights: weights = sp_weights.values if weights.dtype != embeddings.dtype: weights = math_ops.cast(weights, embeddings.dtype) # Reshape weights to allow broadcast ones = array_ops.fill( array_ops.expand_dims(array_ops.rank(embeddings) - 1, 0), 1) bcast_weights_shape = array_ops.concat_v2( [array_ops.shape(weights), ones], 0) orig_weights_shape = weights.get_shape() weights = array_ops.reshape(weights, bcast_weights_shape) # Set the weight shape, since after reshaping to bcast_weights_shape, # the shape becomes None. if embeddings.get_shape().ndims is not None: weights.set_shape( orig_weights_shape.concatenate( [1 for _ in range(embeddings.get_shape().ndims - 1)])) embeddings *= weights if combiner == "sum": embeddings = math_ops.segment_sum(embeddings, segment_ids, name=name) elif combiner == "mean": embeddings = math_ops.segment_sum(embeddings, segment_ids) weight_sum = math_ops.segment_sum(weights, segment_ids) embeddings = math_ops.div(embeddings, weight_sum, name=name) elif combiner == "sqrtn": embeddings = math_ops.segment_sum(embeddings, segment_ids) weights_squared = math_ops.pow(weights, 2) weight_sum = math_ops.segment_sum(weights_squared, segment_ids) weight_sum_sqrt = math_ops.sqrt(weight_sum) embeddings = math_ops.div(embeddings, weight_sum_sqrt, name=name) else: assert False, "Unrecognized combiner" else: assert idx is not None if combiner == "sum": embeddings = math_ops.sparse_segment_sum(embeddings, idx, segment_ids, name=name) elif combiner == "mean": embeddings = math_ops.sparse_segment_mean(embeddings, idx, segment_ids, name=name) elif combiner == "sqrtn": embeddings = math_ops.sparse_segment_sqrt_n(embeddings, idx, segment_ids, name=name) else: assert False, "Unrecognized combiner" return embeddings
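# A NumPy recreation (illustrative only) of the worked example in the docstring
# above: params[ids] stands in for the embedding lookup, and the "mean"
# combiner is a weighted segment sum divided by the total weight per row.
import numpy as np

params = np.random.randn(10, 20)
segment_ids = np.array([0, 0, 1, 2])   # output row of each (id, weight) pair
ids = np.array([1, 3, 0, 1])
weights = np.array([2.0, 0.5, 1.0, 3.0])

weighted = params[ids] * weights[:, None]
out = np.zeros((3, 20))
total = np.zeros(3)
np.add.at(out, segment_ids, weighted)   # segment_sum of weighted embeddings
np.add.at(total, segment_ids, weights)  # segment_sum of weights
out /= total[:, None]

expected_row0 = (params[1] * 2.0 + params[3] * 0.5) / (2.0 + 0.5)
print(np.allclose(out[0], expected_row0))  # True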
def convert_image_dtype(image, dtype, saturate=False, name=None):
  """Convert `image` to `dtype`, scaling its values if needed.

  Images that are represented using floating point values are expected to have
  values in the range [0,1). Image data stored in integer data types are
  expected to have values in the range `[0,MAX]`, where `MAX` is the largest
  positive representable number for the data type.

  This op converts between data types, scaling the values appropriately before
  casting.

  Note that converting from floating point inputs to integer types may lead to
  over/underflow problems. Set saturate to `True` to avoid such problems in
  problematic conversions. If enabled, saturation will clip the output into the
  allowed range before performing a potentially dangerous cast (and only before
  performing such a cast, i.e., when casting from a floating point to an
  integer type, and when casting from a signed to an unsigned type; `saturate`
  has no effect on casts between floats, or on casts that increase the type's
  range).

  Args:
    image: An image.
    dtype: A `DType` to convert `image` to.
    saturate: If `True`, clip the input before casting (if necessary).
    name: A name for this operation (optional).

  Returns:
    `image`, converted to `dtype`.
  """
  if dtype == image.dtype:
    return image

  with ops.op_scope([image], name, 'convert_image') as name:
    # Both integer: use integer multiplication in the larger range
    if image.dtype.is_integer and dtype.is_integer:
      scale_in = image.dtype.max
      scale_out = dtype.max
      if scale_in > scale_out:
        # Scaling down, scale first, then cast. The scaling factor will
        # cause in.max to be mapped to above out.max but below out.max+1,
        # so that the output is safely in the supported range.
        scale = (scale_in + 1) // (scale_out + 1)
        scaled = math_ops.div(image, scale)
        if saturate:
          return saturate_cast(scaled, dtype)
        else:
          return math_ops.cast(scaled, dtype)
      else:
        # Scaling up, cast first, then scale. The scale will not map in.max to
        # out.max, but converting back and forth should result in no change.
        if saturate:
          cast = saturate_cast(image, dtype)
        else:
          cast = math_ops.cast(image, dtype)
        scale = (scale_out + 1) // (scale_in + 1)
        return math_ops.mul(cast, scale)
    elif image.dtype.is_floating and dtype.is_floating:
      # Both float: Just cast, no possible overflows in the allowed ranges.
      # Note: We're ignoring float overflows. If your image dynamic range
      # exceeds float range you're on your own.
      return math_ops.cast(image, dtype)
    else:
      if image.dtype.is_integer:
        # Converting to float: first cast, then scale. No saturation possible.
        cast = math_ops.cast(image, dtype)
        scale = 1. / image.dtype.max
        return math_ops.mul(cast, scale)
      else:
        # Converting from float: first scale, then cast
        scale = dtype.max + 0.5  # avoid rounding problems in the cast
        scaled = math_ops.mul(image, scale)
        if saturate:
          return saturate_cast(scaled, dtype)
        else:
          return math_ops.cast(scaled, dtype)
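# Quick arithmetic check (illustrative only) of the integer-to-integer scaling
# above, using uint8 (max 255) and uint16 (max 65535) as assumed dtypes.
# Scaling down uint16 -> uint8: divide by (in.max + 1) // (out.max + 1).
scale = (65535 + 1) // (255 + 1)
print(scale, 65535 // scale)   # 256 255  (in.max maps onto out.max)

# Scaling up uint8 -> uint16: cast first, then multiply by
# (out.max + 1) // (in.max + 1).
scale = (65535 + 1) // (255 + 1)
print(255 * scale)             # 65280 (not out.max, but the round trip is lossless)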
def inverse_time_decay(learning_rate, global_step, decay_steps, decay_rate,
                       staircase=False, name=None):
  """Applies inverse time decay to the initial learning rate.

  When training a model, it is often recommended to lower the learning rate as
  the training progresses. This function applies an inverse decay function to a
  provided initial learning rate. It requires a `global_step` value to compute
  the decayed learning rate. You can just pass a TensorFlow variable that you
  increment at each training step.

  The function returns the decayed learning rate. It is computed as:

  ```python
  decayed_learning_rate = learning_rate / (1 + decay_rate * global_step /
                                           decay_steps)
  ```

  Example: decay 1/t with a rate of 0.5:

  ```python
  ...
  global_step = tf.Variable(0, trainable=False)
  learning_rate = 0.1
  decay_steps = 1.0
  decay_rate = 0.5
  learning_rate = tf.train.inverse_time_decay(learning_rate, global_step,
                                              decay_steps, decay_rate)

  # Passing global_step to minimize() will increment it at each step.
  learning_step = (
      tf.train.GradientDescentOptimizer(learning_rate)
      .minimize(...my loss..., global_step=global_step)
  )
  ```

  Args:
    learning_rate: A scalar `float32` or `float64` `Tensor` or a Python number.
      The initial learning rate.
    global_step: A scalar `int32` or `int64` `Tensor` or a Python number.
      Global step to use for the decay computation. Must not be negative.
    decay_steps: How often to apply decay.
    decay_rate: A Python number. The decay rate.
    staircase: Whether to apply decay in a discrete staircase, as opposed to
      continuous, fashion.
    name: String. Optional name of the operation. Defaults to
      'InverseTimeDecay'.

  Returns:
    A scalar `Tensor` of the same type as `learning_rate`. The decayed
    learning rate.

  Raises:
    ValueError: if `global_step` is not supplied.
  """
  if global_step is None:
    raise ValueError("global_step is required for inverse_time_decay.")
  with ops.name_scope(name, "InverseTimeDecay",
                      [learning_rate, global_step, decay_rate]) as name:
    learning_rate = ops.convert_to_tensor(learning_rate, name="learning_rate")
    dtype = learning_rate.dtype
    global_step = math_ops.cast(global_step, dtype)
    decay_steps = math_ops.cast(decay_steps, dtype)
    decay_rate = math_ops.cast(decay_rate, dtype)
    p = global_step / decay_steps
    if staircase:
      p = math_ops.floor(p)
    const = math_ops.cast(constant_op.constant(1), learning_rate.dtype)
    denom = math_ops.add(const, math_ops.multiply(decay_rate, p))
    return math_ops.div(learning_rate, denom, name=name)
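# Pure-Python check (illustrative, not library code) of the inverse time decay
# formula above, with and without staircasing.
import math

def inv_time_decay(lr, step, decay_steps, decay_rate, staircase=False):
  p = step / decay_steps
  if staircase:
    p = math.floor(p)
  return lr / (1 + decay_rate * p)

print(inv_time_decay(0.1, 0, 1.0, 0.5))           # 0.1
print(inv_time_decay(0.1, 10, 1.0, 0.5))          # 0.1 / 6  ~= 0.01667
print(inv_time_decay(0.1, 15, 10.0, 0.5, True))   # 0.1 / 1.5 ~= 0.06667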
def testComplexDiv(self): foo = array_ops.constant([1. + 3.j]) with self.cached_session(): _ = math_ops.divide(foo, 1.).eval() _ = math_ops.div(foo, 2.).eval()