def hinge_loss(logits, labels=None, scope=None):
  """Returns the elementwise hinge loss tensor.

  Args:
    logits: The logits, a float tensor. Logits are assumed to be unbounded and
      0-centered: a value > 0 (resp. < 0) is treated as a positive (resp.
      negative) binary prediction.
    labels: The ground truth tensor, same shape as `logits`, with values
      expected to be 0.0 or 1.0. The {0, 1} labels are converted to {-1, 1}
      internally before the hinge loss is computed.
    scope: The scope for the operations performed in computing the loss.

  Returns:
    An unweighted `Tensor` with the same shape as `logits` and `labels`
    holding the per-element loss values.

  Raises:
    ValueError: If the shapes of `logits` and `labels` don't match.
  """
  with ops.name_scope(scope, "hinge_loss", [logits, labels]) as scope:
    logits.get_shape().assert_is_compatible_with(labels.get_shape())
    # Map {0, 1} labels onto {-1, +1} so the standard hinge formula
    # max(0, 1 - y * logits) applies.
    labels = math_ops.to_float(labels)
    ones = array_ops.ones_like(labels)
    signed_labels = math_ops.subtract(2 * labels, ones)
    return nn_ops.relu(
        math_ops.subtract(ones, math_ops.multiply(signed_labels, logits)))
def test_multiple_outputs(self):
  #   -         +
  #  / \y0   y1/ \
  # x    split    z
  #        |
  #        y           (nodes are ops; edges are going up)
  g = ops.Graph()
  with g.as_default():
    x = array_ops.placeholder(dtypes.float32, shape=[1], name='x')
    y = array_ops.placeholder(dtypes.float32, shape=[2], name='y')
    y0, y1 = array_ops.split(y, num_or_size_splits=2, axis=0)
    z = array_ops.placeholder(dtypes.float32, shape=[1], name='z')
    math_ops.add(x, y0)
    math_ops.subtract(y1, z)

  # Match any op whose output feeds the first input of a Sub op.
  wildcard_pattern = graph_matcher.OpTypePattern('*')
  sub_pattern = graph_matcher.OpTypePattern(
      'Sub', inputs=[wildcard_pattern, '*'])
  matcher = graph_matcher.GraphMatcher(sub_pattern)

  results = list(matcher.match_graph(g))
  self.assertEqual(1, len(results))
  result = results[0]

  # y0 and y1 are two outputs of the single Split op, so their .op is shared;
  # the matcher must resolve the pattern to that op and to the y1 tensor.
  self.assertEqual(y0.op, y1.op)
  self.assertEqual(result.get_op(wildcard_pattern), y1.op)
  self.assertEqual(result.get_tensor(wildcard_pattern), y1)
def hinge_loss(labels, logits, weights=1.0, scope=None,
               loss_collection=ops.GraphKeys.LOSSES,
               reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Adds a hinge loss to the training procedure.

  Args:
    labels: The ground truth tensor, same shape as `logits`; values expected
      to be 0.0 or 1.0.
    logits: The logits, a float tensor.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions
      must be either `1`, or the same as the corresponding `losses` dimension).
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
    shape as `labels`; otherwise, it is scalar.

  Raises:
    ValueError: If the shapes of `logits` and `labels` don't match.
  """
  with ops.name_scope(scope, "hinge_loss", (logits, labels, weights)) as scope:
    logits = math_ops.to_float(logits)
    labels = math_ops.to_float(labels)
    logits.get_shape().assert_is_compatible_with(labels.get_shape())
    # Convert the {0, 1} labels into {-1, +1} so that the hinge formula
    # max(0, 1 - y * logits) applies.
    ones = array_ops.ones_like(labels)
    signed_labels = math_ops.subtract(2 * labels, ones)
    hinge = nn_ops.relu(
        math_ops.subtract(ones, math_ops.multiply(signed_labels, logits)))
    return compute_weighted_loss(
        hinge, weights, scope, loss_collection, reduction=reduction)
def huber_loss(y_true, y_pred, delta=1.0):
  """Computes the Huber loss value.

  For each value x in `error=y_true-y_pred`:

  ```
  0.5 * x^2                  if |x| <= d
  0.5 * d^2 + d * (|x| - d)  if |x| > d
  ```

  where d is `delta`. See: https://en.wikipedia.org/wiki/Huber_loss

  Args:
    y_true: tensor of true targets.
    y_pred: tensor of predicted targets.
    delta: A float, the point where the Huber loss function changes from a
      quadratic to linear.

  Returns:
    Tensor with one scalar loss entry per sample.
  """
  y_pred = math_ops.cast(y_pred, dtype=K.floatx())
  y_true = math_ops.cast(y_true, dtype=K.floatx())
  residual = math_ops.subtract(y_pred, y_true)
  abs_residual = math_ops.abs(residual)
  # Split |x| into a quadratic part (capped at delta) and a linear remainder.
  quad = math_ops.minimum(abs_residual, delta)
  lin = math_ops.subtract(abs_residual, quad)
  half = ops.convert_to_tensor(0.5, dtype=quad.dtype)
  return math_ops.add(
      math_ops.multiply(half, math_ops.multiply(quad, quad)),
      math_ops.multiply(delta, lin))
def hinge_loss(labels, logits, weights=1.0, scope=None,
               loss_collection=ops.GraphKeys.LOSSES):
  """Adds a hinge loss to the training procedure.

  WARNING: `weights` also supports dimensions of 1, but the broadcasting does
  not work as advertised, you'll wind up with weighted sum instead of weighted
  mean for any but the last dimension. This will be cleaned up soon, so please
  do not rely on the current behavior for anything but the shapes documented
  for `weights` below.

  Args:
    labels: The ground truth tensor, same shape as `logits`; values expected
      to be 0.0 or 1.0.
    logits: The logits, a float tensor.
    weights: Coefficients for the loss: a scalar, a tensor of shape
      `[batch_size]`, or a tensor whose shape matches `predictions`.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.

  Returns:
    A scalar `Tensor` of the loss value.

  Raises:
    ValueError: If the shapes of `logits` and `labels` don't match.
  """
  with ops.name_scope(scope, "hinge_loss", (logits, labels)) as scope:
    logits = math_ops.to_float(logits)
    labels = math_ops.to_float(labels)
    logits.get_shape().assert_is_compatible_with(labels.get_shape())
    # Convert the {0, 1} labels into {-1, +1} before applying the hinge
    # formula max(0, 1 - y * logits).
    ones = array_ops.ones_like(labels)
    signed_labels = math_ops.subtract(2 * labels, ones)
    hinge = nn_ops.relu(
        math_ops.subtract(ones, math_ops.multiply(signed_labels, logits)))
    return compute_weighted_loss(hinge, weights, scope, loss_collection)
def testStripUnusedMultipleInputs(self):
  """Strips a two-input graph and checks the result loads and runs correctly."""
  input_graph_name = "input_graph.pb"
  output_graph_name = "output_graph.pb"

  # We'll create an input graph that multiplies two input nodes.
  with ops.Graph().as_default():
    constant_node1 = constant_op.constant(1.0, name="constant_node1")
    constant_node2 = constant_op.constant(2.0, name="constant_node2")
    input_node1 = math_ops.subtract(constant_node1, 3.0, name="input_node1")
    input_node2 = math_ops.subtract(constant_node2, 5.0, name="input_node2")
    output_node = math_ops.multiply(
        input_node1, input_node2, name="output_node")
    # An extra node downstream of the output that stripping must remove.
    math_ops.add(output_node, 2.0, name="later_node")
    sess = session.Session()
    output = sess.run(output_node)
    self.assertNear(6.0, output, 0.00001)
    graph_io.write_graph(sess.graph, self.get_temp_dir(), input_graph_name)

  # We save out the graph to disk, and then call the const conversion
  # routine.
  input_graph_path = os.path.join(self.get_temp_dir(), input_graph_name)
  input_binary = False
  input_node_names = "input_node1,input_node2"
  input_node_types = [
      dtypes.float32.as_datatype_enum, dtypes.float32.as_datatype_enum
  ]
  output_binary = True
  output_node_names = "output_node"
  output_graph_path = os.path.join(self.get_temp_dir(), output_graph_name)

  strip_unused_lib.strip_unused_from_files(input_graph_path, input_binary,
                                           output_graph_path, output_binary,
                                           input_node_names,
                                           output_node_names,
                                           input_node_types)

  # Now we make sure the variable is now a constant, and that the graph still
  # produces the expected result.
  with ops.Graph().as_default():
    output_graph_def = graph_pb2.GraphDef()
    with open(output_graph_path, "rb") as f:
      output_graph_def.ParseFromString(f.read())
    _ = importer.import_graph_def(output_graph_def, name="")

    self.assertEqual(3, len(output_graph_def.node))
    for node in output_graph_def.node:
      self.assertNotEqual("Add", node.op)
      self.assertNotEqual("Sub", node.op)
      # BUG FIX: `input_node_names` is the comma-joined string
      # "input_node1,input_node2", which can never equal an individual node
      # name, so the shape-attribute assertion below was dead code. Compare
      # against the individual node names instead.
      if node.name in input_node_names.split(","):
        self.assertTrue("shape" in node.attr)

    with session.Session() as sess:
      input_node1 = sess.graph.get_tensor_by_name("input_node1:0")
      input_node2 = sess.graph.get_tensor_by_name("input_node2:0")
      output_node = sess.graph.get_tensor_by_name("output_node:0")
      output = sess.run(output_node,
                        feed_dict={input_node1: [10.0],
                                   input_node2: [-5.0]})
      self.assertNear(-50.0, output, 0.00001)
def unregularized_loss(self, examples):
  """Add operations to compute the loss (without the regularization loss).

  Args:
    examples: Examples to compute unregularized loss on.

  Returns:
    An Operation that computes mean (unregularized) loss for given set of
    examples.

  Raises:
    ValueError: if examples are not well defined.
  """
  self._assertSpecified([
      'example_labels', 'example_weights', 'sparse_features', 'dense_features'
  ], examples)
  self._assertList(['sparse_features', 'dense_features'], examples)
  with name_scope('sdca/unregularized_loss'):
    # All loss math is carried out in float64; predictions, labels and
    # weights are cast up front.
    predictions = math_ops.cast(
        self._linear_predictions(examples), dtypes.float64)
    labels = math_ops.cast(
        internal_convert_to_tensor(examples['example_labels']),
        dtypes.float64)
    weights = math_ops.cast(
        internal_convert_to_tensor(examples['example_weights']),
        dtypes.float64)

    # Each branch below returns the weighted mean of the per-example loss:
    # sum(loss_i * w_i) / sum(w_i).
    if self._options['loss_type'] == 'logistic_loss':
      return math_ops.reduce_sum(math_ops.multiply(
          sigmoid_cross_entropy_with_logits(labels=labels,
                                            logits=predictions),
          weights)) / math_ops.reduce_sum(weights)

    if self._options['loss_type'] == 'poisson_loss':
      return math_ops.reduce_sum(math_ops.multiply(
          log_poisson_loss(targets=labels, log_input=predictions),
          weights)) / math_ops.reduce_sum(weights)

    if self._options['loss_type'] in ['hinge_loss', 'smooth_hinge_loss']:
      # hinge_loss = max{0, 1 - y_i w*x} where y_i \in {-1, 1}. So, we need to
      # first convert 0/1 labels into -1/1 labels.
      all_ones = array_ops.ones_like(predictions)
      adjusted_labels = math_ops.subtract(2 * labels, all_ones)
      # Tensor that contains (unweighted) error (hinge loss) per
      # example.
      error = nn_ops.relu(
          math_ops.subtract(all_ones,
                            math_ops.multiply(adjusted_labels, predictions)))
      weighted_error = math_ops.multiply(error, weights)
      return math_ops.reduce_sum(weighted_error) / math_ops.reduce_sum(
          weights)

    # squared loss (the fall-through default when no other loss type matched)
    err = math_ops.subtract(labels, predictions)

    weighted_squared_err = math_ops.multiply(math_ops.square(err), weights)
    # SDCA squared loss function is sum(err^2) / (2*sum(weights))
    return (math_ops.reduce_sum(weighted_squared_err) /
            (2.0 * math_ops.reduce_sum(weights)))
def exact_laplacian_kernel(x, y, stddev):
  """Computes exact Laplacian kernel value(s) for tensors x and y using stddev.

  The Laplacian kernel for vectors u, v is defined as
  K(u, v) = exp(-||u-v|| / stddev), where the norm is the l1-norm. x and y may
  be either vectors (of equal dimension) or matrices (with the same number of
  columns); in the matrix case, the result holds K(u, v) for every pair of a
  row u from x and a row v from y.

  Args:
    x: a tensor of rank 1 or 2. Its shape should be either [dim] or [m, dim].
    y: a tensor of rank 1 or 2. Its shape should be either [dim] or [n, dim].
    stddev: The width of the Gaussian kernel.

  Returns:
    A single value (scalar) with shape (1, 1) if x, y are vectors, or a matrix
    of shape (m, n) with entries K(u, v) for all (u, v) row pairs.

  Raises:
    InvalidShapeError: if the shapes of x, y are not compatible.
  """
  x_mat, y_mat = _align_matrices(x, y)
  # l1 distance between every aligned row pair, reduced over the feature axis.
  l1_dist = math_ops.reduce_sum(
      math_ops.abs(math_ops.subtract(x_mat, y_mat)), 2)
  return math_ops.exp(-l1_dist / stddev)
def BackwardLoopBody(*args):
  """Backward loop body function."""
  # args[0] is the loop counter t; args[1] is t on the accumulator device.
  t, dev_t = args[0], args[1]
  (theta, orig_state0, inputs, acc_state, acc_extras, d_theta, d_state1,
   d_inputs, d_acc_state) = _Pack(args[2:], bakloop_sig)
  # The input recurrent state for time step t is previous time step's
  # output, or the original state0 when on time step 0.
  state_from_acc = _Index(acc_state, math_ops.maximum(0, t - 1))
  state0 = functional_ops.If(
      math_ops.equal(t, array_ops.constant(0, dtypes.int32)),
      _Flatten([state_from_acc, orig_state0]), ReturnOrigState0,
      ReturnAccState)
  state0 = nest.pack_sequence_as(orig_state0, state0)
  # The external inputs for time step t.
  inputs_t = _Index(inputs, t)
  # The extras for time step t.
  extras_t = _Index(acc_extras, t)
  # Fold the accumulated gradient for this step's state into d_state1 before
  # running the per-step backward function.
  d_state1 = _Add(_Index(d_acc_state, t), d_state1)
  (d_theta_t, d_state0, d_inputs_t) = _Pack(
      Bak(*_Flatten([theta, state0, inputs_t, extras_t, d_state1])),
      [self._theta, self._state, self._inputs])
  # Parameter gradients are summed across steps; input gradients are written
  # back at index dev_t.
  d_theta = _Add(d_theta, d_theta_t)
  d_inputs = _Update(d_inputs, d_inputs_t, dev_t)
  # Decrement the device-side counter and repack the loop-carried values.
  return [math_ops.subtract(dev_t, 1)] + _Flatten([
      theta, orig_state0, inputs, acc_state, acc_extras, d_theta, d_state0,
      d_inputs, d_acc_state
  ])
def mean_squared_error(predictions, labels=None, weights=1.0, scope=None):
  """Adds a Sum-of-Squares loss to the training procedure.

  `weights` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weights` is a tensor of
  size [batch_size], then the total loss for each sample of the batch is
  rescaled by the corresponding element in the `weights` vector. If the shape
  of `weights` matches the shape of `predictions`, then the loss of each
  measurable element of `predictions` is scaled by the corresponding value of
  `weights`.

  Args:
    predictions: The predicted outputs.
    labels: The ground truth output tensor, same dimensions as 'predictions'.
    weights: Coefficients for the loss: a scalar, a tensor of shape
      [batch_size], or a tensor whose shape matches `predictions`.
    scope: The scope for the operations performed in computing the loss.

  Returns:
    A scalar `Tensor` representing the loss value.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `labels`
      or if the shape of `weights` is invalid.
  """
  with ops.name_scope(scope, "mean_squared_error",
                      [predictions, labels, weights]) as scope:
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())
    predictions = math_ops.to_float(predictions)
    labels = math_ops.to_float(labels)
    squared_residuals = math_ops.square(
        math_ops.subtract(predictions, labels))
    return compute_weighted_loss(squared_residuals, weights, scope=scope)
def _Update_global_variables():
  # Only variables that actually received a gradient take part in the update.
  local_vars = [v for g, v in grads_and_vars if g is not None]
  global_center_vars = [self._global_map[var] for var in local_vars]
  local_center_vars = [self._local_map[var] for var in local_vars]
  local_center_vars_update = []
  # Refresh each local center variable from its global counterpart.
  for lvar, var in zip(local_center_vars, global_center_vars):
    local_center_vars_update.append(lvar.assign(var))
  update_ops = []
  differences = []
  # The differences must be computed only after the local centers have been
  # refreshed, hence the control dependency.
  with ops.control_dependencies(local_center_vars_update):
    for v, lv in zip(local_vars, local_center_vars):
      with ops.device(v.device):
        differences.append(math_ops.subtract(v, lv))
    # Elastic update: pull each local variable toward the center, and push
    # each global center toward the local variable, by moving_rate * diff.
    for lvar, diff in zip(local_vars, differences):
      with ops.device(lvar.device):
        update_ops.append(
            state_ops.assign_sub(lvar,
                                 math_ops.multiply(self._moving_rate,
                                                   diff)))
    for var, diff in zip(global_center_vars, differences):
      with ops.device(var.device):
        update_ops.append(
            state_ops.assign_add(var,
                                 math_ops.multiply(self._moving_rate,
                                                   diff)))
    if global_step:
      with ops.colocate_with(global_step):
        update_ops.append(state_ops.assign_add(global_step, 1))
    variable_update = control_flow_ops.group(*(update_ops))
  return variable_update
def huber_loss(labels, predictions, weights=1.0, delta=1.0, scope=None,
               loss_collection=ops.GraphKeys.LOSSES,
               reduction=Reduction.WEIGHTED_SUM_BY_NONZERO_WEIGHTS):
  """Adds a Huber Loss term to the training procedure.

  For each value x in `error=labels-predictions`:

  ```
  0.5 * x^2                  if |x| <= d
  0.5 * d^2 + d * (|x| - d)  if |x| > d
  ```

  where d is `delta`. See: https://en.wikipedia.org/wiki/Huber_loss

  `weights` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weights` is a tensor of
  size [batch_size], then the total loss for each sample of the batch is
  rescaled by the corresponding element in the `weights` vector. If the shape
  of `weights` matches the shape of `predictions`, then the loss of each
  measurable element of `predictions` is scaled by the corresponding value of
  `weights`.

  Args:
    labels: The ground truth output tensor, same dimensions as 'predictions'.
    predictions: The predicted outputs.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions
      must be either `1`, or the same as the corresponding `losses` dimension).
    delta: `float`, the point where the huber loss function changes from a
      quadratic to linear.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    A scalar `Tensor` that returns the weighted loss.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `labels`
      or if the shape of `weights` is invalid.
  """
  with ops.name_scope(scope, "huber_loss",
                      (predictions, labels, weights)) as scope:
    predictions = math_ops.to_float(predictions)
    labels = math_ops.to_float(labels)
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())
    error = math_ops.subtract(predictions, labels)
    abs_error = math_ops.abs(error)
    quadratic = math_ops.minimum(abs_error, delta)
    # The following expression is the same in value as
    # tf.maximum(abs_error - delta, 0), but importantly the gradient for the
    # expression when abs_error == delta is 0 (for tf.maximum it would be 1).
    # This is necessary to avoid doubling the gradient, since there is already
    # a nonzero contribution to the gradient from the quadratic term.
    linear = math_ops.subtract(abs_error, quadratic)
    losses = 0.5 * quadratic * quadratic + delta * linear
    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)
def mean_squared_error(labels, predictions, weights=1.0, scope=None,
                       loss_collection=ops.GraphKeys.LOSSES):
  """Adds a Sum-of-Squares loss to the training procedure.

  `weights` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weights` is a tensor of
  size [batch_size], then the total loss for each sample of the batch is
  rescaled by the corresponding element in the `weights` vector. If the shape
  of `weights` matches the shape of `predictions`, then the loss of each
  measurable element of `predictions` is scaled by the corresponding value of
  `weights`.

  Args:
    labels: The ground truth output tensor, same dimensions as 'predictions'.
    predictions: The predicted outputs.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions
      must be either `1`, or the same as the corresponding `losses` dimension).
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.

  Returns:
    A scalar `Tensor` representing the loss value.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `labels`
      or if the shape of `weights` is invalid.
  """
  with ops.name_scope(scope, "mean_squared_error",
                      (predictions, labels, weights)) as scope:
    predictions = math_ops.to_float(predictions)
    labels = math_ops.to_float(labels)
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())
    squared_residuals = math_ops.square(
        math_ops.subtract(predictions, labels))
    return compute_weighted_loss(
        squared_residuals, weights, scope, loss_collection)
def huber_loss(labels, predictions, weight=1.0, k=1.0, scope=None):
  """Adds a Huber loss to the training procedure.

  Huber loss (https://en.wikipedia.org/wiki/Huber_loss):

      f(x) = 0.5 * x^2           if |x| <= k
             k * |x| - 0.5 * k^2 otherwise

  where x = predictions - labels.

  Args:
    labels: The ground truth output tensor, same dimensions as `predictions`.
    predictions: The predicted outputs.
    weight: Coefficient(s) for the loss; must not be None.
    k: value of k in the Huber loss (the quadratic/linear transition point).
    scope: Optional scope for the operations performed in computing the loss.

  Returns:
    A scalar `Tensor` with the weighted Huber loss.

  Raises:
    ValueError: If `weight` is None or the shapes of `predictions` and
      `labels` don't match.

  Reference implementation:
  http://concise-bio.readthedocs.io/en/latest/_modules/concise/tf_helper.html
  """
  # BUG FIX: the name scope was the copy-pasted "absolute_difference";
  # use "huber_loss" so ops created here are named after this loss.
  with ops.name_scope(scope, "huber_loss",
                      [predictions, labels]) as scope:
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())
    if weight is None:
      raise ValueError("`weight` cannot be None")
    predictions = math_ops.to_float(predictions)
    labels = math_ops.to_float(labels)
    diff = math_ops.subtract(predictions, labels)
    abs_diff = tf.abs(diff)
    # Quadratic inside [-k, k], linear outside.
    losses = tf.where(abs_diff < k,
                      0.5 * tf.square(diff),
                      k * abs_diff - 0.5 * k ** 2)
    return tf.losses.compute_weighted_loss(losses, weight)
def normalize_moments(counts, mean_ss, variance_ss, shift, name=None):
  """Calculate the mean and variance of based on the sufficient statistics.

  Args:
    counts: A `Tensor` containing the total count of the data (one value).
    mean_ss: A `Tensor` containing the mean sufficient statistics: the
      (possibly shifted) sum of the elements to average over.
    variance_ss: A `Tensor` containing the variance sufficient statistics: the
      (possibly shifted) squared sum of the data to compute the variance over.
    shift: A `Tensor` containing the value by which the data is shifted for
      numerical stability, or `None` if no shift was performed.
    name: Name used to scope the operations that compute the moments.

  Returns:
    Two `Tensor` objects: `mean` and `variance`.
  """
  with ops.name_scope(name, "normalize",
                      [counts, mean_ss, variance_ss, shift]):
    divisor = math_ops.reciprocal(counts, name="divisor")
    if shift is not None:
      shifted_mean = math_ops.multiply(mean_ss, divisor, name="shifted_mean")
      # Undo the shift to recover the true mean.
      mean = math_ops.add(shifted_mean, shift, name="mean")
    else:
      # No shift was applied, so the normalized sum is already the mean.
      shifted_mean = math_ops.multiply(mean_ss, divisor, name="mean")
      mean = shifted_mean
    # Var[x] = E[(x - shift)^2] - (E[x - shift])^2, which is shift-invariant.
    variance = math_ops.subtract(
        math_ops.multiply(variance_ss, divisor),
        math_ops.square(shifted_mean),
        name="variance")
  return (mean, variance)
def inner_loss(y_true, y_pred):
  # Clip the raw residual to [clip_value_min, clip_value_max] (closure
  # variables), then square, so the loss saturates outside that range.
  residual = math_ops.subtract(y_pred, y_true)
  clipped = tf.clip_by_value(residual,
                             clip_value_min=clip_value_min,
                             clip_value_max=clip_value_max)
  return tf.square(clipped)
def inner_loss(y_true, y_pred):
  # Absolute residual and its square; `clip` is a closure variable.
  abs_residual = math_ops.abs(math_ops.subtract(y_pred, y_true))
  squared = math_ops.square(abs_residual)
  if clip > 0.0:
    # Huber-like: quadratic inside the clip radius, linear outside.
    return tf.where(abs_residual < clip, 0.5 * squared, abs_residual - 0.5)
  return squared
def _model_fn(features, labels, mode):
  """Builds sum/product/difference heads over features x and y."""
  _ = labels  # Unused by this model.
  x = features['x']
  y = features['y']
  with ops.name_scope('outputs'):
    predictions = {
        'sum': math_ops.add(x, y, name='sum'),
        'product': math_ops.multiply(x, y, name='product'),
        'difference': math_ops.subtract(x, y, name='difference'),
    }
  if core:
    # Core estimator: one PredictOutput per head, with 'sum' doubling as the
    # default serving signature.
    export_outputs = {
        key: export_output.PredictOutput({key: value})
        for key, value in predictions.items()
    }
    export_outputs[signature_constants
                   .DEFAULT_SERVING_SIGNATURE_DEF_KEY] = export_outputs['sum']
    return model_fn.EstimatorSpec(
        mode=mode,
        predictions=predictions,
        export_outputs=export_outputs,
        loss=constant_op.constant(0),
        train_op=control_flow_ops.no_op())
  # Contrib estimator: express each head as an output alternative instead.
  output_alternatives = {
      key: (constants.ProblemType.UNSPECIFIED, {key: value})
      for key, value in predictions.items()
  }
  return contrib_model_fn.ModelFnOps(
      mode=mode,
      predictions=predictions,
      output_alternatives=output_alternatives,
      loss=constant_op.constant(0),
      train_op=control_flow_ops.no_op())
def hinge_loss(labels, logits, weights=1.0, scope=None,
               loss_collection=ops.GraphKeys.LOSSES,
               reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Adds a hinge loss to the training procedure.

  Args:
    labels: The ground truth tensor, same shape as `logits`; values expected
      to be 0.0 or 1.0. Internally the {0,1} labels are converted to {-1,1}
      when calculating the hinge loss.
    logits: The logits, a float tensor. Logits are assumed to be unbounded and
      0-centered: a value > 0 (resp. < 0) is considered a positive (resp.
      negative) binary prediction.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions
      must be either `1`, or the same as the corresponding `losses` dimension).
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
    shape as `labels`; otherwise, it is scalar.

  Raises:
    ValueError: If the shapes of `logits` and `labels` don't match or if
      `labels` or `logits` is None.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
  if labels is None:
    raise ValueError("labels must not be None.")
  if logits is None:
    raise ValueError("logits must not be None.")
  with ops.name_scope(scope, "hinge_loss", (logits, labels, weights)) as scope:
    logits = math_ops.to_float(logits)
    labels = math_ops.to_float(labels)
    logits.get_shape().assert_is_compatible_with(labels.get_shape())
    # Convert the {0, 1} labels into {-1, +1} so the hinge formula
    # max(0, 1 - y * logits) applies.
    ones = array_ops.ones_like(labels)
    signed_labels = math_ops.subtract(2 * labels, ones)
    hinge = nn_ops.relu(
        math_ops.subtract(ones, math_ops.multiply(signed_labels, logits)))
    return compute_weighted_loss(
        hinge, weights, scope, loss_collection, reduction=reduction)
def _FoldFusedBatchNorms(graph):
  """Finds fused batch norm layers and folds them into preceding layers.

  Folding only affects the following layers: Conv2D, fully connected,
  depthwise convolution.

  Args:
    graph: Graph to walk and modify.

  Raises:
    ValueError: When batch norm folding fails.
  """
  for match in _FindFusedBatchNorms(graph):
    scope, sep, _ = match.layer_op.name.rpartition('/')
    # Make sure new ops are added to `graph` and put on the same device as
    # `bn_op`. The '/' (i.e. `sep`) ensures that we reuse the existing scope
    # named `scope`. Otherwise, TF creates a unique scope whose name starts
    # with `scope`.
    with graph.as_default(), graph.name_scope(scope + sep), ops.device(
        match.bn_op.device):
      with graph.name_scope(scope + sep + 'BatchNorm_Fold' + sep):
        # new weights = old weights * gamma / sqrt(variance + epsilon)
        # new biases = -mean * gamma / sqrt(variance + epsilon) + beta
        multiplier_tensor = match.gamma_tensor * math_ops.rsqrt(
            match.variance_tensor + match.bn_op.get_attr('epsilon'))
        bias_tensor = math_ops.subtract(
            match.beta_tensor,
            match.mean_tensor * multiplier_tensor,
            name='bias')

        # The shape of depthwise weights is different, so we need to reshape
        # the multiplier_tensor to ensure that the scaled_weight_tensor has
        # the expected shape.
        if match.layer_op.type == 'DepthwiseConv2dNative':
          new_shape = [
              match.weight_tensor.get_shape().as_list()[2],
              match.weight_tensor.get_shape().as_list()[3]
          ]
          multiplier_tensor = array_ops.reshape(
              multiplier_tensor, new_shape, name='scale_reshape')

      # TODO(suharshs): This naming of the following ops needs to carefully
      # follow the naming expected by quantize.py. Generalize the quantize
      # code to not require these delicate naming conventions.
      scaled_weight_tensor = math_ops.multiply(
          match.weight_tensor, multiplier_tensor, name='mul_fold')

      # Re-create the layer op with the folded weights, then add the folded
      # bias, and splice the result in place of the batch norm's output.
      new_layer_tensor = _CloneWithNewOperands(
          match.layer_op, match.input_tensor, scaled_weight_tensor)

      bias_add_tensor = math_ops.add(
          new_layer_tensor, bias_tensor, name='add_fold')

      nodes_modified_count = graph_editor.reroute_ts(bias_add_tensor,
                                                     match.output_tensor)
      # Exactly one consumer edge should have been rerouted; anything else
      # means the matched subgraph was not in the expected shape.
      if nodes_modified_count != 1:
        raise ValueError(
            'Unexpected inputs to op: %s' % match.output_tensor.name)
def _AtanhGrad(op, grad):
  """Returns grad * 1/ (1 - x^2)."""
  x = op.inputs[0]
  with ops.control_dependencies([grad]):
    x = math_ops.conj(x)
    one = constant_op.constant(1, dtype=grad.dtype)
    # d/dx atanh(x) = 1 / (1 - x^2)
    denom = math_ops.subtract(one, math_ops.square(x))
    return grad * math_ops.reciprocal(denom)
def _AcosGrad(op, grad):
  """Returns grad * -1/sqrt(1-x^2)."""
  x = op.inputs[0]
  # CONSISTENCY FIX: depend on the gradient tensor itself rather than
  # `grad.op`, matching the sibling gradient functions (e.g. _AtanhGrad);
  # ops.control_dependencies accepts tensors directly and this avoids
  # assuming `grad` exposes an `.op` attribute.
  with ops.control_dependencies([grad]):
    x = math_ops.conj(x)
    x2 = math_ops.square(x)
    one = constant_op.constant(1, dtype=grad.dtype)
    # d/dx acos(x) = -1 / sqrt(1 - x^2)
    den = math_ops.sqrt(math_ops.subtract(one, x2))
    inv = math_ops.reciprocal(den)
    return -grad * inv
def _objective(self, x):
  """Rosenbrock function. (Carl Edward Rasmussen, 2001-07-21).

  f(x) = sum_{i=1:D-1} 100*(x(i+1) - x(i)^2)^2 + (1-x(i))^2

  Args:
    x: a Variable
  Returns:
    f: a tensor (objective value)
  """
  d = array_ops.size(x)
  x_tail = array_ops.strided_slice(x, [1], [d])        # x(2..D)
  x_head = array_ops.strided_slice(x, [0], [d - 1])    # x(1..D-1)
  banana_term = 100 * math_ops.square(
      math_ops.subtract(x_tail, math_ops.square(x_head)))
  offset_term = math_ops.square(math_ops.subtract(1.0, x_head))
  return math_ops.reduce_sum(math_ops.add(banana_term, offset_term))
def test_logging_trainable(self):
  with ops.Graph().as_default() as g, self.test_session(g):
    # One trainable variable named 'foo'; the monitor should log it.
    var = variables.Variable(constant_op.constant(42.0), name='foo')
    var.initializer.run()
    coef = constant_op.constant(1.0)
    loss = math_ops.subtract(
        math_ops.multiply(var, coef), constant_op.constant(1.0))
    train_step = gradient_descent.GradientDescentOptimizer(0.5).minimize(loss)
    ops.get_default_session().run(train_step)
    self._run_monitor(learn.monitors.LoggingTrainable('foo'))
    self.assertRegexpMatches(str(self.logged_message), var.name)
def compute_gradients(self, loss, var_list=None,
                      gate_gradients=optimizer.Optimizer.GATE_OP,
                      aggregation_method=None,
                      colocate_gradients_with_ops=False,
                      grad_loss=None):
  """Compute gradients of `loss` for the variables in `var_list`.

  Adds rho * elastic_difference to the loss to control exploration. This is
  the first part of `minimize()`. It returns a list of (gradient, variable)
  pairs where "gradient" is the gradient for "variable". Note that "gradient"
  can be a `Tensor`, an `IndexedSlices`, or `None` if there is no gradient for
  the given variable.

  Args:
    loss: A Tensor containing the value to minimize.
    var_list: Optional list or tuple of `tf.Variable` to update to minimize
      `loss`. Defaults to the list of variables collected in the graph under
      the key `GraphKey.TRAINABLE_VARIABLES`.
    gate_gradients: How to gate the computation of gradients. Can be
      `GATE_NONE`, `GATE_OP`, or `GATE_GRAPH`.
    aggregation_method: Specifies the method used to combine gradient terms.
      Valid values are defined in the class `AggregationMethod`.
    colocate_gradients_with_ops: If True, try colocating gradients with the
      corresponding op.
    grad_loss: Optional. A `Tensor` holding the gradient computed for `loss`.

  Returns:
    A list of (gradient, variable) pairs. Variable is always present, but
    gradient can be `None`.

  Raises:
    TypeError: If `var_list` contains anything else than `Variable` objects.
    ValueError: If some arguments are invalid.
  """
  if not var_list:
    var_list = variables.trainable_variables()
  # Penalize each trainable variable's distance from its local center copy
  # (elastic averaging) to control exploration.
  center_vars = [self._local_map[var] for var in var_list]
  elastic_diffs = [
      math_ops.subtract(v, lv)
      for v, lv in zip(variables.trainable_variables(), center_vars)
  ]
  distance_loss = self._rho * math_ops.add_n(
      [gen_nn_ops.l2_loss(diff) for diff in elastic_diffs])
  total_loss = loss + distance_loss
  return self._opt.compute_gradients(total_loss, var_list, gate_gradients,
                                     aggregation_method,
                                     colocate_gradients_with_ops, grad_loss)
def call(self, values, denominator):
  """Computes the rate since the last call.

  Args:
    values: Tensor with the per-example value.
    denominator: Measure to take the rate with respect to.

  Returns:
    The rate or 0 if denominator is unchanged since last call.
  """
  # Work in float64 so the deltas and division below are done at full
  # precision.
  if denominator.dtype != dtypes.float64:
    denominator = math_ops.cast(denominator, dtypes.float64)
  if values.dtype != dtypes.float64:
    values = math_ops.cast(values, dtypes.float64)
  # Deltas since the previous call; numer/denom must be assigned before the
  # prev_* state is overwritten below.
  state_ops.assign(self.numer, math_ops.subtract(values, self.prev_values))
  state_ops.assign(self.denom,
                   math_ops.subtract(denominator, self.prev_denominator))
  # Remember the current observations for the next call.
  state_ops.assign(self.prev_values, values)
  state_ops.assign(self.prev_denominator, denominator)
  # _safe_div returns 0 when self.denom is 0 (denominator unchanged).
  return self._safe_div(self.numer, self.denom, name="safe_rate")
def testFeedTwoHandlesDirectly(self):
  with self.test_session() as sess:
    a = constant_op.constant(10.0)
    b = constant_op.constant(5.0)
    prod = math_ops.multiply(a, b)     # 50.0
    quot = math_ops.div(a, b)          # 2.0
    diff = math_ops.subtract(prod, quot)
    # Materialize both intermediates as session handles.
    h_prod = sess.run(session_ops.get_session_handle(prod))
    h_quot = sess.run(session_ops.get_session_handle(quot))
    # Feeding the handles straight: 50 - 2 = 48; swapped: 2 - 50 = -48.
    self.assertAllClose(48.0,
                        sess.run(diff, feed_dict={prod: h_prod,
                                                  quot: h_quot}))
    self.assertAllClose(-48.0,
                        sess.run(diff, feed_dict={prod: h_quot,
                                                  quot: h_prod}))
def _assign_moving_average(self, variable, value, one_minus_decay):
  """Updates `variable` towards `value` via an exponential moving average.

  Implements `variable -= (variable - value) * (1 - decay)`, which is
  equivalent to `variable = decay * variable + (1 - decay) * value`.
  """
  with ops.name_scope(None, 'AssignMovingAvg',
                      [variable, value, one_minus_decay]) as scope:
    with ops.colocate_with(variable):
      delta = math_ops.multiply(
          math_ops.subtract(variable.read_value(), value), one_minus_decay)
      if not isinstance(variable, resource_variable_ops.ResourceVariable):
        return state_ops.assign_sub(variable, delta, name=scope)
      # For resource variables, call the raw op directly:
      # state_ops.assign_sub would do an extra read_variable_op after the
      # assign, which we avoid here.
      return gen_resource_variable_ops.assign_sub_variable_op(
          variable.handle, delta, name=scope)
def absolute_difference(
    labels, predictions, weights=1.0, scope=None,
    loss_collection=ops.GraphKeys.LOSSES,
    reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Adds an Absolute Difference loss to the training procedure.

  `weights` acts as a coefficient for the loss: a scalar scales the loss
  uniformly, a `[batch_size]` tensor rescales each sample's loss, and a
  tensor shaped like `predictions` rescales each measurable element.

  Args:
    labels: The ground truth output tensor, same dimensions as 'predictions'.
    predictions: The predicted outputs.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions
      must be either `1`, or the same as the corresponding `losses`
      dimension).
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which this loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
    shape as `labels`; otherwise, it is scalar.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of
      `labels` or if the shape of `weights` is invalid or if `labels`
      or `predictions` is None.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
  if labels is None:
    raise ValueError("labels must not be None.")
  if predictions is None:
    raise ValueError("predictions must not be None.")
  with ops.name_scope(scope, "absolute_difference",
                      (predictions, labels, weights)) as scope:
    labels = math_ops.to_float(labels)
    predictions = math_ops.to_float(predictions)
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())
    losses = math_ops.abs(math_ops.subtract(predictions, labels))
    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)
def hinge_loss(logits, labels=None, scope=None):
  """Method that returns the loss tensor for hinge loss.

  Args:
    logits: The logits, a float tensor. Note that logits are assumed to be
      unbounded and 0-centered. A value > 0 (resp. < 0) is considered a
      positive (resp. negative) binary prediction.
    labels: The ground truth output tensor. Its shape should match the shape of
      logits. The values of the tensor are expected to be 0.0 or 1.0. Internally
      the {0,1} labels are converted to {-1,1} when calculating the hinge loss.
    scope: The scope for the operations performed in computing the loss.

  Returns:
    A `Tensor` of same shape as `logits` and `labels` representing the loss
      values across the batch.

  Raises:
    ValueError: If the shapes of `logits` and `labels` don't match.
  """
  with ops.name_scope(scope, "hinge_loss", [logits, labels]) as scope:
    logits.get_shape().assert_is_compatible_with(labels.get_shape())
    # We first need to convert binary labels to -1/1 labels (as floats).
    labels = math_ops.to_float(labels)
    all_ones = array_ops.ones_like(labels)
    labels = math_ops.subtract(2 * labels, all_ones)
    # hinge = max(0, 1 - y * logits) with y in {-1, 1}.
    return nn_ops.relu(
        math_ops.subtract(all_ones, math_ops.multiply(labels, logits)))
def absolute_difference(
    labels, predictions, weights=1.0, scope=None,
    loss_collection=ops.GraphKeys.LOSSES,
    reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Adds an Absolute Difference loss to the training procedure.

  `weights` acts as a coefficient for the loss. A scalar scales the loss by
  the given value; a `Tensor` of shape `[batch_size]` rescales the loss of
  each sample in the batch; weights shaped like `predictions` rescale each
  measurable element of `predictions`.

  Args:
    labels: The ground truth output tensor, same dimensions as 'predictions'.
    predictions: The predicted outputs.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions
      must be either `1`, or the same as the corresponding `losses`
      dimension).
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which this loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
    shape as `labels`; otherwise, it is scalar.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of
      `labels` or if the shape of `weights` is invalid or if `labels`
      or `predictions` is None.
  """
  # Guard clauses: both inputs are mandatory despite the keyword form.
  if labels is None:
    raise ValueError("labels must not be None.")
  if predictions is None:
    raise ValueError("predictions must not be None.")
  with ops.name_scope(scope, "absolute_difference",
                      (predictions, labels, weights)) as scope:
    predictions = math_ops.to_float(predictions)
    labels = math_ops.to_float(labels)
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())
    elementwise = math_ops.abs(predictions - labels)
    return compute_weighted_loss(
        elementwise, weights, scope, loss_collection, reduction=reduction)
def testNextOp(self):
  """Ensures all ops get selected eventually."""
  with tf_ops.Graph().as_default():
    ops = [
        math_ops.add(1, 2),
        math_ops.subtract(1, 2),
        math_ops.reduce_mean([1, 2]),
    ]
    queue = op_queue.OpQueue(ops, seed=0)

    with self.cached_session() as sess:
      # Drawing once per op must eventually cover the whole set.
      selected_ops = {queue.next_op(sess) for _ in ops}
      self.assertEqual(set(ops), set(selected_ops))

      # A further draw stays within the original set of ops.
      selected_ops.add(queue.next_op(sess))
      self.assertEqual(set(ops), set(selected_ops))
def setUp(self):
  """Builds a small graph (c = a + b, d = a - c, e = c * d) and a session."""
  self.a = variables.Variable(10.0, name="a")
  self.b = variables.Variable(20.0, name="b")
  self.c = math_ops.add(self.a, self.b, name="c")  # Should be 30.0.
  self.d = math_ops.subtract(self.a, self.c, name="d")  # Should be -20.0.
  self.e = math_ops.multiply(self.c, self.d, name="e")  # Should be -600.0.
  self.ph = array_ops.placeholder(dtypes.float32, shape=(2, 2), name="ph")
  self.f = math_ops.multiply(self.e, self.ph, name="f")
  self.opt = gradient_descent.GradientDescentOptimizer(0.1).minimize(
      self.e, name="opt")
  self.sess = session.Session()
  # Only a and b hold state; the remaining nodes are derived ops.
  self.sess.run(self.a.initializer)
  self.sess.run(self.b.initializer)
def mean_squared_error(labels, predictions, weights=1.0, scope=None,
                       loss_collection=ops.GraphKeys.LOSSES,
                       reduction=Reduction.WEIGHTED_SUM_BY_NONZERO_WEIGHTS):
  """Adds a Sum-of-Squares loss to the training procedure.

  `weights` acts as a coefficient for the loss: a scalar scales the loss
  uniformly, a tensor of size [batch_size] rescales each sample's loss, and
  weights shaped like `predictions` rescale each measurable element.

  Args:
    labels: The ground truth output tensor, same dimensions as 'predictions'.
    predictions: The predicted outputs.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions
      must be either `1`, or the same as the corresponding `losses`
      dimension).
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    A scalar `Tensor` that returns the weighted loss.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `labels` or
      if the shape of `weights` is invalid.
  """
  with ops.name_scope(scope, "mean_squared_error",
                      (predictions, labels, weights)) as scope:
    labels = math_ops.to_float(labels)
    predictions = math_ops.to_float(predictions)
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())
    squared_error = math_ops.square(math_ops.subtract(predictions, labels))
    return compute_weighted_loss(
        squared_error, weights, scope, loss_collection, reduction=reduction)
def test_subscribe_tensors_within_control_flow_context(self):
  """Side effect ops are added with the same control flow context."""
  c1 = constant_op.constant(10)
  c2 = constant_op.constant(20)
  x1 = math_ops.add(c1, c2)
  x2 = math_ops.multiply(c1, c2)
  cond = control_flow_ops.cond(
      x1 < x2,
      lambda: math_ops.add(c1, c2, name='then'),
      lambda: math_ops.subtract(c1, c2, name='else'),
      name='cond')
  # Tensor created inside the cond's 'then' branch.
  branch = ops.get_default_graph().get_tensor_by_name('cond/then:0')

  def context(tensor):
    # Internal API: the control flow context the tensor's op was built in.
    return tensor.op._get_control_flow_context()

  # x1/x2 live in the outer context; branch lives inside the cond.
  self.assertIs(context(x1), context(x2))
  self.assertIsNot(context(x1), context(branch))
  results = []

  def sub(tensor):
    results.append(tensor)
    return tensor

  tensors = [x1, branch, x2]
  subscriptions = subscribe.subscribe(
      tensors, lambda t: script_ops.py_func(sub, [t], [t.dtype]))
  # Each side-effect op must inherit its subscribed tensor's context.
  for tensor, subscription in zip(tensors, subscriptions):
    self.assertIs(context(tensor), context(subscription))
  # Verify that sub(x1) and sub(x2) are in the same context.
  self.assertIs(context(subscriptions[0]), context(subscriptions[2]))
  # Verify that sub(x1) and sub(branch) are not.
  self.assertIsNot(context(subscriptions[0]), context(subscriptions[1]))
  with self.cached_session() as sess:
    self.evaluate(cond)
  # x1, x2 and exactly one branch fire: three recorded calls.
  self.assertEqual(3, len(results))
def get_smoothL1_loss(predictions, labels, object_matches, delta=1.0):
  '''
  Calculate smooth L1 loss.

  Args:
    predictions (batch size, dimensions)
    labels (batch size, dimensions)
    object_matches: per-box match mask multiplied into the L1 sums.
    delta: threshold between the quadratic and linear regimes.

  Returns:
    smooth L1 loss according to
    https://mohitjainweb.files.wordpress.com/2018/03/smoothl1loss.pdf

  NOTE(review): the 0.5*x**2 / x - 0.5 branches only join smoothly when
  delta == 1.0 -- confirm callers never pass a different delta.
  '''
  predictions = math_ops.to_float(predictions)
  labels = math_ops.to_float(labels)
  # Per-box L1 distance summed over the last axis, gated by the match mask.
  per_box_l1 = math_ops.reduce_sum(
      math_ops.abs(math_ops.subtract(predictions, labels)), axis=-1)
  match_mask = tf.reshape(object_matches, (-1, object_matches.shape[1]))
  l1_losses = match_mask * per_box_l1
  # Quadratic below delta, linear above.
  below_delta = tf.less(l1_losses, delta)
  smoothL1_loss = tf.where(below_delta, 0.5 * (l1_losses**2),
                           l1_losses - 0.5)
  return tf.reduce_mean(smoothL1_loss)
def fm_logit_fn(inputs):
  """Second-order factorization-machine logit from embedded feature columns.

  Uses closure variables `column_names` and `fm_embedding_size`.
  """
  with variable_scope.variable_scope('get_fm_inputs'):
    num_fields = len(column_names)
    field_tensors = [inputs[c] for c in column_names]
    net = array_ops.concat(field_tensors, axis=1)
    embeddings = gen_array_ops.reshape(
        net, (-1, num_fields, fm_embedding_size))  # -1 * F * K
    # Simplified FM pairwise-interaction formula:
    # 0.5 * sum_k[(sum_f e_fk)^2 - sum_f e_fk^2]
    square_of_sum = math_ops.square(
        math_ops.reduce_sum(embeddings, -2))  # -1 * K
    sum_of_square = math_ops.reduce_sum(
        math_ops.square(embeddings), -2)  # -1 * K
    logits = 0.5 * math_ops.reduce_sum(
        math_ops.subtract(square_of_sum, sum_of_square), -1)  # -1
    return array_ops.expand_dims(logits, -1)
def mean_squared_error(labels, predictions, weights=1.0, scope=None,
                       loss_collection=ops.GraphKeys.LOSSES):
  """Adds a Sum-of-Squares loss to the training procedure.

  `weights` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weights` is a tensor of
  size [batch_size], then the total loss for each sample of the batch is
  rescaled by the corresponding element in the `weights` vector. If the shape
  of `weights` matches the shape of `predictions`, then the loss of each
  measurable element of `predictions` is scaled by the corresponding value of
  `weights`.

  WARNING: `weights` also supports dimensions of 1, but the broadcasting does
  not work as advertised, you'll wind up with weighted sum instead of weighted
  mean for any but the last dimension. This will be cleaned up soon, so please
  do not rely on the current behavior for anything but the shapes documented
  for `weights` below.

  Args:
    labels: The ground truth output tensor, same dimensions as 'predictions'.
    predictions: The predicted outputs.
    weights: Coefficients for the loss a scalar, a tensor of shape
      `[batch_size]` or a tensor whose shape matches `predictions`.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.

  Returns:
    A scalar `Tensor` representing the loss value.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `labels` or
      if the shape of `weights` is invalid.
  """
  with ops.name_scope(scope, "mean_squared_error",
                      [predictions, labels, weights]) as scope:
    # Convert to float tensors *before* the shape check so that non-Tensor
    # inputs (e.g. ndarrays / lists) are accepted, matching the order used
    # by the sibling loss functions in this file.
    predictions = math_ops.to_float(predictions)
    labels = math_ops.to_float(labels)
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())
    losses = math_ops.square(math_ops.subtract(predictions, labels))
    return compute_weighted_loss(losses, weights, scope, loss_collection)
def _get_learning_rate(self, step):
  """Linear warmup followed by the polynomial decay schedule."""
  with ops.name_scope_v2(self.name or 'PolynomialDecayWithWarmup') as name:
    initial_learning_rate = ops.convert_to_tensor_v2(
        self.initial_learning_rate, name='initial_learning_rate')
    warmup_steps = ops.convert_to_tensor_v2(
        self.warmup_steps, name='warmup_steps')
    # Ramp linearly from 0 up to the initial rate during warmup.
    warmup_rate = initial_learning_rate * step / warmup_steps
    # After warmup, follow the polynomial schedule offset by warmup_steps.
    poly_rate = self.poly_rate_scheduler(
        math_ops.subtract(step, warmup_steps))
    return tf.where(step <= warmup_steps, warmup_rate, poly_rate, name=name)
def focal_loss_alpha(labels=None,
                     logits=None,
                     pos_weights=None,
                     gamma=2.,
                     clips=None,
                     name='focal_loss'):
  """Adds focal-loss weights to the weighted sigmoid cross entropy.

  The focal weight `(1 - p_t)**gamma` down-weights well-classified examples;
  the weights are clipped into `[clips[0], clips[1]]` before being applied to
  a numerically-stable weighted sigmoid cross entropy.

  Args:
    labels: 2-D {0, 1} tensor of shape [batch, n_classes] with static shape.
    logits: Tensor of the same shape as `labels`.
    pos_weights: Positive-class weights for the cross entropy.
    gamma: Focal-loss focusing exponent.
    clips: Two-element sequence (low, high) used to clip the focal weights.
    name: Variable scope name for the loss ops.

  Returns:
    Elementwise focal-weighted sigmoid cross entropy, same shape as `labels`.
  """
  # Defaults were mutable lists ([]); they were never usable as actual
  # inputs, so `None` keeps the call signature backward-compatible.
  batchsize = labels.get_shape().as_list()[0]
  n_classes = labels.get_shape().as_list()[1]
  with tf.variable_scope(name) as vs:
    # first get a sigmoid to determine the focal loss weigths:
    sigmoid_logits = tf.nn.sigmoid(logits)
    # determine the focal loss weigths:
    labels = math_ops.to_float(labels)
    sigmoid_logits.get_shape().assert_is_compatible_with(labels.get_shape())
    # p_t: predicted probability of the *labeled* class.
    preds = array_ops.where(math_ops.equal(labels, 1.), sigmoid_logits,
                            1. - sigmoid_logits)
    focal_weights = (math_ops.subtract(1., preds))**gamma
    # clip the weights into [clips[0], clips[1]]
    up_clip = math_ops.multiply(tf.ones([batchsize, n_classes]), clips[1])
    low_clip = math_ops.multiply(tf.ones([batchsize, n_classes]), clips[0])
    focal_weights = array_ops.where(
        math_ops.greater(focal_weights, clips[1]), up_clip, focal_weights)
    focal_weights = array_ops.where(math_ops.less(focal_weights, clips[0]),
                                    low_clip, focal_weights)
    log_weight = 1. + (pos_weights - 1.) * labels
    # now put them into a weighted softmax ce (stable form of
    # log(1 + exp(-|x|)) + relu(-x)):
    loss = math_ops.multiply(math_ops.add(
        (1. - labels) * logits,
        log_weight * (math_ops.log1p(math_ops.exp(-math_ops.abs(logits))) +
                      nn_ops.relu(-logits))),
                             focal_weights,
                             name='sc_entropy')
    return loss
def switch_loss(labels,
                predictions,
                weights=1.0,
                c1=0.999,
                c2=1.0,
                c3=1.0,
                switch_value=1.0,
                scope=None,
                loss_collection=ops.GraphKeys.LOSSES,
                reduction=tf.losses.Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Exponential loss for labels <= switch_value, linear loss for labels > switch_value.

  The exponential branch is `f(x) = b*x^a` (note: power `a`, not 2), with
  `a` and `b` chosen so that `f(c1) = c1` and `f(c2) = c3*c2`. Thus, for
  each label `l` and prediction `p` the following is calculated:

  ```
  b*|l-p|^a  if l <= switch_value
  |l-p|      if l > switch_value
  ```
  """

  def get_exp_params(c1=0.999, c2=1.0, c3=1.0):
    # Solve the two constraints for the exponent a and coefficient b.
    a = math.log(float(c1) / (c2 * c3), c1) / (1 - math.log(float(c3), c1))
    b = c1 / c1**a
    return a, b

  if labels is None:
    raise ValueError("labels must not be None.")
  if predictions is None:
    raise ValueError("predictions must not be None.")
  a, b = get_exp_params(c1, c2, c3)
  with ops.name_scope(scope, "switch_loss",
                      (predictions, labels, weights)) as scope:
    predictions = math_ops.to_float(predictions)
    labels = math_ops.to_float(labels)
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())
    error = math_ops.subtract(predictions, labels)
    abs_error = math_ops.abs(error)
    exp_error = b * (abs_error**a)
    # Switch on the *label* magnitude, not the error magnitude.
    losses = tf.where(labels <= switch_value, exp_error, abs_error)
    return tf.losses.compute_weighted_loss(losses,
                                           weights,
                                           scope,
                                           loss_collection,
                                           reduction=reduction)
def __call__(self, step):
  """Stochastic learning-rate schedule: eta*dt - sigma*Z*sqrt(dt)/(step+1)."""
  with tf.name_scope(self.name or "Dilera") as name:
    dtype = tf.dtypes.float32
    initial_learning_rate = tf.convert_to_tensor(
        self.initial_learning_rate, dtype=dtype,
        name="initial_learning_rate")
    sigma = math_ops.cast(self.sigma, dtype)
    dt = tf.constant(1, dtype=dtype)
    # Shift the step by one so the first call never divides by zero.
    t_step = math_ops.add(
        math_ops.cast(step, dtype), tf.constant(1, dtype=dtype))
    # One standard-normal draw, scaled by sigma * sqrt(dt) / t.
    Z_t = tf.random.normal([1], mean=0.0, stddev=1.0, dtype=dtype)
    Sigma_Z_over_T = math_ops.multiply(
        sigma, math_ops.divide(Z_t[0], t_step))
    noise_term = math_ops.multiply(Sigma_Z_over_T, math_ops.sqrt(dt))
    base_term = math_ops.multiply(initial_learning_rate, dt)
    return math_ops.subtract(base_term, noise_term, name=name)
def mean_squared_log_loss(
    labels, predictions, weights=1.0, scope=None,
    loss_collection=ops.GraphKeys.LOSSES,
    reduction=tf.losses.Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Squared difference of log(max(x, 1)) between predictions and labels."""
  # NOTE: the scope string "mean_squared_norm_loss" differs from the
  # function name; kept as-is since it names existing graph ops.
  with ops.name_scope(scope, "mean_squared_norm_loss",
                      (predictions, labels, weights)) as scope:
    labels = math_ops.to_float(labels)
    predictions = math_ops.to_float(predictions)
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())
    # Clamp at 1.0 so the log is defined and never negative.
    log_diff = math_ops.subtract(
        math_ops.log(tf.maximum(predictions, 1.0)),
        math_ops.log(tf.maximum(labels, 1.0)))
    return tf.losses.compute_weighted_loss(
        math_ops.square(log_diff), weights, scope, loss_collection,
        reduction=reduction)
def normalize_for_graph_lstm(tensor): """Normalizes Tensor to range [-0.5, 0.5]. Scales a Tensor uniformly to fit within [-0.5, 0.5]^n. Additionally, each dimension is shifted to be centred around [0]^n i.e. the origin, in a way that data extends the same distance in positive and negative direction. In other words, the mean between maximum and minimum value of each dimension is shifted to zero. The undo_scaling op undoes scaling, but does not undo shifting. The unnormalize op does both, but is currently unused. Returns: The normalized Tensor, and an op to undo normalization. Example usage: ``` normalized_tensor, undo_scaling = normalize_for_graph_lstm(input_tensor) normalized_output_tensor = some_op(normalized_tensor) output_tensor = undo_scaling(normalized_output_tensor) ``` """ # tensor is normalized to range[-0.5, 0.5] # this function assumes tensors with shape [ batch_size, number_of_nodes, output_size ] assert (len(tensor.shape) == 3) # compute maximum and minimum joint position value in each dimension max_dim = math_ops.reduce_max(tensor, axis=1, keepdims=True) min_dim = math_ops.reduce_min(tensor, axis=1, keepdims=True) diff_dim = math_ops.subtract(max_dim, min_dim) # get normalizing factor as maximum difference within all dimensions max_diff = math_ops.reduce_max(diff_dim, axis=2, keepdims=True) normalized_tensor = math_ops.divide(tensor - min_dim - diff_dim / 2, max_diff) # return output rescaled and shifted to original position def unnormalize(tensor): return math_ops.multiply(tensor, max_diff) + diff_dim / 2 + min_dim # return output only rescaled, centered around 0 def undo_scaling(tensor): return math_ops.multiply(tensor, max_diff) return normalized_tensor, undo_scaling
def softmax(logits: ragged_tensor.Ragged, axis=None, name=None):
  """Computes softmax activations.

  Used for multi-class predictions. The sum of all outputs generated by
  softmax is 1. This function performs the equivalent of

      softmax = tf.exp(logits) / tf.reduce_sum(tf.exp(logits), axis)

  Example usage:

  >>> softmax = tf.nn.softmax([-1, 0., 1.])
  >>> softmax
  <tf.Tensor: shape=(3,), dtype=float32,
  numpy=array([0.09003057, 0.24472848, 0.66524094], dtype=float32)>
  >>> sum(softmax)
  <tf.Tensor: shape=(), dtype=float32, numpy=1.0>

  Args:
    logits: A non-empty `Tensor`. Must be one of the following types: `half`,
      `float32`, `float64`.
    axis: The dimension softmax would be performed on. The default is -1 which
      indicates the last dimension.
    name: A name for the operation (optional).

  Returns:
    A `Tensor`. Has the same type and shape as `logits`.

  Raises:
    InvalidArgumentError: if `logits` is empty or `axis` is beyond the last
      dimension of `logits`.
  """
  if axis is None:
    axis = -1

  with ops.name_scope(name, 'RaggedSoftmax', [logits]) as name:
    # Subtract the per-slice maximum before exponentiating for numerical
    # stability; the shift cancels out in the normalization.
    shifted = math_ops.subtract(
        logits, reduce_max(logits, axis=axis, keepdims=True))
    numerator = math_ops.exp(shifted)
    return math_ops.divide(
        numerator, reduce_sum(numerator, axis=axis, keepdims=True))
def testMultiGPUSessionRun(self):
  """Dumps debug tensors from ops placed on two different GPUs."""
  local_devices = device_lib.list_local_devices()
  gpu_device_names = []
  for device in local_devices:
    if device.device_type == "GPU":
      gpu_device_names.append(device.name)
  gpu_device_names = sorted(gpu_device_names)

  if len(gpu_device_names) < 2:
    self.skipTest(
        "This test requires at least 2 GPUs, but only %d is available." %
        len(gpu_device_names))

  with session.Session() as sess:
    v = variables.Variable([10.0, 15.0], dtype=dtypes.float32, name="v")
    # u0 on GPU 0, u1 on GPU 1, w subtracts across devices.
    with ops.device(gpu_device_names[0]):
      u0 = math_ops.add(v, v, name="u0")
    with ops.device(gpu_device_names[1]):
      u1 = math_ops.multiply(v, v, name="u1")
    w = math_ops.subtract(u1, u0, name="w")

    self.evaluate(v.initializer)

    run_options = config_pb2.RunOptions(output_partition_graphs=True)
    debug_utils.watch_graph(run_options, sess.graph,
                            debug_urls="file://" + self._dump_root)
    run_metadata = config_pb2.RunMetadata()
    # w = v*v - (v+v) = [100-20, 225-30].
    self.assertAllClose(
        [80.0, 195.0],
        sess.run(w, options=run_options, run_metadata=run_metadata))

    debug_dump_dir = debug_data.DebugDumpDir(
        self._dump_root, partition_graphs=run_metadata.partition_graphs)
    # CPU (for v) plus the two GPUs.
    self.assertEqual(3, len(debug_dump_dir.devices()))
    self.assertAllClose(
        [10.0, 15.0], debug_dump_dir.get_tensors("v", 0, "DebugIdentity")[0])
    self.assertAllClose(
        [20.0, 30.0], debug_dump_dir.get_tensors("u0", 0, "DebugIdentity")[0])
    self.assertAllClose(
        [100.0, 225.0],
        debug_dump_dir.get_tensors("u1", 0, "DebugIdentity")[0])
def setUp(self):
  """Exports a frozen (variables-to-constants) meta graph for the tests."""
  self.base_path = os.path.join(test.get_temp_dir(), "no_vars")
  if not os.path.exists(self.base_path):
    os.mkdir(self.base_path)

  # Create a simple graph with a variable, then convert variables to
  # constants and export the graph.
  with ops.Graph().as_default() as g:
    x = array_ops.placeholder(dtypes.float32, name="x")
    w = variables.Variable(3.0)
    y = math_ops.subtract(w * x, 7.0, name="y")  # pylint: disable=unused-variable
    ops.add_to_collection("meta", "this is meta")

    with self.session(graph=g) as session:
      variables.global_variables_initializer().run()
      new_graph_def = graph_util.convert_variables_to_constants(
          session, g.as_graph_def(), ["y"])

    filename = os.path.join(self.base_path,
                            constants.META_GRAPH_DEF_FILENAME)
    saver.export_meta_graph(
        filename, graph_def=new_graph_def, collection_list=["meta"])
def _KroneckerProduct(b1, b2):
  """Computes the Kronecker product of two batches of square matrices."""
  b1_shape = array_ops.shape(b1)
  b2_shape = array_ops.shape(b2)
  b1_order = b1_shape[-1]
  b2_order = b2_shape[-1]

  # Leading (batch) dimensions, shared by both inputs.
  batch_dims_size = [math_ops.subtract(array_ops.size(b1_shape), 2)]
  batch_dims = array_ops.slice(b1_shape, [0], batch_dims_size)

  # Interleave singleton axes so that elementwise multiplication
  # broadcasts every entry of b1 against all of b2.
  b1_expanded = array_ops.reshape(
      b1,
      array_ops.concat([batch_dims, [b1_order], [1], [b1_order], [1]], 0))
  b2_expanded = array_ops.reshape(
      b2,
      array_ops.concat([batch_dims, [1], [b2_order], [1], [b2_order]], 0))

  order_prod = b1_order * b2_order
  kprod_shape = array_ops.concat(
      [batch_dims, [order_prod], [order_prod]], 0)
  return array_ops.reshape(b1_expanded * b2_expanded, kprod_shape)
def setUp(self):
  """Builds the test graph and a session with model pruning disabled."""
  self.a = variables.VariableV1(10.0, name="a")
  self.b = variables.VariableV1(20.0, name="b")
  self.c = math_ops.add(self.a, self.b, name="c")  # Should be 30.0.
  self.d = math_ops.subtract(self.a, self.c, name="d")  # Should be -20.0.
  self.e = math_ops.multiply(self.c, self.d, name="e")  # Should be -600.0.
  self.ph = array_ops.placeholder(dtypes.float32, shape=(2, 2), name="ph")
  self.f = math_ops.multiply(self.e, self.ph, name="f")
  self.opt = gradient_descent.GradientDescentOptimizer(0.1).minimize(
      self.e, name="opt")
  # Disable model pruning so every node above survives Grappler rewrites.
  rewriter_config = rewriter_config_pb2.RewriterConfig(
      disable_model_pruning=True)
  graph_options = config_pb2.GraphOptions(rewrite_options=rewriter_config)
  config = config_pb2.ConfigProto(graph_options=graph_options)
  self.sess = session.Session(config=config)
  self.sess.run(self.a.initializer)
  self.sess.run(self.b.initializer)
def per_image_standardization(image):
  """Linearly scales `image` to have zero mean and unit norm.

  This op computes `(x - mean) / adjusted_stddev`, where `mean` is the average
  of all values in image, and
  `adjusted_stddev = max(stddev, 1.0/sqrt(image.NumElements()))`.

  `stddev` is the standard deviation of all values in `image`. It is capped
  away from zero to protect against division by 0 when handling uniform
  images.

  Args:
    image: 3-D tensor of shape `[height, width, channels]`.

  Returns:
    The standardized image with same shape as `image`.

  Raises:
    ValueError: if the shape of 'image' is incompatible with this function.
  """
  image = ops.convert_to_tensor(image, name='image')
  _Check3DImage(image, require_static=False)
  num_pixels = math_ops.reduce_prod(array_ops.shape(image))

  image = math_ops.cast(image, dtype=dtypes.float32)
  image_mean = math_ops.reduce_mean(image)

  # Var[x] = E[x^2] - E[x]^2; relu clamps tiny negative values caused by
  # floating-point cancellation.
  variance = (math_ops.reduce_mean(math_ops.square(image)) -
              math_ops.square(image_mean))
  variance = gen_nn_ops.relu(variance)
  stddev = math_ops.sqrt(variance)

  # Apply a minimum normalization that protects us against uniform images.
  min_stddev = math_ops.rsqrt(math_ops.cast(num_pixels, dtypes.float32))
  pixel_value_scale = math_ops.maximum(stddev, min_stddev)
  pixel_value_offset = image_mean

  image = math_ops.subtract(image, pixel_value_offset)
  image = math_ops.div(image, pixel_value_scale)
  return image
def sample(self, time, outputs, state, name=None):
  """sample for SampledEmbeddingHelper.

  Samples token ids from the (temperature-scaled, renormalized) logits
  via multinomial sampling.
  """
  del time, state  # unused by sample_fn
  # Outputs are logits, use random_ops.multinomial to sample ids
  if not isinstance(outputs, ops.Tensor):
    raise TypeError("Expected outputs to be a single Tensor, got: %s" %
                    type(outputs))
  # NOTE(review): this divides exp(logits) by temp, not logits; since the
  # result is renormalized on the next line, the temperature cancels out
  # and has no effect on outputs3 -- confirm the intended behavior.
  outputs2 = math_ops.div(math_ops.exp(outputs), self.temp)
  outputs3 = math_ops.div(
      outputs2, math_ops.reduce_sum(outputs2, axis=1, keep_dims=True))
  # Logit transform log(p / (1 - p)) of the normalized probabilities;
  # multinomial applies a softmax over these values internally.
  outputs4 = math_ops.log(outputs3) - math_ops.log(
      math_ops.subtract(1.0, outputs3))
  sample_ids2 = math_ops.cast(random_ops.multinomial(outputs4, 1),
                              dtypes.int32)
  # Drop the trailing sample dimension: [batch, 1] -> [batch].
  sample_ids = array_ops.reshape(sample_ids2, [-1])
  return sample_ids
def compute_loss(labels, predictions, weights, loss_collection):
  """Computes a per-batch pairwise-difference style loss.

  Appears to implement mean pairwise squared error via the identity
  sum_{i,j}(d_i - d_j)^2 = 2n*sum(d^2) - 2*(sum d)^2 -- confirm against
  the enclosing public wrapper.
  """
  predictions = math_ops.cast(predictions, dtype=dtypes.float32)
  predictions.get_shape().assert_is_compatible_with(labels.get_shape())

  diffs = math_ops.subtract(predictions, labels)

  # Reduce over every axis except the batch axis (0).
  axis = math_ops.range(1, array_ops.rank(diffs))

  sum_squares_diff_per_batch = math_ops.reduce_sum(
      math_ops.square(diffs), axis=axis, keepdims=True)
  num_present_per_batch = _num_present(diffs, weights, per_batch=True)

  # div_no_nan yields 0 where the (clamped) denominator is 0, so batches
  # with fewer than two present elements contribute nothing.
  term1 = 2.0 * math_ops.div_no_nan(
      sum_squares_diff_per_batch,
      math_ops.maximum(num_present_per_batch - 1, 0),
      name="value")

  sum_diff = math_ops.reduce_sum(diffs, axis=axis, keepdims=True)
  term2 = 2.0 * math_ops.div_no_nan(
      math_ops.square(sum_diff),
      math_ops.maximum(
          math_ops.multiply(num_present_per_batch,
                            num_present_per_batch - 1), 0),
      name="value")

  weighted_losses = math_ops.multiply(term1 - term2, weights)
  loss = math_ops.reduce_sum(weighted_losses)

  # Return 0 (not NaN) when nothing is present at all.
  mean_loss = array_ops.where(
      math_ops.reduce_sum(num_present_per_batch) > 0,
      loss,
      array_ops.zeros_like(loss),
      name="value")
  util.add_loss(mean_loss, loss_collection)
  return mean_loss
def _Update_global_variables():
  """Applies the flocking update to local and global (center) variables.

  Snapshots the centers, computes v - center per variable, then moves
  locals towards (and centers away from, at 1/4 strength) each other by a
  distance-dependent "flocking" factor. Returns the grouped update op.
  """
  local_vars = [v for g, v in grads_and_vars if g is not None]
  global_center_vars = [self._global_map[var] for var in local_vars]
  local_center_vars = [self._local_map[var] for var in local_vars]
  local_center_vars_update = []
  # Snapshot the global centers into the local center copies first.
  for lvar, var in zip(local_center_vars, global_center_vars):
    local_center_vars_update.append(lvar.assign(var))
  update_ops = []
  differences = []
  # All differences must be computed after the snapshot completes.
  with ops.control_dependencies(local_center_vars_update):
    for v, lv in zip(local_vars, local_center_vars):
      with ops.device(v.device):
        differences.append(math_ops.subtract(v, lv))
    for lvar, diff in zip(local_vars, differences):
      with ops.device(lvar.device):
        # Flocking factor: repulsion/attraction blend, capped at _att.
        flocking_local = tf.minimum(
            (self._rep / self._dis) * tf.abs(diff) - self._rep + self._att,
            self._att)
        update_ops.append(
            state_ops.assign_sub(
                lvar, math_ops.multiply(flocking_local, diff)))
    for var, diff in zip(global_center_vars, differences):
      with ops.device(var.device):
        flocking_g = tf.minimum(
            (self._rep / self._dis) * tf.abs(diff) - self._rep + self._att,
            self._att)
        # Centers move in the opposite direction at quarter strength.
        update_ops.append(
            state_ops.assign_add(
                var, math_ops.multiply(flocking_g, diff) / 4))
    if global_step:
      with ops.colocate_with(global_step):
        update_ops.append(state_ops.assign_add(global_step, 1))
    variable_update = control_flow_ops.group(*(update_ops))
  return variable_update
def _model_fn(features, labels, mode):
  """Toy model_fn exporting sum/product/difference heads for x and y.

  Builds either a core `EstimatorSpec` or a contrib `ModelFnOps`
  depending on the closure variable `core`.
  """
  _ = labels
  x = features['x']
  y = features['y']
  with ops.name_scope('outputs'):
    predictions = {
        'sum': math_ops.add(x, y, name='sum'),
        'product': math_ops.multiply(x, y, name='product'),
        'difference': math_ops.subtract(x, y, name='difference')
    }
  if core:
    # One PredictOutput per head; 'sum' doubles as the default signature.
    export_outputs = {
        k: export_output.PredictOutput({k: v})
        for k, v in predictions.items()
    }
    export_outputs[
        signature_constants.
        DEFAULT_SERVING_SIGNATURE_DEF_KEY] = export_outputs['sum']
    return model_fn.EstimatorSpec(mode=mode,
                                  predictions=predictions,
                                  export_outputs=export_outputs,
                                  loss=constant_op.constant(0),
                                  train_op=control_flow_ops.no_op())
  else:
    # Contrib path: one output alternative per head.
    output_alternatives = {
        k: (constants.ProblemType.UNSPECIFIED, {
            k: v
        }) for k, v in predictions.items()
    }
    return contrib_model_fn.ModelFnOps(
        mode=mode,
        predictions=predictions,
        output_alternatives=output_alternatives,
        loss=constant_op.constant(0),
        train_op=control_flow_ops.no_op())
def _get_folded_kernel_bias(conv_type, kernel, bias, mu, var, gamma, beta,
                            epsilon):
  """Folds batch-norm statistics into a conv kernel and bias.

  folded_kernel = kernel * multiplier
                = kernel * gamma / sigma_bt

  folded_bias = beta - (mu - bias) * multiplier
              = beta - (mu - bias) * gamma / sigma
  """
  sigma = math_ops.rsqrt(var + epsilon)
  multiplier = sigma if gamma is None else math_ops.mul(gamma, sigma)

  if conv_type == 'DepthwiseConv2D':
    # Depthwise kernels keep channels in the last two axes; reshape the
    # per-channel multiplier to line up with them.
    depthwise_multiplier = array_ops.reshape(
        multiplier, [kernel.shape[2], kernel.shape[3]])
    folded_kernel = math_ops.mul(
        depthwise_multiplier, kernel, name='depthwise_kernel')
  else:
    folded_kernel = math_ops.mul(multiplier, kernel, name='kernel')

  folded_bias = math_ops.subtract(beta, (mu - bias) * multiplier,
                                  name='bias')
  return folded_kernel, folded_bias
def _resource_apply_dense(self, grad, var, constraint, apply_state):
  """Frank-Wolfe-style dense update: step towards the constraint's LMO point.

  Moves `var` by `clipped_lr * (lmo(grad) - var)`, with optional momentum
  and an optional rescaling of the step size.
  """
  # NOTE(review): `apply_state` is accepted but unused here -- confirm the
  # base-class contract expects that.
  update_ops = []
  grad = ops.convert_to_tensor(grad, var.dtype.base_dtype)
  lr = math_ops.cast(self._get_hyper('learning_rate'), var.dtype.base_dtype)
  if self._momentum:
    # Exponential moving average of the gradient.
    m = math_ops.cast(self._get_hyper('momentum'), var.dtype.base_dtype)
    momentum_var = self.get_slot(var, 'momentum')
    modified_grad = momentum_var.assign(
        math_ops.add(m * momentum_var, (1 - m) * grad))
  else:
    modified_grad = grad
  # Linear minimization oracle over the constraint set.
  v = ops.convert_to_tensor(constraint.lmo(modified_grad),
                            var.dtype.base_dtype)
  vminvar = math_ops.subtract(v, var)
  if self.rescale is None:
    factor = math_ops.cast(1., var.dtype.base_dtype)
  elif self.rescale == 'diameter':
    factor = math_ops.cast(1. / constraint.get_diameter(),
                           var.dtype.base_dtype)
  elif self.rescale == 'gradient':
    factor = math_ops.cast(
        tf.norm(modified_grad, ord=2) / tf.norm(vminvar, ord=2),
        var.dtype.base_dtype)
  # Keep the effective step in [0, 1] so the iterate stays a convex
  # combination of var and v.
  # NOTE(review): `math_ops.ClipByValue` (CamelCase) looks like the raw op
  # name rather than the public `clip_by_value` -- confirm it resolves.
  clipped_lr = math_ops.ClipByValue(t=lr * factor,
                                    clip_value_min=0,
                                    clip_value_max=1)
  update_ops.append(state_ops.assign_add(var, clipped_lr * vminvar))
  return control_flow_ops.group(*update_ops)
def __rsub__(self, other):
  """Supports reflected subtraction, i.e. `other - self`."""
  difference = math_ops.subtract(other, self)
  return difference
def __sub__(self, other):
  """Supports subtraction, i.e. `self - other`."""
  difference = math_ops.subtract(self, other)
  return difference
def _sampled_scattered_embedding_lookup(
    params, values, dimension=None, sampled_candidates=None, hash_key=None,
    name=None):
  """Looks up embeddings using parameter hashing for each value in `values`.

  This method looks up selected embedding dimensions if `sampled_candidates`
  is given, otherwise looks up all dimensions.

  The i-th embedding component of a value v in `values` is found by retrieving
  the weight whose index is a fingerprint of the pair (v,i).
  The concept is explored as "feature hashing" for model compression in this
  paper: http://arxiv.org/pdf/1504.04788.pdf

  Feature hashing has the pleasant effect of allowing us to compute an
  embedding without needing a pre-determined vocabulary, relieving some amount
  of process complexity. It also allows for us to maintain embeddings for
  possibly trillions of features with a fixed amount of memory.

  Note that this is superior to out-of-vocabulary shared "hash buckets" in
  that the embedding is extremely likely to be unique for each token as
  opposed to being shared across probably-colliding tokens. The price is that
  we must compute a hash once for each scalar in the token's embedding as
  opposed to once per token.

  If `params` is a list, it represents a partition of the embedding
  parameters. Each tensor in the list should have the same length, except for
  the first ones which may have an additional element. For instance 10
  parameters can be partitioned in 4 tensors with length `[3, 3, 2, 2]`.

  Args:
    params: A `Tensor`, `list` of `Tensors`, or `PartitionedVariable`.
      Each tensor must be of rank 1 with fully-defined shape.
    values: `Tensor` of values to be embedded with shape `[d0, ..., dn]`.
    dimension: Embedding dimension. The user must specify either `dimension`
      or `sampled_candidates`.
    sampled_candidates: An optional `Tensor` of slice indices to keep along
      the final dimension with shape `[d0, ..., dn, N]`. If given, `dimension`
      is ignored. If `None`, looks up all candidates.
    hash_key: Specify the hash_key that will be used by the `FingerprintCat64`
      function to combine the crosses fingerprints on SparseFeatureCrossOp
      (optional).
    name: An optional name for this op.

  Returns:
    A `Tensor` with shape `[d0, ..., dn, dimension]`.
    If `sampled_candidates` is given, the output shape is `[d0, ..., dn, N]`

  Raises:
    ValueError: if dimension is not positive or the partition size is invalid.
  """
  # Normalize `params` to a plain list of rank-1 tensors.
  if isinstance(params, variables.PartitionedVariable):
    params = list(params)
  if not isinstance(params, list):
    params = [params]

  with ops.name_scope(name, "scattered_embedding_lookup",
                      params + [dimension, values]):
    # Flatten the values
    values_shape = array_ops.shape(values)
    values = array_ops.reshape(values, [-1, 1])

    if sampled_candidates is None:
      if dimension is None:
        raise ValueError(
            "You must specify either dimension or sampled_candidates.")
      if dimension <= 0:
        raise ValueError("Dimension must be >0. Given is %d" % dimension)
      # No sampling requested: look up every dimension 0..dimension-1 for
      # each flattened value by tiling the index row per value.
      sampled_candidates = array_ops.tile(array_ops.expand_dims(
          math_ops.range(0, dimension), 0), array_ops.shape(values))
    else:
      # Infer `dimension` from the size of the candidates' last axis, and
      # assert that the leading dims of `sampled_candidates` match `values`.
      dimension = array_ops.shape(sampled_candidates)[
          math_ops.subtract(array_ops.rank(sampled_candidates), 1)]
      sampled_candidates_shape = array_ops.shape(sampled_candidates)
      dimension_tensor = array_ops.reshape(dimension, shape=[1,])
      expected_shape = array_ops.concat([values_shape, dimension_tensor], 0)
      with ops.control_dependencies([control_flow_ops.Assert(
          math_ops.reduce_all(math_ops.equal(sampled_candidates_shape,
                                             expected_shape)),
          ["The shape of sampled_candidates: ", sampled_candidates_shape,
           " does not match the shape of values: ", values_shape])]):
        # Flatten sampled_candidates, same way as values are flattened.
        sampled_candidates = array_ops.reshape(sampled_candidates,
                                               [-1, dimension])

    # Validate the partition layout: rank-1, fully defined, and sized per the
    # `[3, 3, 2, 2]`-style scheme described in the docstring.
    num_partitions = len(params)
    partition_sizes = []
    for p in range(num_partitions):
      shape = params[p].get_shape()
      shape.assert_has_rank(1)
      shape.assert_is_fully_defined()
      partition_sizes.append(shape[0].value)
    num_params = sum(partition_sizes)  # Total number of parameters.

    # Assert the size of each partition.
    for p in range(num_partitions):
      expected_size = (num_params - p - 1) // num_partitions + 1
      if partition_sizes[p] != expected_size:
        raise ValueError("Tensor %d in params has size %d, expected %d." %
                         (p, partition_sizes[p], expected_size))

    # With two values v1 and v2 and 3 dimensions, we will cross
    # [[0, 1, 2], [0, 1, 2]] with [[v1], [v2]].
    tensors_to_cross = [sampled_candidates, values]
    # Each resulting id is the fingerprint of a (candidate index, value)
    # pair, hashed into [0, num_params).
    ids = sparse_feature_cross_op.sparse_feature_cross(
        tensors_to_cross, hashed_output=True, num_buckets=num_params,
        hash_key=hash_key)
    ids = sparse_ops.sparse_tensor_to_dense(ids)

    # No need to validate the indices since we have checked the params
    # dimensions and we know the largest id.
    result = embedding_ops.embedding_lookup(
        params, ids, partition_strategy="div")

    # Restore the original leading dims of `values`, appending `dimension`.
    return array_ops.reshape(result,
                             array_ops.concat([values_shape, [dimension]], 0))
def mean_pairwise_squared_error(
    labels, predictions, weights=1.0, scope=None,
    loss_collection=ops.GraphKeys.LOSSES):
  """Adds a pairwise-errors-squared loss to the training procedure.

  Unlike `mean_squared_error`, which is a measure of the differences between
  corresponding elements of `predictions` and `labels`,
  `mean_pairwise_squared_error` is a measure of the differences between pairs
  of corresponding elements of `predictions` and `labels`.

  For example, if `labels`=[a, b, c] and `predictions`=[x, y, z], there are
  three pairs of differences are summed to compute the loss:
    loss = [ ((a-b) - (x-y)).^2 + ((a-c) - (x-z)).^2 + ((b-c) - (y-z)).^2 ] / 3

  Note that since the inputs are of shape `[batch_size, d0, ... dN]`, the
  corresponding pairs are computed within each batch sample but not across
  samples within a batch. For example, if `predictions` represents a batch of
  16 grayscale images of dimension [batch_size, 100, 200], then the set of
  pairs is drawn from each image, but not across images.

  `weights` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weights` is a tensor of
  size `[batch_size]`, then the total loss for each sample of the batch is
  rescaled by the corresponding element in the `weights` vector.

  Args:
    labels: The ground truth output tensor, whose shape must match the shape
      of `predictions`.
    predictions: The predicted outputs, a tensor of size
      `[batch_size, d0, .. dN]` where N+1 is the total number of dimensions
      in `predictions`.
    weights: Coefficients for the loss a scalar, a tensor of shape
      `[batch_size]` or a tensor whose shape matches `predictions`.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.

  Returns:
    A scalar `Tensor` that returns the weighted loss.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `labels`
      or if the shape of `weights` is invalid. Also if `labels` or
      `predictions` is None.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
  if labels is None:
    raise ValueError("labels must not be None.")
  if predictions is None:
    raise ValueError("predictions must not be None.")
  with ops.name_scope(scope, "mean_pairwise_squared_error",
                      (predictions, labels, weights)) as scope:
    weights = math_ops.to_float(weights)
    labels = math_ops.to_float(labels)
    # Assert broadcastability before any arithmetic so shape errors surface
    # with a clear message instead of a failed op downstream.
    with ops.control_dependencies((
        weights_broadcast_ops.assert_broadcastable(weights, labels),)):
      predictions = math_ops.to_float(predictions)
      predictions.get_shape().assert_is_compatible_with(labels.get_shape())

      diffs = math_ops.subtract(predictions, labels)

      # Reduce over every non-batch dimension; dim 0 stays per-sample.
      axis = math_ops.range(1, array_ops.rank(diffs))

      sum_squares_diff_per_batch = math_ops.reduce_sum(
          math_ops.square(diffs), axis=axis, keepdims=True)
      # Number of weighted-present elements per sample, used to normalize
      # the pair counts below.
      num_present_per_batch = _num_present(diffs, weights, per_batch=True)

      # term1 and term2 implement the algebraic expansion of the pairwise
      # sum: sum_{i<j} ((d_i - d_j))^2 = n * sum_i d_i^2 - (sum_i d_i)^2
      # (up to the normalization by the pair count n*(n-1)).
      # `div_no_nan` yields 0 when a sample has fewer than 2 elements.
      term1 = 2.0 * math_ops.div_no_nan(
          sum_squares_diff_per_batch,
          math_ops.maximum(num_present_per_batch - 1, 0),
          name="value")

      sum_diff = math_ops.reduce_sum(diffs, axis=axis, keepdims=True)
      term2 = 2.0 * math_ops.div_no_nan(
          math_ops.square(sum_diff),
          math_ops.maximum(
              math_ops.multiply(num_present_per_batch,
                                num_present_per_batch - 1), 0),
          name="value")

      weighted_losses = math_ops.multiply(term1 - term2, weights)
      loss = math_ops.reduce_sum(weighted_losses)

      # If nothing is present at all, report a zero loss of matching dtype
      # rather than the (meaningless) reduced value.
      mean_loss = array_ops.where(
          math_ops.reduce_sum(num_present_per_batch) > 0,
          loss,
          array_ops.zeros_like(loss),
          name="value")
      util.add_loss(mean_loss, loss_collection)
      return mean_loss