def _mode(self):
  """Computes `(a - 1) / (a_b_sum - 2)`, guarded where it is undefined.

  Returns:
    The mode tensor. When `allow_nan_stats` is set, every entry for which
    `a > 1` and `b > 1` do not both hold is replaced by NaN. Otherwise the
    result depends on assertions that fail at run time for any component of
    `a` or `b` that is not greater than 1.
  """
  unguarded = (self.a - 1.0) / (self.a_b_sum - 2.0)

  if not self.allow_nan_stats:
    a_check = check_ops.assert_less(
        array_ops.ones((), dtype=self.dtype),
        self.a,
        message="Mode not defined for components of a <= 1.")
    b_check = check_ops.assert_less(
        array_ops.ones((), dtype=self.dtype),
        self.b,
        message="Mode not defined for components of b <= 1.")
    return control_flow_ops.with_dependencies([a_check, b_check], unguarded)

  nan_value = np.array(np.nan, dtype=self.dtype.as_numpy_dtype())
  is_defined = math_ops.logical_and(
      math_ops.greater(self.a, 1.0), math_ops.greater(self.b, 1.0))
  nan_fill = array_ops.fill(self.batch_shape(), nan_value, name="nan")
  return math_ops.select(is_defined, unguarded, nan_fill)
def _variance(self):
  """Builds `sigma**2 * df / (df - 2)` with guards for small `df`.

  Components with `df <= 2` are reported as `inf`. Components with `df <= 1`
  are additionally replaced by NaN when `allow_nan_stats` is set; otherwise an
  assertion that `df > 1` is attached to the result.

  Returns:
    The guarded variance tensor.
  """
  finite_var = (
      self._ones() * math_ops.square(self.sigma) * self.df / (self.df - 2))

  # For 1 < df <= 2 the variance is infinite.
  pos_inf = np.array(np.inf, dtype=self.dtype.as_numpy_dtype())
  df_above_two = math_ops.greater(
      self.df, array_ops.fill(self.batch_shape(), 2.0))
  inf_fill = array_ops.fill(self.batch_shape(), pos_inf, name="inf")
  var_or_inf = math_ops.select(df_above_two, finite_var, inf_fill)

  if not self.allow_nan_stats:
    df_check = check_ops.assert_less(
        array_ops.ones((), dtype=self.dtype),
        self.df,
        message="variance not defined for components of df <= 1")
    return control_flow_ops.with_dependencies([df_check], var_or_inf)

  nan_value = np.array(np.nan, dtype=self.dtype.as_numpy_dtype())
  df_above_one = math_ops.greater(self.df, self._ones())
  nan_fill = array_ops.fill(self.batch_shape(), nan_value, name="nan")
  return math_ops.select(df_above_one, var_or_inf, nan_fill)
def _variance(self):
  """Builds `scale**2 * df / (df - 2)` with guards for small `df`.

  Components with `df <= 2` are reported as `inf`. Components with `df <= 1`
  are additionally replaced by NaN when `allow_nan_stats` is set; otherwise an
  assertion that `df > 1` is attached to the result.

  Returns:
    The guarded variance tensor.
  """
  # The denominator is forced to 1 wherever df <= 2, so the value computed for
  # the branch that the outer `where` discards stays finite and cannot inject
  # a NaN into the gradient.
  safe_denom = array_ops.where(
      math_ops.greater(self.df, 2.),
      self.df - 2.,
      array_ops.ones_like(self.df))
  # No abs() around scale is needed because it is squared.
  finite_var = (
      array_ops.ones(self.batch_shape_tensor(), dtype=self.dtype) *
      math_ops.square(self.scale) * self.df / safe_denom)

  # For 1 < df <= 2 the variance is infinite.
  pos_inf = np.array(np.inf, dtype=self.dtype.as_numpy_dtype())
  df_above_two = self.df > array_ops.fill(self.batch_shape_tensor(), 2.)
  inf_fill = array_ops.fill(self.batch_shape_tensor(), pos_inf, name="inf")
  var_or_inf = array_ops.where(df_above_two, finite_var, inf_fill)

  if not self.allow_nan_stats:
    df_check = check_ops.assert_less(
        array_ops.ones([], dtype=self.dtype),
        self.df,
        message="variance not defined for components of df <= 1")
    return control_flow_ops.with_dependencies([df_check], var_or_inf)

  nan_value = np.array(np.nan, dtype=self.dtype.as_numpy_dtype())
  df_above_one = math_ops.greater(
      self.df,
      array_ops.ones(self.batch_shape_tensor(), dtype=self.dtype))
  nan_fill = array_ops.fill(self.batch_shape_tensor(), nan_value, name="nan")
  return array_ops.where(df_above_one, var_or_inf, nan_fill)
def mode(self, name="mode"):
  """Mode of the distribution.

  The value is computed as `(a - 1) / (a_b_sum - 2)` and is only defined for
  the Beta distribution when `a > 1` and `b > 1`. Where that does not hold,
  NaN is returned if `self.allow_nan_stats` is `True`; if it is `False`, the
  result depends on assertions that `a > 1` and `b > 1`, so an exception is
  raised instead of returning `NaN`.

  Args:
    name: The name for this op.

  Returns:
    Mode of the Beta distribution.
  """
  with ops.name_scope(self.name):
    with ops.op_scope([self._a, self._b, self._a_b_sum], name):
      alpha, beta, total = self._a, self._b, self._a_b_sum
      unit = constant_op.constant(1, self.dtype)
      raw_mode = (alpha - 1) / (total - 2)

      if not self.allow_nan_stats:
        return control_flow_ops.with_dependencies(
            [check_ops.assert_less(unit, alpha),
             check_ops.assert_less(unit, beta)],
            raw_mode)

      both_above_one = math_ops.logical_and(
          math_ops.greater(alpha, 1), math_ops.greater(beta, 1))
      nan_like_total = (
          constant_op.constant(float("NaN"), dtype=self.dtype) *
          array_ops.ones_like(total, dtype=self.dtype))
      return math_ops.select(both_above_one, raw_mode, nan_like_total)
def compute_lr(self, grad, var):
  """Returns the learning rate for `var`, scaled by a norm-based trust ratio.

  Variables whose name contains any entry of `self._skip_list` keep the plain
  learning rate. For all others the rate is multiplied by
  `eeta * |var| / (|grad| + weight_decay * |var| + epsilon)`, where `|.|` is
  the 2-norm; the ratio falls back to 1.0 when either norm is not positive.

  Args:
    grad: Gradient tensor for `var`.
    var: The variable being updated; its `name` is matched against the skip
      list.

  Returns:
    The (possibly scaled) learning rate.
  """
  base_lr = self._learning_rate
  skipped = self._skip_list is not None and any(
      pattern in var.name for pattern in self._skip_list)
  if skipped:
    return base_lr

  weight_norm = linalg_ops.norm(var, ord=2)
  grad_norm = linalg_ops.norm(grad, ord=2)
  weights_nonzero = math_ops.greater(weight_norm, 0)
  grads_nonzero = math_ops.greater(grad_norm, 0)
  raw_ratio = (
      self._eeta * weight_norm /
      (grad_norm + self._weight_decay * weight_norm + self._epsilon))
  ratio_given_weights = array_ops.where(grads_nonzero, raw_ratio, 1.0)
  trust_ratio = array_ops.where(weights_nonzero, ratio_given_weights, 1.0)
  return self._learning_rate * trust_ratio
def _prune_invalid_weights(sparse_ids, sparse_weights):
  """Drops every entry whose weight is not strictly positive (<= 0).

  Args:
    sparse_ids: Sparse ids, filtered with the same mask as the weights.
    sparse_weights: Sparse weights, or `None` to skip pruning entirely.

  Returns:
    A `(sparse_ids, sparse_weights)` pair; both are returned unchanged when
    `sparse_weights` is `None`.
  """
  if sparse_weights is None:
    return sparse_ids, sparse_weights

  keep = math_ops.greater(sparse_weights.values, 0)
  pruned_ids = sparse_ops.sparse_retain(sparse_ids, keep)
  pruned_weights = sparse_ops.sparse_retain(sparse_weights, keep)
  return pruned_ids, pruned_weights
def gcd(a, b, name=None):
  """Returns the greatest common divisor via Euclid's algorithm.

  The loop repeatedly replaces `(a, b)` with `(b, a mod b)` while `b` is
  greater than zero, and is built with `back_prop=False`.

  Args:
    a: The dividend. A scalar integer `Tensor`.
    b: The divisor. A scalar integer `Tensor`.
    name: An optional name for the operation.

  Returns:
    A scalar `Tensor` representing the greatest common divisor between `a` and
    `b`.

  Raises:
    ValueError: If `a` or `b` are not scalar integers.
  """
  with ops.name_scope(name, 'gcd', [a, b]):
    a = ops.convert_to_tensor(a)
    b = ops.convert_to_tensor(b)

    a.shape.assert_has_rank(0)
    b.shape.assert_has_rank(0)

    if not a.dtype.is_integer:
      raise ValueError('a must be an integer type. Got: %s' % a.dtype)
    if not b.dtype.is_integer:
      raise ValueError('b must be an integer type. Got: %s' % b.dtype)

    def _divisor_is_positive(_, divisor):
      return math_ops.greater(divisor, array_ops.zeros_like(divisor))

    def _euclid_step(dividend, divisor):
      return [divisor, math_ops.mod(dividend, divisor)]

    result, _ = control_flow_ops.while_loop(
        _divisor_is_positive, _euclid_step, [a, b], back_prop=False)
    return result
def testLargeCase(self):
  """Checks that tp + fp at threshold 0 equals the number of elements seen.

  Random predictions and labels are fed through
  `precision_recall_at_equal_thresholds` many times, so that an accumulator
  with too little precision would drift away from the exact count.
  """
  shape = [32, 512, 256, 1]
  predictions = random_ops.random_uniform(
      shape, 0.0, 1.0, dtype=dtypes_lib.float32)
  labels = math_ops.greater(random_ops.random_uniform(shape, 0.0, 1.0), 0.5)

  result, update_op = metric_ops.precision_recall_at_equal_thresholds(
      labels=labels, predictions=predictions, num_thresholds=201)

  # Run many updates, enough to cause highly inaccurate values if the
  # code used float32 for accumulation.
  num_updates = 71

  with self.test_session() as sess:
    sess.run(variables.local_variables_initializer())
    # `range` works on Python 3, where the `xrange` builtin no longer exists.
    for _ in range(num_updates):
      sess.run(update_op)

    prdata = sess.run(result)

    # Since we use random values, we won't know the tp/fp/tn/fn values, but
    # tp and fp at threshold 0 should be the total number of positive and
    # negative labels, hence their sum should be total number of pixels.
    # `np.prod` replaces the `np.product` alias that NumPy 2.0 removed.
    expected_value = 1.0 * np.prod(shape) * num_updates
    got_value = prdata.tp[0] + prdata.fp[0]
    # They should be at least within 1.
    self.assertNear(got_value, expected_value, 1.0)
def average_impurity(self):
  """Constructs a TF graph for evaluating the average leaf impurity of a tree.

  Leaves are the rows of `self.variables.tree` whose first column equals
  `constants.LEAF_NODE`; the impurity is `self._weighted_gini` applied to the
  `node_sums` rows of those leaves.

  Returns:
    The last op in the graph: the impurity when at least one leaf exists,
    otherwise a tensor of the same shape filled with 10000000.
  """
  first_column = array_ops.slice(self.variables.tree, [0, 0], [-1, 1])
  children = array_ops.squeeze(first_column, squeeze_dims=[1])
  leaf_mask = math_ops.equal(constants.LEAF_NODE, children)
  leaf_positions = array_ops.where(leaf_mask)
  leaves = math_ops.to_int32(
      array_ops.squeeze(leaf_positions, squeeze_dims=[1]))
  leaf_counts = array_ops.gather(self.variables.node_sums, leaves)
  gini = self._weighted_gini(leaf_counts)

  # On the first step there are often no leaves yet. Because the impurity may
  # be used as a loss, fall back to a large constant in that case so that the
  # loss always decreases afterwards.
  has_leaves = math_ops.greater(array_ops.shape(leaves)[0], 0)
  return control_flow_ops.cond(
      has_leaves,
      lambda: gini,
      lambda: array_ops.ones_like(gini, dtype=dtypes.float32) * 10000000.)
def mode(self, name="mode"):
  """Mode of the distribution.

  The value is `(alpha - 1) / (alpha_0 - k)`, where `k` is the first entry of
  `event_shape()`. The mode of the Dirichlet distribution is only defined for
  components with `alpha > 1`. Other components are returned as NaN when
  `self.allow_nan_stats` is `True`; when it is `False` the result depends on
  an assertion that `alpha > 1`, so an exception is raised rather than
  returning `NaN`.

  Args:
    name: The name for this op.

  Returns:
    Mode of the Dirichlet distribution.
  """
  with ops.name_scope(self.name):
    with ops.op_scope([self._alpha, self._alpha_0], name):
      unit = constant_op.constant(1, self.dtype)
      raw_mode = (self._alpha - 1) / (
          array_ops.expand_dims(self._alpha_0, -1) -
          math_ops.cast(self.event_shape()[0], self.dtype))

      if not self.allow_nan_stats:
        alpha_check = check_ops.assert_less(
            unit,
            self._alpha,
            message="mode not defined for components of alpha <= 1")
        return control_flow_ops.with_dependencies([alpha_check], raw_mode)

      alpha_above_one = math_ops.greater(self._alpha, 1)
      nan_like_alpha = (
          constant_op.constant(float("NaN"), dtype=self.dtype) *
          array_ops.ones_like(self._alpha, dtype=self.dtype))
      return math_ops.select(alpha_above_one, raw_mode, nan_like_alpha)
def map_and_filter_functions():
  """Builds `(name, map_fn, predicate)` test parameter tuples.

  Every map function is paired with every predicate under the name
  `Mixed<i><j>`, followed by two multi-output cases.

  Returns:
    A tuple of `(name, map_fn, predicate)` triples.
  """

  def increment_and_square(x):
    y = x + 1
    return y * y

  map_fns = [
      lambda x: x,  # identity
      lambda x: x + 1,  # increment
      lambda x: x - 5,  # minus five
      increment_and_square,
  ]
  predicates = [
      lambda x: constant_op.constant(True),  # keep everything
      lambda x: math_ops.equal(x, 0),  # keep zeros
      lambda x: math_ops.equal(x % 2, 0),  # keep values with x % 2 == 0
      lambda x: math_ops.greater(x + 5, 0),  # keep x + 5 > 0
  ]

  cases = [
      ("Mixed{}{}".format(i, j), map_fn, predicate)
      for i, map_fn in enumerate(map_fns)
      for j, predicate in enumerate(predicates)
  ]

  # Multi output
  cases.append(
      ("Multi1", lambda x: (x, x), lambda x, y: constant_op.constant(True)))
  cases.append(
      ("Multi2", lambda x: (x, 2),
       lambda x, y: math_ops.equal(x * math_ops.cast(y, dtypes.int64), 0)))
  return tuple(cases)
def _get_stratified_batch_from_tensors(val, label, reject_probs, batch_size,
                                       queue_threads=3):
  """Reject examples one-at-a-time based on class.

  An example is enqueued only when a fresh uniform random draw is greater than
  the rejection probability stored at index `label`.

  Args:
    val: Tensor holding the example value.
    label: Tensor holding the example label; used to index `reject_probs`.
    reject_probs: Per-class rejection probabilities, converted to a float32
      constant.
    batch_size: Capacity of the output queue and size of the dequeued batch.
    queue_threads: Number of copies of the enqueue op given to the queue
      runner.

  Returns:
    The result of dequeuing `batch_size` elements from the output queue.
  """
  # Turn the rejection probabilities into a tensor so they can be indexed
  # dynamically by the label tensor.
  reject_probs = constant_op.constant(
      reject_probs, dtype=dtypes.float32, name='rejection_probabilities')

  # Queue with the proper class proportions; it holds exactly one batch.
  value_shape = val.get_shape()
  label_shape = label.get_shape()
  batched_queue = data_flow_ops.FIFOQueue(
      capacity=batch_size,
      shapes=[value_shape, label_shape],
      dtypes=[val.dtype, label.dtype],
      name='batched_queue')

  # Conditionally enqueue.
  draw = random_ops.random_uniform([1])
  reject_prob_for_label = array_ops.slice(reject_probs, [label], [1])
  should_enqueue = array_ops.reshape(
      math_ops.greater(draw, reject_prob_for_label), [])
  maybe_enqueue = control_flow_ops.cond(
      should_enqueue,
      lambda: batched_queue.enqueue([val, label]),
      control_flow_ops.no_op)

  runner = queue_runner.QueueRunner(
      batched_queue, [maybe_enqueue] * queue_threads)
  queue_runner.add_queue_runner(runner)

  return batched_queue.dequeue_many(batch_size)
def _safe_div(numerator, denominator, name="value"):
  """Computes a safe divide which returns 0 if the denominator is zero.

  Note that the function contains an additional conditional check that is
  necessary for avoiding situations where the loss is zero causing NaNs to
  creep into the gradient computation.

  Args:
    numerator: An arbitrary `Tensor`.
    denominator: A Python float, or a `Tensor` whose shape matches `numerator`
      and whose values are assumed to be non-negative.
    name: An optional name for the returned op. It is only applied on the
      element-wise tensor path, not on the scalar early-return paths.

  Returns:
    The element-wise value of the numerator divided by the denominator.
  """
  if isinstance(denominator, float):
    # A Python float must be compared in Python. `math_ops.equal` would build
    # a Tensor, and using a Tensor as an `if` condition fails in graph mode.
    if denominator == 0.0:
      return ops.convert_to_tensor(0.0, dtype=numerator.dtype)
    return math_ops.div(numerator, denominator)
  if context.in_eager_mode() and denominator._rank() == 0:  # pylint: disable=protected-access
    # In eager mode the comparison result is concrete and can be tested.
    if math_ops.equal(denominator, 0.0):
      return ops.convert_to_tensor(0.0, dtype=numerator.dtype)
    return math_ops.div(numerator, denominator)
  # The inner `where` swaps zero denominators for ones, so the quotient that
  # the outer `where` discards stays finite and keeps NaNs out of the gradient.
  return array_ops.where(
      math_ops.greater(denominator, 0),
      math_ops.div(numerator,
                   array_ops.where(
                       math_ops.equal(denominator, 0),
                       array_ops.ones_like(denominator), denominator)),
      array_ops.zeros_like(numerator),
      name=name)
def _safe_div(numerator, denominator, name="value"):
  """Computes a safe divide which returns 0 if the denominator is zero.

  Once the 2018-11-01 forward-compatibility window has passed, this delegates
  to `math_ops.div_no_nan`. Before that, a nested `where` is used so that the
  discarded quotient stays finite, which keeps NaNs out of the gradient when
  the loss is zero.

  Args:
    numerator: An arbitrary `Tensor`.
    denominator: A `Tensor` whose shape matches `numerator` and whose values
      are assumed to be non-negative.
    name: An optional name for the returned op.

  Returns:
    The element-wise value of the numerator divided by the denominator.
  """
  if not compat.forward_compatible(2018, 11, 1):
    is_positive = math_ops.greater(denominator, 0)
    nonzero_denominator = array_ops.where(
        math_ops.equal(denominator, 0),
        array_ops.ones_like(denominator),
        denominator)
    quotient = math_ops.div(numerator, nonzero_denominator)
    return array_ops.where(
        is_positive, quotient, array_ops.zeros_like(numerator), name=name)

  return math_ops.div_no_nan(numerator, denominator, name=name)
def filter_functions():
  """Builds `(name, map_fn, [predicates])` test parameter tuples.

  Every ordered pair of predicates gives a `Mixed<x><y>` case, and every
  ordered triple gives a `Mixed<x><y><z>` case, all using the identity map.
  Two multi-output cases are appended at the end.

  Returns:
    A tuple of `(name, map_fn, predicate_list)` triples.
  """
  predicates = [
      lambda x: constant_op.constant(True),  # keep everything
      lambda x: math_ops.equal(x, 0),  # keep zeros
      lambda x: math_ops.greater(x + 5, 0),  # keep x + 5 > 0
  ]
  passthrough = lambda x: x

  cases = []
  for x, first in enumerate(predicates):
    for y, second in enumerate(predicates):
      cases.append(
          ("Mixed{}{}".format(x, y), passthrough, [first, second]))
      cases.extend(
          ("Mixed{}{}{}".format(x, y, z), passthrough, [first, second, third])
          for z, third in enumerate(predicates))

  keep_all_pairs = lambda x, y: constant_op.constant(True)
  # Multi output
  cases.append(
      ("Multi1", lambda x: (x, x), [keep_all_pairs, keep_all_pairs]))
  cases.append(
      ("Multi2", lambda x: (x, 2), [
          keep_all_pairs,
          lambda x, y: math_ops.equal(x * math_ops.cast(y, dtypes.int64), 0)
      ]))
  return tuple(cases)
def string_input_producer(string_tensor,
                          num_epochs=None,
                          shuffle=True,
                          seed=None,
                          capacity=32,
                          shared_name=None,
                          name=None,
                          cancel_op=None):
  """Output strings (e.g. filenames) to a queue for an input pipeline.

  Note: if `num_epochs` is not `None`, this function creates local counter
  `epochs`. Use `local_variables_initializer()` to initialize local variables.

  Args:
    string_tensor: A 1-D string tensor with the strings to produce.
    num_epochs: An integer (optional). If specified, `string_input_producer`
      produces each string from `string_tensor` `num_epochs` times before
      generating an `OutOfRange` error. If not specified,
      `string_input_producer` can cycle through the strings in
      `string_tensor` an unlimited number of times.
    shuffle: Boolean. If true, the strings are randomly shuffled within each
      epoch.
    seed: An integer (optional). Seed used if shuffle == True.
    capacity: An integer. Sets the queue capacity.
    shared_name: (optional). If set, this queue will be shared under the given
      name across multiple sessions.
    name: A name for the operations (optional).
    cancel_op: Cancel op for the queue (optional).

  Returns:
    A queue with the output strings. A `QueueRunner` for the Queue
    is added to the current `Graph`'s `QUEUE_RUNNER` collection.

  Raises:
    ValueError: If the string_tensor is a null Python list. At runtime,
      will fail with an assertion if string_tensor becomes a null tensor.
  """
  not_null_err = "string_input_producer requires a non-null input tensor"
  # Only non-Tensor inputs can be checked for emptiness in Python; tensors are
  # checked at run time by the assertion below.
  if not (isinstance(string_tensor, ops.Tensor) or string_tensor):
    raise ValueError(not_null_err)

  with ops.name_scope(name, "input_producer", [string_tensor]) as name:
    string_tensor = ops.convert_to_tensor(string_tensor, dtype=dtypes.string)
    non_empty = control_flow_ops.Assert(
        math_ops.greater(array_ops.size(string_tensor), 0), [not_null_err])
    with ops.control_dependencies([non_empty]):
      string_tensor = array_ops.identity(string_tensor)
    return input_producer(
        input_tensor=string_tensor,
        element_shape=[],
        num_epochs=num_epochs,
        shuffle=shuffle,
        seed=seed,
        capacity=capacity,
        shared_name=shared_name,
        name=name,
        summary_name="fraction_of_%d_full" % capacity,
        cancel_op=cancel_op)
def _logits_to_prediction(self, logits=None):
  """Maps logits to a dict of prediction tensors.

  With a single logit column, the sigmoid of the raw logits is stored under
  `LOGISTIC` and a zero column is prepended before the remaining outputs are
  computed.

  Args:
    logits: The logits tensor.

  Returns:
    A dict keyed by `PredictionKey` holding the original logits, the
    element-wise sigmoid probabilities and the int64 classes (`logits > 0`).
  """
  outputs = {PredictionKey.LOGITS: logits}

  if self.logits_dimension == 1:
    outputs[PredictionKey.LOGISTIC] = math_ops.sigmoid(logits)
    zero_column = array_ops.zeros_like(logits)
    logits = array_ops.concat(1, [zero_column, logits])

  outputs[PredictionKey.PROBABILITIES] = math_ops.sigmoid(logits)
  is_positive = math_ops.greater(logits, 0)
  outputs[PredictionKey.CLASSES] = math_ops.to_int64(is_positive)
  return outputs
def _SegmentMinGrad(op, grad):
  """Gradient for SegmentMin.

  The incoming gradient is gathered back to the input positions using the
  op's second input. Positions whose input value is greater than the gathered
  segment output receive zero.

  Args:
    op: The forward op; `inputs[0]` and `inputs[1]` are read, as is
      `outputs[0]`.
    grad: Gradient with respect to the op's output.

  Returns:
    A pair `(gradient for inputs[0], None)`.
  """
  zero_grads = array_ops.zeros(
      array_ops.shape(op.inputs[0]), dtype=op.inputs[0].dtype)
  grads_per_input = array_ops.gather(grad, op.inputs[1])
  outputs_per_input = array_ops.gather(op.outputs[0], op.inputs[1])
  above_output = math_ops.greater(op.inputs[0], outputs_per_input)
  data_grad = math_ops.select(above_output, zero_grads, grads_per_input)
  return data_grad, None
def _prune_invalid_ids(sparse_ids, sparse_weights):
  """Prunes entries with a negative id or, if weights are given, weight <= 0.

  Args:
    sparse_ids: Sparse ids; entries with `id < 0` are dropped.
    sparse_weights: Optional sparse weights. When present, entries whose
      weight is not strictly positive are dropped as well and the same mask is
      applied to the weights.

  Returns:
    A `(sparse_ids, sparse_weights)` pair after pruning.
  """
  has_weights = sparse_weights is not None

  keep = math_ops.greater_equal(sparse_ids.values, 0)
  if has_weights:
    keep = math_ops.logical_and(
        keep, math_ops.greater(sparse_weights.values, 0))

  sparse_ids = sparse_ops.sparse_retain(sparse_ids, keep)
  if has_weights:
    sparse_weights = sparse_ops.sparse_retain(sparse_weights, keep)
  return sparse_ids, sparse_weights
def AddForwardAccumulateLoop(self, value):
  """Add an accumulation loop for each value needed in backprop.

  This is added to the forward loop at the first time when a value
  in the forward loop is used by backprop gradient computation loop.

  The pseudocode is:
  ```
    acc;
    while (_pivot) {
      if (index == 0) [value] else Concat(acc, [value]);
    }
  ```

  Args:
    value: The tensor that is accumulated.

  Returns:
    The accumulated history of value.

  Raises:
    ValueError: If the shape of "value" is not known statically.
  """
  if not value.get_shape().is_fully_defined():
    raise ValueError("Must have known shape: %s" % value)

  # The empty accumulator is created with the gradient context exited.
  self._grad_context.Exit()
  # TODO(irving): Now that acc starts out empty, most of the
  # conditional logic can go away.
  empty_history = constant_op.constant(
      [],
      value.dtype,
      shape=[0] + value.get_shape().as_list(),
      name="f_acc")

  self.Enter()
  self.AddName(empty_history.name)
  entered = _Enter(
      empty_history,
      self._name,
      is_constant=False,
      parallel_iterations=self._parallel_iterations,
      name="f_acc")
  # The second merge input is a placeholder; it is rewired to the
  # next-iteration value below.
  merged = merge([entered, entered])[0]
  gated = switch(merged, self._pivot)

  # If index = 0 then [value] else Concat(acc, [value]).
  not_first = math_ops.greater(self._index, 0)
  gated_by_index = switch(gated[1], not_first)
  value_row = array_ops.expand_dims(value, 0)
  appended = array_ops.concat(0, [gated_by_index[1], value_row])
  seeded = array_ops.identity(gated_by_index[0])
  seeded = with_dependencies([seeded], value_row)
  combined = merge([seeded, appended])[0]
  fed_back = next_iteration(combined)
  merged.op._update_input(1, fed_back)

  history = exit(gated[0], name="f_acc")
  self.Exit()
  self._grad_context.Enter()
  return history
def _logits_to_prediction(self, logits=None):
  """Maps logits to a dict of prediction tensors.

  With a single logit column, the sigmoid of the raw logits is stored under
  `LOGISTIC` and a zero column is prepended before the remaining outputs are
  computed.

  Args:
    logits: The logits tensor.

  Returns:
    A dict keyed by `PedictionKey` holding the original logits, the
    element-wise sigmoid probabilities and the int64 classes (`logits > 0`).
  """
  result = {PedictionKey.LOGITS: logits}

  single_column = self.logits_dimension == 1
  if single_column:
    result[PedictionKey.LOGISTIC] = math_ops.sigmoid(logits)
    padding = array_ops.zeros_like(logits)
    logits = array_ops.concat(1, [padding, logits])

  probabilities = math_ops.sigmoid(logits)
  result[PedictionKey.PROBABILITIES] = probabilities
  # Classes come from thresholding the logits at zero.
  classes = math_ops.to_int64(math_ops.greater(logits, 0))
  result[PedictionKey.CLASSES] = classes
  return result
def _compute_accuracy(logits, targets, weights=None):
  """Builds a streaming accuracy metric from logits and targets.

  With more than two classes, the predicted class is the top-1 index of the
  logits. Otherwise logits and targets are flattened and a prediction is
  positive when its logit is greater than zero.

  Args:
    logits: The logits tensor.
    targets: The target labels.
    weights: Optional weights forwarded to the metric.

  Returns:
    The result of `metrics_lib.streaming_accuracy`.
  """
  is_multiclass = self._n_classes > 2
  if is_multiclass:
    _, predictions = nn.top_k(logits, 1)
  else:
    flat_logits = array_ops.reshape(logits, [-1])
    predictions = math_ops.greater(
        flat_logits, array_ops.zeros_like(flat_logits))
    targets = array_ops.reshape(targets, [-1])

  int_predictions = math_ops.to_int32(predictions)
  int_targets = math_ops.to_int32(targets)
  return metrics_lib.streaming_accuracy(int_predictions, int_targets, weights)
def _logits_to_predictions(self, logits):
  """See `_MultiClassHead`.

  Builds, under a "predictions" name scope, the raw logits, their element-wise
  sigmoid probabilities and the int64 classes obtained from `logits > 0`.
  """
  keys = prediction_key.PredictionKey
  with ops.name_scope(None, "predictions", (logits,)):
    probabilities = math_ops.sigmoid(logits, name=keys.PROBABILITIES)
    classes = math_ops.to_int64(
        math_ops.greater(logits, 0), name=keys.CLASSES)
    return {
        keys.LOGITS: logits,
        keys.PROBABILITIES: probabilities,
        keys.CLASSES: classes,
    }
def _logits_to_predictions(self, logits):
  """See `_MultiClassHead`.

  With a single logit column, the sigmoid of the raw logits is stored under
  `LOGISTIC` and a zero column is prepended before the probabilities
  (element-wise sigmoid) and classes (`logits > 0` as int64) are computed.
  """
  keys = prediction_key.PredictionKey
  outputs = {keys.LOGITS: logits}

  if self.logits_dimension == 1:
    outputs[keys.LOGISTIC] = math_ops.sigmoid(logits)
    zero_column = array_ops.zeros_like(logits)
    logits = array_ops.concat(1, [zero_column, logits])

  outputs[keys.PROBABILITIES] = math_ops.sigmoid(logits)
  above_zero = math_ops.greater(logits, 0)
  outputs[keys.CLASSES] = math_ops.to_int64(above_zero)
  return outputs
def refresh_shortlist():
  """Update the shortlist with the highest scores in id_to_score.

  Slot 0 of the stored ids holds the number of scores greater than
  `float32.min`, and slot 0 of the stored scores holds the smallest of the
  selected scores; the selected ids and scores follow.

  Returns:
    An op grouping both assignments, which are also saved in `self.last_ops`.
  """
  top_scores, top_ids = nn_ops.top_k(self.id_to_score, self.shortlist_size)
  lowest_top_score = math_ops.reduce_min(top_scores)
  is_real_score = math_ops.greater(top_scores, dtypes.float32.min)
  valid_count = math_ops.reduce_sum(math_ops.to_int32(is_real_score))

  ids_with_header = array_ops.concat([[valid_count], top_ids], 0)
  assign_ids = self.sl_ids.assign(math_ops.to_int64(ids_with_header))
  scores_with_header = array_ops.concat([[lowest_top_score], top_scores], 0)
  assign_scores = self.sl_scores.assign(scores_with_header)

  self.last_ops = [assign_ids, assign_scores]
  return control_flow_ops.group(assign_ids, assign_scores)
def _ndtr(x):
  """Implements ndtr core logic.

  With `w = x * sqrt(2) / 2` and `z = |w|`, the result is half of:
    * `1 + erf(w)`   when `z < sqrt(2) / 2`,
    * `2 - erfc(z)`  when `w > 0`,
    * `erfc(z)`      otherwise.
  """
  half_sqrt_2 = constant_op.constant(
      0.5 * math.sqrt(2.), dtype=x.dtype, name="half_sqrt_2")
  scaled = x * half_sqrt_2
  magnitude = math_ops.abs(scaled)

  near_zero = math_ops.less(magnitude, half_sqrt_2)
  central_value = 1. + math_ops.erf(scaled)
  is_positive = math_ops.greater(scaled, 0.)
  upper_tail = 2. - math_ops.erfc(magnitude)
  lower_tail = math_ops.erfc(magnitude)
  tail_value = array_ops.where(is_positive, upper_tail, lower_tail)

  doubled = array_ops.where(near_zero, central_value, tail_value)
  return 0.5 * doubled
def softplus_inverse(x, name=None):
  """Computes the inverse softplus, i.e., x = softplus_inverse(softplus(x)).

  Mathematically this op is equivalent to:

  ```none
  softplus_inverse = log(exp(x) - 1.)
  ```

  Args:
    x: `Tensor`. Non-negative (not enforced), floating-point.
    name: A name for the operation (optional).

  Returns:
    `Tensor`. Has the same type/shape as input `x`.
  """
  with ops.name_scope(name, "softplus_inverse", values=[x]):
    x = ops.convert_to_tensor(x, name="x")
    # Derivation of the numerically stable form used below:
    #   x = softplus(y) = Log[1 + exp{y}], (which means x > 0).
    #   ==> exp{x} = 1 + exp{y}                                (1)
    #   ==> y = Log[exp{x} - 1]                                (2)
    #         = Log[(exp{x} - 1) / exp{x}] + Log[exp{x}]
    #         = Log[(1 - exp{-x}) / 1] + Log[exp{x}]
    #         = Log[1 - exp{-x}] + x                           (3)
    # (2) is the "obvious" inverse, but (3) is more stable for large x. For
    # small x (e.g. x = 1e-10), (3) becomes -inf because 1 - exp{-x} rounds
    # to zero, so 1 - exp{-x} approx x is used for small x > 0.
    #
    # Small/large values are also clamped, to stay congruent with the logic
    # in tensorflow/core/kernels/softplus_op.h.
    #
    # Finally, the input is replaced by one wherever it is too small or too
    # large, so that no unchosen codepath is +/- inf. The gradient of `where`
    # behaves like `pred*pred_true + (1-pred)*pred_false`, so an `inf` in an
    # unselected path would give `0*inf=nan`. `x` is only overwritten where
    # its value is never used. Ones rather than zeros are used because
    # `log(expm1(0.)) = -inf`.
    threshold = np.log(np.finfo(x.dtype.as_numpy_dtype).eps) + 2.
    below_range = math_ops.less(x, np.exp(threshold))
    above_range = math_ops.greater(x, -threshold)
    result_when_small = math_ops.log(x)
    result_when_large = x
    # This `where` is ultimately a no-op for the output: wherever the
    # surrogate ones are substituted, the stable branch is not selected.
    out_of_range = math_ops.logical_or(below_range, above_range)
    x = array_ops.where(out_of_range, array_ops.ones_like(x), x)
    stable = x + math_ops.log(-math_ops.expm1(-x))  # == log(expm1(x))
    large_or_stable = array_ops.where(above_range, result_when_large, stable)
    return array_ops.where(below_range, result_when_small, large_or_stable)
def _mean(self):
  """Returns `mu` broadcast via `_ones()`, guarded for `df <= 1`.

  Returns:
    The mean tensor. With `allow_nan_stats` set, components with `df <= 1`
    are replaced by NaN; otherwise an assertion that `df > 1` is attached to
    the result.
  """
  broadcast_mean = self.mu * self._ones()

  if not self.allow_nan_stats:
    df_check = check_ops.assert_less(
        array_ops.ones((), dtype=self.dtype),
        self.df,
        message="mean not defined for components of df <= 1")
    return control_flow_ops.with_dependencies([df_check], broadcast_mean)

  nan_value = np.array(np.nan, dtype=self.dtype.as_numpy_dtype())
  df_above_one = math_ops.greater(self.df, self._ones())
  nan_fill = array_ops.fill(self.batch_shape(), nan_value, name="nan")
  return array_ops.where(df_above_one, broadcast_mean, nan_fill)
def make_tril_ids(n):
  """Internal helper to create vector of linear indices into y.

  For an `n x n` grid, entry `(row, col)` becomes
  `cumsum(range(n))[row] + col + 1` when `col <= row` and 0 when `col > row`.
  The grid is returned flattened to one dimension.
  """
  col_ids = array_ops.reshape(
      array_ops.tile(math_ops.range(n), [n]), [n, n])
  row_ids = array_ops.tile(
      array_ops.expand_dims(math_ops.range(n), -1), [1, n])
  above_diagonal = math_ops.greater(col_ids, row_ids)

  row_offsets = array_ops.reshape(math_ops.cumsum(math_ops.range(n)), [n, 1])
  linear_ids = array_ops.tile(row_offsets, [1, n]) + col_ids
  linear_ids = math_ops.select(
      above_diagonal,
      array_ops.zeros([n, n], dtype=dtypes.int32),
      linear_ids + 1)
  return array_ops.reshape(linear_ids, [-1])
def safe_divide(numerator, denominator, name):
  """Divides two values, returning 0 if the denominator is <= 0.

  The division is carried out on a guarded denominator in which every
  non-positive entry is replaced by 1. The forward result is unchanged,
  because those entries are overwritten with 0 anyway, but the quotient in the
  discarded branch stays finite. Without the guard a zero denominator yields a
  NaN gradient for `numerator`, since the gradient of `where` still flows
  through the division as `0 / 0`.

  Args:
    numerator: A real `Tensor`.
    denominator: A real `Tensor`, with dtype matching `numerator`.
    name: Name for the returned op.

  Returns:
    0 if `denominator` <= 0, else `numerator` / `denominator`
  """
  is_positive = math_ops.greater(denominator, 0)
  guarded_denominator = tf.where(
      is_positive, denominator, tf.ones_like(denominator))
  return tf.where(
      is_positive,
      math_ops.divide(numerator, guarded_denominator),
      tf.zeros_like(numerator),
      name=name)
def step_fn(inputs):
  """The computation to run on each worker.

  Runs one training step (forward pass, binary cross-entropy averaged with
  `nn.compute_average_loss`, gradient application) and then updates the
  accuracy metric with predictions thresholded at 0.5.

  Args:
    inputs: A `(features, labels)` pair.
  """
  features, labels = inputs

  with backprop.GradientTape() as tape:
    pred = model(features, training=True)
    per_example_loss = keras.losses.binary_crossentropy(labels, pred)
    loss = nn.compute_average_loss(per_example_loss)

  trainable = model.trainable_variables
  grads = tape.gradient(loss, trainable)
  optimizer.apply_gradients(list(zip(grads, model.trainable_variables)))

  predicted_class = math_ops.cast(math_ops.greater(pred, 0.5), dtypes.int64)
  accuracy.update_state(labels, predicted_class)
def _safe_div(numerator, denominator, name):
  """Divides two values, returning 0 if the denominator is <= 0.

  Non-positive denominators are replaced by 1 before dividing. The forward
  value is unaffected, since those positions are set to 0 by the outer
  `where`, but the discarded quotient stays finite. Dividing by a raw zero
  would otherwise produce a NaN gradient for `numerator` (`0 / 0` in the
  backward pass of the division).

  Args:
    numerator: A real `Tensor`.
    denominator: A real `Tensor`, with dtype matching `numerator`.
    name: Name for the returned op.

  Returns:
    0 if `denominator` <= 0, else `numerator` / `denominator`
  """
  denominator_positive = math_ops.greater(denominator, 0)
  safe_denominator = tf.where(
      denominator_positive, denominator, tf.ones_like(denominator))
  quotient = math_ops.divide(numerator, safe_denominator)
  return tf.where(
      denominator_positive, quotient, tf.zeros_like(numerator), name=name)
def stateless_list_files(file_pattern, shuffle=None, seed=None):
  """A dataset of all files matching one or more glob patterns.

  Note that, when shuffling is enabled, a stateless shuffle implementation is
  used. Then the returned dataset supports the TF1 compatibility API
  `tf.data.make_one_shot_iterator()` in TF2.

  Example:
    >>> dataset = tf.stateless_list_files("some_file_pattern")

  Args:
    file_pattern: A string, a list of strings, or a `tf.Tensor` of string type
      (scalar or vector), representing the filename glob (i.e. shell wildcard)
      pattern(s) that will be matched.
    shuffle: (Optional.) If `True`, the file names will be shuffled randomly
      based on a stateless implementation. Defaults to `True`.
    seed: (Optional.) A `tf.int64` scalar `tf.Tensor`, representing the random
      seed that will be used to create the distribution. See
      `tf.random.set_seed` for behavior.

  Returns:
   Dataset: A `Dataset` of strings corresponding to file names.
  """
  with ops.name_scope("list_files"):
    shuffle = True if shuffle is None else shuffle

    file_pattern = ops.convert_to_tensor(
        file_pattern, dtype=string, name="file_pattern")
    matched = gen_io_ops.matching_files(file_pattern)

    # Raise an exception if `file_pattern` does not match any files.
    has_matches = math_ops.greater(
        array_ops.shape(matched)[0], 0, name="match_not_empty")
    joined_patterns = strings.reduce_join(file_pattern, separator=", ")
    error_text = math_ops.add(
        "No files matched pattern: ", joined_patterns, name="message")
    assert_not_empty = debugging.Assert(
        has_matches, [error_text], summarize=1, name="assert_not_empty")
    with control_dependencies([assert_not_empty]):
      matched = identity(matched)

    dataset = data.Dataset.from_tensor_slices(matched)
    if shuffle:
      # Buffer the whole file list (at least one element).
      num_files = shape(matched, out_type=dtypes.int64)[0]
      buffer_size = math_ops.maximum(num_files, 1)
      # Use stateless shuffled dataset
      dataset = dataset.apply(
          stateless_shuffle_dataset(buffer_size, seed=seed))
    return dataset
def _safe_div(numerator, denominator, name):
  """Divides two tensors element-wise, returning 0 if the denominator is <= 0.

  The division uses a guarded copy of the denominator in which non-positive
  entries are replaced by 1. Forward results are unchanged, because those
  positions are overwritten with 0, but the discarded quotient stays finite.
  Dividing by a raw zero would otherwise give a NaN gradient for `numerator`.
  The guard is built at the denominator's own shape, so broadcasting between
  `numerator` and `denominator` keeps working as before.

  Args:
    numerator: A real `Tensor`.
    denominator: A real `Tensor`, with dtype matching `numerator`.
    name: Name for the returned op.

  Returns:
    0 if `denominator` <= 0, else `numerator` / `denominator`
  """
  denominator_positive = math_ops.greater(
      denominator, array_ops.zeros_like(denominator))
  guarded_denominator = array_ops.where(
      denominator_positive, denominator, array_ops.ones_like(denominator))
  t = math_ops.truediv(numerator, guarded_denominator)
  # Compare at the broadcast shape of the quotient, in the denominator's
  # dtype, then cast the zeros to the quotient's dtype for the output branch.
  zero = array_ops.zeros_like(t, dtype=denominator.dtype)
  condition = math_ops.greater(denominator, zero)
  zero = math_ops.cast(zero, t.dtype)
  return array_ops.where(condition, t, zero, name=name)
def calculate_bboxes_intersection(self, bbox_ref, bboxes):
  """Scores each box by the fraction of its area that overlaps `bbox_ref`.

  After transposing, index 0..3 of each input is read as ymin, xmin, ymax,
  xmax. Boxes whose area is not positive get a score of 0.

  Args:
    bbox_ref: Reference box tensor.
    bboxes: Tensor of boxes to score.

  Returns:
    Intersection area divided by each box's own area, or 0 where that area is
    not positive.
  """
  boxes_t = tf.transpose(bboxes)
  ref_t = tf.transpose(bbox_ref)

  top = tf.maximum(boxes_t[0], ref_t[0])
  left = tf.maximum(boxes_t[1], ref_t[1])
  bottom = tf.minimum(boxes_t[2], ref_t[2])
  right = tf.minimum(boxes_t[3], ref_t[3])

  # Negative extents mean no overlap along that axis.
  overlap_height = tf.maximum(bottom - top, 0.)
  overlap_width = tf.maximum(right - left, 0.)
  overlap_area = overlap_height * overlap_width
  box_area = (boxes_t[2] - boxes_t[0]) * (boxes_t[3] - boxes_t[1])

  return tf.where(
      math_ops.greater(box_area, 0),
      math_ops.divide(overlap_area, box_area),
      tf.zeros_like(overlap_area))
def _inverse_max_dcg(self, labels):
  """Computes the inverse of max DCG.

  Labels are sorted by their own value (limited to `self._topn`), turned into
  gains by `self._gain_fn`, discounted by `self._rank_discount_fn` of the
  1-based rank and summed along axis 1.

  Args:
    labels: The label tensor.

  Returns:
    `1 / sum` where the discounted gain sum is positive, otherwise 0. The
    summed axis is kept with size 1.
  """
  (ideal_labels,) = utils.sort_by_scores(labels, [labels], topn=self._topn)
  ranks = math_ops.range(array_ops.shape(ideal_labels)[1]) + 1
  gains = self._gain_fn(ideal_labels)
  discounts = self._rank_discount_fn(math_ops.to_float(ranks))
  max_dcg = math_ops.reduce_sum(gains * discounts, 1, keepdims=True)
  return array_ops.where(
      math_ops.greater(max_dcg, 0.),
      1. / max_dcg,
      array_ops.zeros_like(max_dcg))
def _update_mask(self, weights, threshold):
  """Updates the mask for a given weight tensor.

  This function first computes the cdf of the absolute weights from a
  histogram, and estimates the threshold value such that roughly
  `self._sparsity` fraction of the weights have magnitude less than the
  threshold. The estimate is then blended with the current threshold using
  `threshold_decay`.

  Args:
    weights: The weight tensor that needs to be masked.
    threshold: The current threshold value. The function will compute a new
      threshold and return the exponential moving average using the current
      value of threshold

  Returns:
    new_threshold: The smoothed threshold based on weights and sparsity.
    new_mask: A float32 tensor with the same shape as weights, holding 1 where
      the absolute weight is greater than the smoothed threshold and 0
      elsewhere.

  Raises:
    ValueError: if sparsity is not defined
  """
  if self._sparsity is None:
    raise ValueError('Sparsity variable undefined')

  with ops.name_scope(weights.op.name + '_pruning_ops'):
    magnitudes = math_ops.abs(weights)
    largest = math_ops.reduce_max(magnitudes)
    counts = _histogram(
        magnitudes, [0.0, largest], nbins=self._spec.nbins, dtype=np.float32)

    running_counts = math_ops.cumsum(counts)
    normalized_cdf = math_ops.div(running_counts, math_ops.reduce_sum(counts))

    # Fraction of bins whose cdf is still below the target sparsity, scaled
    # back to the weight range.
    bins_below = math_ops.reduce_sum(
        math_ops.cast(
            math_ops.less(normalized_cdf, self._sparsity), np.float32))
    bin_fraction = math_ops.div(bins_below, float(self._spec.nbins))
    estimated_threshold = math_ops.multiply(bin_fraction, largest)

    decay = self._spec.threshold_decay
    smoothed_threshold = math_ops.add_n([
        math_ops.multiply(estimated_threshold, 1 - decay),
        math_ops.multiply(threshold, decay)
    ])
    new_mask = math_ops.cast(
        math_ops.greater(magnitudes, smoothed_threshold), np.float32)
  return smoothed_threshold, new_mask
def normalBehaviorHelper(self, sampler):
  """Runs `sampler` on a two-label stream and checks values and label mean.

  Args:
    sampler: Callable invoked as `sampler(val, label, probs, batch_size)` and
      expected to return `(data_batch, labels)`.
  """
  # Set up graph.
  random_seed.set_random_seed(1234)
  first_label = 0
  second_label = 3
  # This cond allows the necessary class queues to be populated.
  label = control_flow_ops.cond(
      math_ops.greater(.5, random_ops.random_uniform([])),
      lambda: constant_op.constant(first_label),
      lambda: constant_op.constant(second_label))
  val = [np.array([1, 4]) * label]
  probs = np.array([.8, 0, 0, .2, 0])
  batch_size = 16

  data_batch, labels = sampler(val, label, probs, batch_size)

  # Run session and keep track of how frequently the labels and values appear.
  seen_data = []
  seen_labels = []
  with self.cached_session() as sess:
    # Need to initialize variables that keep running total of classes seen.
    variables.global_variables_initializer().run()

    coord = coordinator.Coordinator()
    threads = queue_runner_impl.start_queue_runners(coord=coord)

    for _ in range(20):
      [data], lbls = sess.run([data_batch, labels])
      seen_data.append(data)
      seen_labels.append(lbls)

    coord.request_stop()
    coord.join(threads)

  # First check that the data matches the labels.
  for batch_labels, batch_data in zip(seen_labels, seen_data):
    for row in range(batch_size):
      self.assertListEqual(
          list(np.array([1, 4]) * batch_labels[row]),
          list(batch_data[row, :]))

  # Check that the labels are approximately correct.
  expected_label = probs[0] * first_label + probs[3] * second_label
  label_ids = range(len(probs))
  label_spread = np.sqrt(np.sum((np.square(label_ids - expected_label))))
  spread_of_mean = label_spread / np.sqrt(len(seen_labels))  # CLT
  actual_label = np.mean(seen_labels)
  # Tolerance is 3 standard deviations of the mean. According to the central
  # limit theorem, this should cover 99.7% of cases. Note that since the seed
  # is fixed, for a given implementation, this test will pass or fail 100% of
  # the time. This use of assertNear is to cover cases where someone changes
  # an implementation detail, which would cause the random behavior to differ.
  self.assertNear(actual_label, expected_label, 3 * spread_of_mean)
def main(argv):
    """Build a small `cond` graph and export it as a SavedModel.

    The graph takes a scalar int32 placeholder named 'input' and returns
    `int32(x > y)` when `x < 0`, otherwise `int32(x > z)`, where `y` and `z`
    are variables initialized to `[1]` and `[-1]`.  The model is written to
    `FLAGS.saved_model_path` under the serving tag with a predict signature
    registered under both 'serving_default' and the default serving key.

    Args:
      argv: Command-line arguments; only the program name is accepted.

    Raises:
      app.UsageError: if extra command-line arguments are supplied.
    """
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    # Clear any previous export.  On a first run the directory does not
    # exist yet, which must not abort the script; if removal fails for
    # another reason the leftover directory is still in place when the
    # builder is created below.
    shutil.rmtree(FLAGS.saved_model_path, ignore_errors=True)

    # Create the graph
    zero = constant_op.constant(0)
    one = variable_scope.get_variable(name='y', initializer=[1])
    neg_one = variable_scope.get_variable(name='z', initializer=[-1])
    x = array_ops.placeholder(dtypes.int32, shape=(), name='input')
    r = control_flow_ops.cond(
        x < zero,
        lambda: math_ops.cast(math_ops.greater(x, one), dtypes.int32),
        lambda: math_ops.cast(math_ops.greater(x, neg_one), dtypes.int32))

    # The context manager closes the session once the export is written.
    with session.Session() as sess:
        sess.run(variables.global_variables_initializer())

        sm_builder = builder.SavedModelBuilder(FLAGS.saved_model_path)
        tensor_info_x = utils.build_tensor_info(x)
        tensor_info_r = utils.build_tensor_info(r)

        func_signature = (
            signature_def_utils.build_signature_def(
                inputs={'x': tensor_info_x},
                outputs={'r': tensor_info_r},
                method_name=signature_constants.PREDICT_METHOD_NAME))

        sm_builder.add_meta_graph_and_variables(
            sess, [tag_constants.SERVING],
            signature_def_map={
                'serving_default': func_signature,
                signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
                    func_signature,
            },
            strip_default_attrs=True)
        sm_builder.save()
def refresh_shortlist():
    """Rebuild the shortlist from the top scores in `self.id_to_score`.

    Slot 0 of `self.sl_ids` receives the number of top-k scores greater than
    `float32.min`, and slot 0 of `self.sl_scores` receives the smallest of
    the top-k scores; the remaining slots hold the top-k ids and scores.

    Returns:
      A grouped op performing both assignments (also stored, as a list, in
      `self.last_ops`).
    """
    top_scores, top_ids = nn_ops.top_k(self.id_to_score, self.shortlist_size)
    lowest_top_score = math_ops.reduce_min(top_scores)

    # Count the entries whose score is above the float32 minimum.
    above_floor = math_ops.greater(top_scores, dtypes.float32.min)
    populated_count = math_ops.reduce_sum(math_ops.to_int32(above_floor))

    ids_payload = array_ops.concat([[populated_count], top_ids], 0)
    update_ids = self.sl_ids.assign(math_ops.to_int64(ids_payload))

    scores_payload = array_ops.concat([[lowest_top_score], top_scores], 0)
    update_scores = self.sl_scores.assign(scores_payload)

    self.last_ops = [update_ids, update_scores]
    return control_flow_ops.group(update_ids, update_scores)
def compute_mean_iou(total_cm, name):
    """Compute the mean intersection-over-union via the confusion matrix.

    Per-class IoU is `diag / (row_sum + col_sum - diag)`.  Where that
    denominator is zero it is replaced by one, so such a class contributes
    an IoU of zero to the mean instead of a division by zero.

    Args:
      total_cm: Confusion-matrix tensor (assumed square, classes on both
        axes -- confirm against the caller).
      name: Name given to the final `reduce_mean` op.

    Returns:
      The mean of the per-class IoU values.
    """
    sum_over_row = math_ops.to_float(math_ops.reduce_sum(total_cm, 0))
    sum_over_col = math_ops.to_float(math_ops.reduce_sum(total_cm, 1))
    cm_diag = math_ops.to_float(array_ops.diag_part(total_cm))
    denominator = sum_over_row + sum_over_col - cm_diag

    # If the value of the denominator is 0, set it to 1 to avoid
    # zero division.
    denominator = array_ops.where(
        math_ops.greater(denominator, 0),
        denominator,
        array_ops.ones_like(denominator))
    iou = math_ops.div(cm_diag, denominator)
    # No eager `iou.eval()` here: evaluating during graph construction
    # requires a default session and fails without one.
    return math_ops.reduce_mean(iou, name=name)
def _apply_dense(self, grad, var):
    """Apply a dense gradient with a norm-ratio-scaled Adam-style step.

    Updates the "m" and "v" slots as exponential moving averages of the
    gradient and its square, forms `m / (sqrt(v) + eps) + wd * var`, and
    scales it by `min(||var||, 10) / ||step||` (or 1.0 when either norm is
    not positive) before subtracting `lr * ratio * step` from `var`.

    Args:
      grad: Gradient tensor for `var`.
      var: The variable being updated.

    Returns:
      An op grouping the variable update with both slot updates.
    """
    base_dtype = var.dtype.base_dtype
    lr_t = math_ops.cast(self._lr_t, base_dtype)
    # The accumulators are fetched as in the original flow; their values are
    # not used by this update.
    _beta1_power, _beta2_power = self._get_beta_accumulators()
    beta1_t = math_ops.cast(self._beta1_t, base_dtype)
    beta2_t = math_ops.cast(self._beta2_t, base_dtype)
    eps = math_ops.cast(self._epsilon_t, base_dtype)
    wd_lambda = math_ops.cast(self._wd_t, base_dtype)

    # Second-moment estimate.
    v = self.get_slot(var, "v")
    v_t = v.assign(beta2_t * v + (1. - beta2_t) * grad**2)

    # First-moment estimate.
    m = self.get_slot(var, "m")
    m_t = m.assign(beta1_t * m + (1. - beta1_t) * grad)

    # L2 norms of the variable and of the proposed step give the ratio.
    var_norm = tf.sqrt(tf.reduce_sum(tf.square(var)))
    step = m_t / (tf.sqrt(v_t) + eps) + wd_lambda * var
    step_norm = tf.sqrt(tf.reduce_sum(tf.square(step)))

    var_norm_positive = math_ops.greater(var_norm, 0)
    step_norm_positive = math_ops.greater(step_norm, 0)
    clipped_ratio = tf.minimum(var_norm, 10) / step_norm
    ratio_if_var_positive = array_ops.where(
        step_norm_positive, clipped_ratio, 1.0)
    ratio = array_ops.where(var_norm_positive, ratio_if_var_positive, 1.0)

    var_update = state_ops.assign_sub(var, lr_t * ratio * step)
    return control_flow_ops.group(*[var_update, v_t, m_t])
def make_tril_ids(n):
    """Internal helper to create vector of linear indices into y.

    Builds an `[n, n]` grid in which entry `(r, c)` with `c <= r` holds
    `cumsum(range(n))[r] + c + 1`, and every entry with `c > r` holds 0,
    then flattens the grid to a vector.
    """
    col_idx = array_ops.reshape(
        array_ops.tile(math_ops.range(n), [n]), [n, n])
    row_idx = array_ops.tile(
        array_ops.expand_dims(math_ops.range(n), -1), [1, n])
    above_diagonal = math_ops.greater(col_idx, row_idx)

    # Start offset of each row, replicated across the row.
    row_starts = array_ops.reshape(
        math_ops.cumsum(math_ops.range(n)), [n, 1])
    linear_ids = array_ops.tile(row_starts, [1, n]) + col_idx

    # Zero out everything above the diagonal; shift the rest by one.
    zero_grid = array_ops.zeros([n, n], dtype=dtypes.int32)
    masked_ids = math_ops.select(above_diagonal, zero_grid, linear_ids + 1)
    return array_ops.reshape(masked_ids, [-1])
def string_input_producer(string_tensor, num_epochs=None, shuffle=True,
                          seed=None, capacity=32, shared_name=None,
                          name=None):
    """Output strings (e.g. filenames) to a queue for an input pipeline.

    Args:
      string_tensor: A 1-D string tensor with the strings to produce.
      num_epochs: An integer (optional). If specified, each string from
        `string_tensor` is produced `num_epochs` times before an
        `OutOfRange` error is generated. If not specified, the strings can
        be cycled through an unlimited number of times.
      shuffle: Boolean. If true, the strings are randomly shuffled within
        each epoch.
      seed: An integer (optional). Seed used if shuffle == True.
      capacity: An integer. Sets the queue capacity.
      shared_name: (optional). If set, this queue will be shared under the
        given name across multiple sessions.
      name: A name for the operations (optional).

    Returns:
      A queue with the output strings.  A `QueueRunner` for the Queue
      is added to the current `Graph`'s `QUEUE_RUNNER` collection.

    Raises:
      ValueError: If the string_tensor is a null Python list.  At runtime,
        will fail with an assertion if string_tensor becomes a null tensor.
    """
    not_null_err = "string_input_producer requires a non-null input tensor"

    # A falsy non-Tensor input (e.g. an empty list) is rejected up front.
    if not (isinstance(string_tensor, ops.Tensor) or string_tensor):
        raise ValueError(not_null_err)

    with ops.name_scope(name, "input_producer", [string_tensor]) as name:
        string_tensor = ops.convert_to_tensor(
            string_tensor, dtype=dtypes.string)

        # Tensors can only be checked for emptiness when the graph runs.
        has_elements = math_ops.greater(array_ops.size(string_tensor), 0)
        non_empty_check = control_flow_ops.Assert(
            has_elements, [not_null_err])
        with ops.control_dependencies([non_empty_check]):
            string_tensor = array_ops.identity(string_tensor)

        return input_producer(
            input_tensor=string_tensor,
            element_shape=[],
            num_epochs=num_epochs,
            shuffle=shuffle,
            seed=seed,
            capacity=capacity,
            shared_name=shared_name,
            name=name,
            summary_name="fraction_of_%d_full" % capacity)
def replica_fn(iterator):
    """Run one training step on the next batch and update `accuracy`.

    Pulls `(batch_data, labels)` from `iterator`, computes the averaged
    binary cross-entropy of the enclosing `model`, applies the gradients
    with the enclosing `optimizer`, and feeds predictions thresholded at
    0.5 into the enclosing `accuracy` metric.
    """
    batch_data, labels = next(iterator)

    with backprop.GradientTape() as tape:
        pred = model(batch_data, training=True)
        # Unreduced per-example losses, averaged by compute_average_loss.
        loss_fn = keras.losses.BinaryCrossentropy(
            reduction=losses_utils.ReductionV2.NONE)
        per_example_loss = loss_fn(labels, pred)
        loss = nn.compute_average_loss(per_example_loss)

    gradients = tape.gradient(loss, model.trainable_variables)
    grads_and_vars = zip(gradients, model.trainable_variables)
    optimizer.apply_gradients(grads_and_vars)

    # Threshold the predictions at 0.5 to obtain hard class ids.
    is_positive = math_ops.greater(pred, 0.5)
    actual_pred = math_ops.cast(is_positive, dtypes.int64)
    accuracy.update_state(labels, actual_pred)
def __call__(self, step):
    """Return the learning rate for `step`.

    While `step <= self.shiftstep` the rate is `self.initlr`.  Afterwards it
    changes linearly: `initlr - |finallr - initlr| / nsteps * (step -
    shiftstep)`.

    NOTE(review): nothing here floors the result at `finallr`, so the value
    keeps decreasing past `shiftstep + nsteps` -- confirm callers stop
    before that point.
    """
    # Magnitude of the per-step change.
    lr_span = math_ops.subtract(self.finallr, self.initlr)
    per_step = math_ops.abs(math_ops.divide(lr_span, self.nsteps))

    # Linear decrease measured from the shift step.
    steps_past_shift = math_ops.subtract(step, self.shiftstep)
    total_drop = math_ops.multiply(per_step, steps_past_shift)
    decayed = math_ops.subtract(self.initlr, total_drop)

    past_shift = math_ops.greater(step, self.shiftstep)
    return control_flow_ops.cond(
        past_shift, lambda: decayed, lambda: self.initlr)
def collatz(x):
    """Count Collatz steps from `x` down to 1, dumping `x` on every step.

    Each iteration emits a `debug_identity_v2` op for `x` (full-tensor mode,
    written under `self.dump_root`) before halving an even `x` or mapping an
    odd `x` to `3 * x + 1`.

    Returns:
      The int32 number of iterations performed.
    """
    dump_urls = ["file://%s" % self.dump_root]
    full_tensor_mode = debug_event_pb2.TensorDebugMode.FULL_TENSOR

    counter = constant_op.constant(0, dtype=dtypes.int32)
    while math_ops.greater(x, 1):
        counter += 1
        gen_debug_ops.debug_identity_v2(
            x,
            tfdbg_context_id="deadbeaf",
            op_name="x",
            output_slot=0,
            tensor_debug_mode=full_tensor_mode,
            debug_urls=dump_urls)
        if math_ops.equal(x % 2, 0):
            x = math_ops.div(x, 2)
        else:
            x = x * 3 + 1
    return counter
def _logits_to_predictions(self, logits):
    """See `_MultiClassHead`.

    Builds the predictions dict from `logits`: the raw logits, elementwise
    sigmoid probabilities, and classes obtained by testing `logits > 0`.
    When `self.logits_dimension == 1` a LOGISTIC entry is added and a zero
    column is prepended to the logits before the remaining entries are
    computed.
    """
    keys = prediction_key.PredictionKey
    predictions = {keys.LOGITS: logits}

    if self.logits_dimension == 1:
        predictions[keys.LOGISTIC] = math_ops.sigmoid(
            logits, name=keys.LOGISTIC)
        # Pair the single logit with an all-zero logit column.
        logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits])

    predictions[keys.PROBABILITIES] = math_ops.sigmoid(
        logits, name=keys.PROBABILITIES)

    is_positive = math_ops.greater(logits, 0)
    predictions[keys.CLASSES] = math_ops.to_int64(
        is_positive, name=keys.CLASSES)
    return predictions
def _mode(self):
    """Return `(alpha - 1) / (alpha_sum - event_size)`, guarded for alpha <= 1.

    With `allow_nan_stats` set, components where `alpha <= 1` are replaced
    by NaN.  Otherwise the result carries an assertion that every component
    of `alpha` is greater than one.
    """
    numerator = self.alpha - 1.
    alpha_total = array_ops.expand_dims(self.alpha_sum, dim=-1)
    event_size = math_ops.cast(self.event_shape()[0], self.dtype)
    mode = numerator / (alpha_total - event_size)

    if not self.allow_nan_stats:
        alpha_above_one = check_ops.assert_less(
            array_ops.ones((), dtype=self.dtype),
            self.alpha,
            message="mode not defined for components of alpha <= 1")
        return control_flow_ops.with_dependencies([alpha_above_one], mode)

    nan = np.array(np.nan, dtype=self.dtype.as_numpy_dtype())
    full_shape = array_ops.concat(
        0, (self.batch_shape(), self.event_shape()))
    is_defined = math_ops.greater(self.alpha, 1.)
    nan_fill = array_ops.fill(full_shape, nan, name="nan")
    return math_ops.select(is_defined, mode, nan_fill)
def _UnsortedSegmentProdGrad(op, grad):
    """Gradient for UnsortedSegmentProd.

    Per segment, the gradient is the segment's product divided by each
    element of the segment input, but that form cannot handle zeros in the
    input.  Unlike reduce_prod, cumsum cannot be used because segments may
    have different numbers of elements.  Three cases are distinguished:

    1) The segment input contains no zeros: dividing by the input is safe.
    2) The segment contains exactly one zero: every input's gradient is
       zero except the zero input's, which is the product of the remaining
       segment entries.
    3) The segment contains at least two zeros: the gradient is zero for
       every input of the segment.

    Returns:
      A 3-tuple: the gradient for the data input, and `None` for the
      segment ids and the number of segments.
    """
    data = op.inputs[0]
    segment_ids = op.inputs[1]
    num_segments = op.inputs[2]

    # unsorted_segment_sum filters out negative indices, so no logical_and
    # with an is-positive mask is needed here.
    zero_mask = math_ops.equal(data, 0)
    zeros_per_segment = gen_math_ops.unsorted_segment_sum(
        math_ops.cast(zero_mask, dtype=dtypes.int32),
        segment_ids,
        num_segments)

    # Case 3: zero the gradient of segments with more than one zero input.
    has_multiple_zeros = math_ops.greater(zeros_per_segment, 1)
    masked_grad = array_ops.where(
        has_multiple_zeros, array_ops.zeros_like(grad), grad)

    # Segment products with every zero replaced by one.
    data_without_zeros = array_ops.where(
        zero_mask, array_ops.ones_like(data), data)
    prod_without_zeros = gen_math_ops.unsorted_segment_prod(
        data_without_zeros, segment_ids, num_segments)

    # Clip the indices so that gather never sees a negative id.
    clipped_ids = math_ops.maximum(
        segment_ids, array_ops.zeros_like(segment_ids))
    prod_per_element = array_ops.gather(op.outputs[0], clipped_ids)
    prod_without_zeros_per_element = array_ops.gather(
        prod_without_zeros, clipped_ids)
    quotient = prod_per_element / data  # May contain nan/inf.

    # Select per element between the zero case and the non-zero case.  The
    # zero mask also picks entries with a negative index, but the
    # _GatherDropNegatives call below sets their gradient to 0.
    partial_derivative = array_ops.where(
        zero_mask, prod_without_zeros_per_element, quotient)
    grad_per_element = _GatherDropNegatives(
        masked_grad, segment_ids, clipped_ids)[0]
    return grad_per_element * partial_derivative, None, None
def assert_greater(x, y, data=None, summarize=None, message=None, name=None):
    """Assert the condition `x > y` holds element-wise.

    Example of adding a dependency to an operation:

    ```python
    with tf.control_dependencies([tf.assert_greater(x, y)]):
      output = tf.reduce_sum(x)
    ```

    This condition holds if for every pair of (possibly broadcast) elements
    `x[i]`, `y[i]`, we have `x[i] > y[i]`.
    If both `x` and `y` are empty, this is trivially satisfied.

    Args:
      x:  Numeric `Tensor`.
      y:  Numeric `Tensor`, same dtype as and broadcastable to `x`.
      data:  The tensors to print out if the condition is False.  Defaults to
        error message and first few entries of `x`, `y`.
      summarize: Print this many entries of each tensor.
      message: A string to prefix to the default message.
      name: A name for this operation (optional).  Defaults to
        "assert_greater".

    Returns:
      Op that raises `InvalidArgumentError` if `x > y` is False.
    """
    message = message or ''
    with ops.name_scope(name, 'assert_greater', [x, y, data]):
        x = ops.convert_to_tensor(x, name='x')
        y = ops.convert_to_tensor(y, name='y')
        # Eager tensors are described by shape and dtype rather than by
        # their graph name.
        if context.executing_eagerly():
            x_name = _shape_and_dtype_str(x)
            y_name = _shape_and_dtype_str(y)
        else:
            x_name = x.name
            y_name = y.name

        if data is None:
            # Each literal is its own list entry; without the comma after
            # the first one the two strings fuse into
            # "...element-wise:x (...) = ".
            data = [
                message,
                'Condition x > y did not hold element-wise:',
                'x (%s) = ' % x_name, x,
                'y (%s) = ' % y_name, y
            ]
        condition = math_ops.reduce_all(math_ops.greater(x, y))
        return control_flow_ops.Assert(condition, data, summarize=summarize)
def _safe_mean(losses, num_present):
    """Computes a safe mean of the losses.

    Args:
      losses: A tensor whose elements contain individual loss measurements.
      num_present: The number of measurable losses in the tensor.

    Returns:
      A scalar representing the mean of the losses. If `num_present` is
      zero, then zero is returned.
    """
    total_loss = math_ops.reduce_sum(losses)

    any_present = math_ops.greater(num_present, 0)
    # Divide by one instead of zero; that quotient is discarded by the outer
    # select whenever `num_present` is zero.
    none_present = math_ops.equal(num_present, 0)
    safe_count = math_ops.select(none_present, 1.0, num_present)
    mean_loss = math_ops.div(total_loss, safe_count)

    return math_ops.select(
        any_present,
        mean_loss,
        array_ops.zeros_like(total_loss),
        name="value")
def testIf(self):
    """`functional_ops.If` runs `Twice` for x > 0 and `Thrice` otherwise."""

    @function.Defun(dtypes.float32)
    def Twice(x):
        return x * 2

    @function.Defun(dtypes.float32)
    def Thrice(x):
        return x * 3 + 1

    # (fed value, expected output): positive inputs are doubled, all others
    # are mapped to 3 * x + 1.
    expectations = (
        (9., 18.),
        (-8., -23.),
        (0., 1.),
    )

    with self.test_session(use_gpu=False) as sess:
        x = array_ops.placeholder(dtypes.float32)
        ret = functional_ops.If(
            math_ops.greater(x, 0), [x], Twice, Thrice)[0]

        for fed_value, expected in expectations:
            actual = sess.run(ret, feed_dict={x: fed_value})
            self.assertAllEqual(actual, expected)
def _resource_apply_dense(self, grad, var):
    """Apply a dense gradient using a rectified-Adam style update.

    Updates the 'm' and 'v' slots as moving averages of the gradient and
    its square.  When the approximated SMA length `ro_t` exceeds 4 the step
    is the rectified adaptive one; otherwise it is `lr * m_hat`.

    Args:
      grad: Gradient tensor for `var`.
      var: The variable being updated.

    Returns:
      An op grouping the variable update with both slot updates.
    """
    var_dtype = var.dtype.base_dtype
    lr_t = self._decayed_lr(var_dtype)
    m = self.get_slot(var, 'm')
    v = self.get_slot(var, 'v')
    beta_1_t = self._get_hyper('beta_1', var_dtype)
    beta_2_t = self._get_hyper('beta_2', var_dtype)
    epsilon_t = ops.convert_to_tensor(self.epsilon, var_dtype)
    local_step = math_ops.cast(self.iterations + 1, var_dtype)
    beta_1_power = math_ops.pow(beta_1_t, local_step)
    beta_2_power = math_ops.pow(beta_2_t, local_step)

    # Maximum length of the approximated SMA.
    sma_inf = 2. / (1. - beta_2_t) - 1.

    # v_t = beta2 * v + (1 - beta2) * g_t^2
    second_moment = beta_2_t * v + (1. - beta_2_t) * math_ops.square(grad)
    v_t = state_ops.assign(v, second_moment, use_locking=self._use_locking)

    # m_t = beta1 * m + (1 - beta1) * g_t
    first_moment = beta_1_t * m + (1. - beta_1_t) * grad
    m_t = state_ops.assign(m, first_moment, use_locking=self._use_locking)
    m_t_hat = m_t / (1. - beta_1_power)

    # Length of the approximated SMA at this step.
    sma_t = sma_inf - 2. * local_step * beta_2_power / (1. - beta_2_power)

    def rectified_step():
        v_t_hat = math_ops.sqrt(v_t / (1 - beta_2_power))
        rectifier = math_ops.sqrt(
            ((sma_t - 4) * (sma_t - 2) * sma_inf) /
            ((sma_inf - 4) * (sma_inf - 2) * sma_t))
        return lr_t * rectifier * m_t_hat / (v_t_hat + epsilon_t)

    def unrectified_step():
        return lr_t * m_t_hat

    # Both slots must be written before the step is computed and applied.
    with ops.control_dependencies([m_t, v_t]):
        var_delta = control_flow_ops.cond(
            math_ops.greater(sma_t, 4),
            true_fn=rectified_step,
            false_fn=unrectified_step)
        var_up = state_ops.assign_sub(
            var, var_delta, use_locking=self._use_locking)
    return control_flow_ops.group(*[var_up, v_t, m_t])
def compute_mean_Dice_score(name):
    """Compute per-class Dice scores via the confusion matrix.

    Reads the confusion matrix `total_cm` from the enclosing scope.  Despite
    the function name, the scores are returned per class and are not
    averaged here.

    Args:
      name: Currently unused; it named the mean op in an earlier version
        that returned the averaged score.

    Returns:
      Dice_score: `2 * diag / (sum over axis 0 + sum over axis 1)` per
        class, with a zero denominator replaced by one.
      SS: `diag / (sum over axis 1)` per class.
        NOTE(review): this division is not guarded, so a class whose axis-1
        sum is zero yields a non-finite value -- confirm that callers expect
        this.
    """
    # Debug output of the confusion-matrix shape, kept from the original.
    print(total_cm.shape)

    sum_over_row = math_ops.to_float(math_ops.reduce_sum(total_cm, 0))
    sum_over_col = math_ops.to_float(math_ops.reduce_sum(total_cm, 1))
    cm_diag = math_ops.to_float(array_ops.diag_part(total_cm))
    denominator = sum_over_row + sum_over_col

    # If the value of the denominator is 0, set it to 1 to avoid
    # zero division.
    denominator = array_ops.where(
        math_ops.greater(denominator, 0),
        denominator,
        array_ops.ones_like(denominator))
    Dice_score = math_ops.div(cm_diag * 2, denominator)
    SS = math_ops.div(cm_diag, sum_over_col)

    # Return the per-class Dice score rather than its mean.
    return Dice_score, SS
def _apply_sparse_shared(self, grad, var, indices, scatter_add):
    """Apply a sparse gradient using a rectified-Adam style update.

    The 'm' and 'v' slots are first decayed in full and then receive the
    scaled gradient terms at `indices` through `scatter_add`.  When the
    approximated SMA length `rho_t` exceeds 5 the step is the rectified
    adaptive one; otherwise it is the bias-corrected momentum step.

    Args:
      grad: Gradient values for the rows selected by `indices`.
      var: The variable being updated.
      indices: Indices of the rows that `grad` applies to.
      scatter_add: Callable `(slot, indices, values)` adding `values` into
        `slot` at `indices`.

    Returns:
      An op grouping the variable update with both slot updates.
    """
    base_dtype = var.dtype.base_dtype
    step, beta1_power, beta2_power = self._get_beta_accumulators()
    step = math_ops.cast(step, base_dtype)
    beta1_power = math_ops.cast(beta1_power, base_dtype)
    beta2_power = math_ops.cast(beta2_power, base_dtype)
    lr_t = math_ops.cast(self._lr_t, base_dtype)
    beta1_t = math_ops.cast(self._beta1_t, base_dtype)
    beta2_t = math_ops.cast(self._beta2_t, base_dtype)
    epsilon_t = math_ops.cast(self._epsilon_t, base_dtype)

    # m_t = beta1 * m + (1 - beta1) * g_t
    m = self.get_slot(var, "m")
    m_increment = grad * (1 - beta1_t)
    m_decayed = state_ops.assign(
        m, m * beta1_t, use_locking=self._use_locking)
    with ops.control_dependencies([m_decayed]):
        m_t = scatter_add(m, indices, m_increment)

    # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
    v = self.get_slot(var, "v")
    v_increment = (grad * grad) * (1 - beta2_t)
    v_decayed = state_ops.assign(
        v, v * beta2_t, use_locking=self._use_locking)
    with ops.control_dependencies([v_decayed]):
        v_t = scatter_add(v, indices, v_increment)

    rho_inf = math_ops.cast(2.0 / (1.0 - self._beta2) - 1.0, base_dtype)
    rho_t = rho_inf - step * (2.0 * beta2_power / (1.0 - beta2_power))

    # Rectification term; built outside the cond, consumed by its true
    # branch only.
    rect_numerator = (
        (1.0 - beta2_power) * ((rho_t - 4.0) * (rho_t - 2.0) * rho_inf))
    rect_denominator = (rho_inf - 4.0) * (rho_inf - 2.0) * rho_t
    r_t = math_ops.sqrt(rect_numerator / rect_denominator)

    def rectified_update():
        return ((lr_t / (1.0 - beta1_power) * r_t) *
                (m_t / (math_ops.sqrt(v_t) + epsilon_t)))

    def momentum_update():
        return (lr_t / (1.0 - beta1_power)) * m_t

    update = control_flow_ops.cond(
        math_ops.greater(rho_t, 5.0),
        true_fn=rectified_update,
        false_fn=momentum_update)
    var_update = state_ops.assign_sub(
        var, update, use_locking=self._use_locking)
    return control_flow_ops.group(*[var_update, m_t, v_t])
def do_iteration(current_argument, residual_powers, accumulator):
    """Compute one step of iterative exponentiation by squaring.

    The recursive form is:
      power(A, p) = { power(matmul(A, A), p / 2) for even p
                    { matmul(A, power(matmul(A, A), (p - 1) / 2)) for odd p
      power(A, 0) = I

    The power(A, 0) = I case is handled by starting with accumulator set to
    the identity matrix; matrices with zero residual powers are passed
    through unchanged.

    Args:
      current_argument: On this step, the first argument (A^2..^2) to the
        (unrolled) recursive function. [batch size x N x N]
      residual_powers: On this step, the second argument (residual p).
        [batch_size]
      accumulator: Accumulates the exterior multiplications from the odd
        powers (initially the identity matrix). [batch_size x N x N]

    Returns:
      Updated versions of each argument for one step of the unrolled
      computation. Batch entries whose residual power is not greater than
      one are returned unchanged.
    """
    zero_powers = array_ops.zeros(
        array_ops.shape(residual_powers), dtype=dtypes.int32)
    power_is_even = math_ops.equal(residual_powers % 2, zero_powers)

    # Odd powers peel one factor off into the accumulator.
    multiplied_accumulator = math_ops.matmul(accumulator, current_argument)
    candidate_accumulator = array_ops.where(
        power_is_even, accumulator, multiplied_accumulator)
    squared_argument = math_ops.matmul(current_argument, current_argument)

    still_running = math_ops.greater(residual_powers, 1)
    halved_powers = (residual_powers - residual_powers % 2) // 2

    # Stop updating once the base case is reached; some batch elements may
    # finish sooner than others.
    next_accumulator = array_ops.where(
        still_running, candidate_accumulator, accumulator)
    next_argument = array_ops.where(
        still_running, squared_argument, current_argument)
    next_powers = array_ops.where(
        still_running, halved_powers, residual_powers)
    return (next_argument, next_powers, next_accumulator)
def gcd(a, b, name=None):
    """Returns the greatest common divisor via Euclid's algorithm.

    Args:
      a: The dividend. A scalar integer `Tensor`.
      b: The divisor. A scalar integer `Tensor`.
      name: An optional name for the operation.

    Returns:
      A scalar `Tensor` representing the greatest common divisor between `a`
      and `b`.

    Raises:
      ValueError: If `a` or `b` are not scalar integers.
    """
    with ops.name_scope(name, 'gcd', [a, b]):
        a = ops.convert_to_tensor(a)
        b = ops.convert_to_tensor(b)

        a.shape.assert_has_rank(0)
        b.shape.assert_has_rank(0)

        if not a.dtype.is_integer:
            raise ValueError('a must be an integer type. Got: %s' % a.dtype)
        if not b.dtype.is_integer:
            raise ValueError('b must be an integer type. Got: %s' % b.dtype)

        # TPU requires static shape inference. GCD is used for subframe size
        # computation, so static computation is preferred where possible.
        static_a = tensor_util.constant_value(a)
        static_b = tensor_util.constant_value(b)
        if static_a is not None and static_b is not None:
            # Python 2 keeps gcd in `fractions`; Python 3 has it in `math`.
            python_gcd = (
                fractions.gcd if sys.version_info.major < 3 else math.gcd)
            return ops.convert_to_tensor(python_gcd(static_a, static_b))

        def divisor_is_positive(_, divisor):
            return math_ops.greater(divisor, array_ops.zeros_like(divisor))

        def euclid_step(dividend, divisor):
            return [divisor, math_ops.mod(dividend, divisor)]

        a, b = control_flow_ops.while_loop(
            divisor_is_positive, euclid_step, [a, b], back_prop=False)
        return a
def _test_combinations():
    """Build the (function, predicate) parameter combinations.

    Every scalar map function is paired with every scalar predicate and
    named "Scalar<i><j>"; every tuple-producing function is paired with
    every two-argument predicate and named "Tuple<i><j>".

    Returns:
      The concatenation of one `combinations.combine(...)` result per case,
      each with the predicate wrapped in a `combinations.NamedObject`.
    """
    identity = lambda x: x
    increment = lambda x: x + 1
    minus_five = lambda x: x - 5

    def increment_and_square(x):
        y = x + 1
        return y * y

    scalar_functions = [identity, increment, minus_five, increment_and_square]

    take_all = lambda x: constant_op.constant(True)
    is_zero = lambda x: math_ops.equal(x, 0)
    is_even = lambda x: math_ops.equal(x % 2, 0)
    greater = lambda x: math_ops.greater(x + 5, 0)
    scalar_predicates = [take_all, is_zero, is_even, greater]

    cases = [
        (function, "Scalar{}{}".format(i, j), predicate)
        for i, function in enumerate(scalar_functions)
        for j, predicate in enumerate(scalar_predicates)
    ]

    replicate = lambda x: (x, x)
    with_two = lambda x: (x, 2)
    tuple_functions = [replicate, with_two]

    take_all = lambda x, y: constant_op.constant(True)
    is_zero = lambda x, y: math_ops.equal(x * math_ops.cast(y, dtypes.int64), 0)
    tuple_predicates = [take_all, is_zero]

    cases.extend(
        (function, "Tuple{}{}".format(i, j), predicate)
        for i, function in enumerate(tuple_functions)
        for j, predicate in enumerate(tuple_predicates))

    # Concatenate the per-case combinations in case order.
    combined = []
    for function, case_name, predicate in cases:
        combined = combined + combinations.combine(
            function=function,
            predicate=combinations.NamedObject(case_name, predicate))
    return combined