def is_mask_update_iter(self, global_step, last_update_step): """Function for checking if the current step is a mask update step. It also creates the drop_fraction op and assigns it to the self object. Args: global_step: tf.Variable(int), current training step. last_update_step: tf.Variable(int), holding the last iteration the mask is updated. Used to determine whether current iteration is a mask update step. Returns: bool, whether the current iteration is a mask_update step. """ gs_dtype = global_step.dtype self._begin_step = math_ops.cast(self._begin_step, gs_dtype) self._end_step = math_ops.cast(self._end_step, gs_dtype) self._frequency = math_ops.cast(self._frequency, gs_dtype) is_step_within_update_range = math_ops.logical_and( math_ops.greater_equal(global_step, self._begin_step), math_ops.logical_or( math_ops.less_equal(global_step, self._end_step), # If _end_step is negative, we never stop updating the mask. # In other words we update the mask with given frequency until the # training ends. math_ops.less(self._end_step, 0))) is_update_step = math_ops.less_equal( math_ops.add(last_update_step, self._frequency), global_step) is_mask_update_iter_op = math_ops.logical_and( is_step_within_update_range, is_update_step) self.drop_fraction = self.get_drop_fraction(global_step, is_mask_update_iter_op) return is_mask_update_iter_op
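# A hedged, self-contained sketch (not from the original source) of the same
# predicate, rewritten with the public TF 2.x API: a step is a mask-update
# step when it lies within [begin, end] (or end < 0, i.e. "never stop") and at
# least `frequency` steps have passed since the last update. The function name
# and all numbers below are illustrative.
import tensorflow as tf

def is_mask_update_step(global_step, last_update_step, begin=0, end=-1,
                        frequency=10):
  within_range = tf.logical_and(
      tf.greater_equal(global_step, begin),
      tf.logical_or(tf.less_equal(global_step, end), tf.less(end, 0)))
  update_due = tf.less_equal(last_update_step + frequency, global_step)
  return tf.logical_and(within_range, update_due)

print(is_mask_update_step(tf.constant(25), tf.constant(20)).numpy())  # False: only 5 steps since last update
print(is_mask_update_step(tf.constant(30), tf.constant(20)).numpy())  # True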
def call(self, inputs, count_weights=None): if isinstance(inputs, (list, np.ndarray)): inputs = ops.convert_to_tensor_v2_with_dispatch(inputs) if inputs.shape.rank == 1: inputs = array_ops.expand_dims(inputs, 1) if count_weights is not None and self.output_mode != COUNT: raise ValueError( "count_weights is not used in `output_mode='binary'`. " "Please pass a single input.") out_depth = self.num_tokens binary_output = (self.output_mode == BINARY) if isinstance(inputs, sparse_tensor.SparseTensor): max_value = math_ops.reduce_max(inputs.values) min_value = math_ops.reduce_min(inputs.values) else: max_value = math_ops.reduce_max(inputs) min_value = math_ops.reduce_min(inputs) condition = math_ops.logical_and( math_ops.greater(math_ops.cast(out_depth, max_value.dtype), max_value), math_ops.greater_equal(min_value, math_ops.cast(0, min_value.dtype))) control_flow_ops.Assert(condition, [ "Input values must be in the range 0 <= values < num_tokens" " with num_tokens={}".format(out_depth) ]) if self.sparse: return sparse_bincount(inputs, out_depth, binary_output, count_weights) else: return dense_bincount(inputs, out_depth, binary_output, count_weights)
def _filter_input(input_tensor, vocab_freq_table, vocab_min_count, vocab_subsampling, corpus_size, seed): """Filters input tensor based on vocab freq, threshold, and subsampling.""" if vocab_freq_table is None: return input_tensor if not isinstance(vocab_freq_table, lookup.InitializableLookupTableBase): raise ValueError( "vocab_freq_table must be a subclass of " "InitializableLookupTableBase (such as HashTable) instead of type " "{}.".format(type(vocab_freq_table))) with ops.name_scope( "filter_vocab", values=[vocab_freq_table, input_tensor, vocab_min_count]): freq = vocab_freq_table.lookup(input_tensor) # Filters out elements in input_tensor that are not found in # vocab_freq_table (table returns a default value of -1 specified above when # an element is not found). mask = math_ops.not_equal(freq, vocab_freq_table.default_value) # Filters out elements whose vocab frequencies are less than the threshold. if vocab_min_count is not None: cast_threshold = math_ops.cast(vocab_min_count, freq.dtype) mask = math_ops.logical_and(mask, math_ops.greater_equal(freq, cast_threshold)) input_tensor = array_ops.boolean_mask(input_tensor, mask) freq = array_ops.boolean_mask(freq, mask) if not vocab_subsampling: return input_tensor if vocab_subsampling < 0 or vocab_subsampling > 1: raise ValueError( "Invalid vocab_subsampling={} - it should be within range [0, 1].". format(vocab_subsampling)) # Subsamples the input tokens based on vocabulary frequency and # vocab_subsampling threshold (ie randomly discard commonly appearing # tokens). with ops.name_scope( "subsample_vocab", values=[input_tensor, freq, vocab_subsampling]): corpus_size = math_ops.cast(corpus_size, dtypes.float64) freq = math_ops.cast(freq, dtypes.float64) vocab_subsampling = math_ops.cast(vocab_subsampling, dtypes.float64) # From tensorflow_models/tutorials/embedding/word2vec_kernels.cc, which is # suppose to correlate with Eq. 5 in http://arxiv.org/abs/1310.4546. keep_prob = ((math_ops.sqrt(freq / (vocab_subsampling * corpus_size)) + 1.0) * (vocab_subsampling * corpus_size / freq)) random_prob = random_ops.random_uniform( array_ops.shape(freq), minval=0, maxval=1, dtype=dtypes.float64, seed=seed) mask = math_ops.less_equal(random_prob, keep_prob) return array_ops.boolean_mask(input_tensor, mask)
def _lower_triangular_mask(shape):
  """Creates a lower-triangular boolean mask over the last 2 dimensions."""
  row_index = math_ops.cumsum(
      array_ops.ones(shape=shape, dtype=dtypes.int32), axis=-2)
  col_index = math_ops.cumsum(
      array_ops.ones(shape=shape, dtype=dtypes.int32), axis=-1)
  return math_ops.greater_equal(row_index, col_index)
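# A hedged usage sketch (not part of the original source): the same
# lower-triangular "causal" mask built with the public TF 2.x API, assuming a
# hypothetical attention-score shape of [batch, query_len, key_len].
import tensorflow as tf

def causal_mask(shape):
  # Position (i, j) is kept only when the row (query) index i is >= the
  # column (key) index j.
  row_index = tf.cumsum(tf.ones(shape, dtype=tf.int32), axis=-2)
  col_index = tf.cumsum(tf.ones(shape, dtype=tf.int32), axis=-1)
  return tf.greater_equal(row_index, col_index)

# For shape [1, 3, 3] this prints a single lower-triangular boolean matrix.
print(causal_mask([1, 3, 3]))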
def _benchmarkMapAndFilter(self, chain_length, optimize_dataset): with ops.Graph().as_default(): dataset = dataset_ops.Dataset.from_tensors(0).repeat(None) for _ in range(chain_length): dataset = dataset.map(lambda x: x + 5).filter( lambda x: math_ops.greater_equal(x - 5, 0)) if optimize_dataset: dataset = dataset.apply( optimization.optimize(["map_and_filter_fusion"])) iterator = dataset.make_one_shot_iterator() next_element = iterator.get_next() with session.Session() as sess: for _ in range(10): sess.run(next_element.op) deltas = [] for _ in range(100): start = time.time() for _ in range(100): sess.run(next_element.op) end = time.time() deltas.append(end - start) median_wall_time = np.median(deltas) / 100 opt_mark = "opt" if optimize_dataset else "no-opt" print("Map and filter dataset {} chain length: {} Median wall time: {}". format(opt_mark, chain_length, median_wall_time)) self.report_benchmark( iters=1000, wall_time=median_wall_time, name="benchmark_map_and_filter_dataset_chain_latency_{}_{}".format( opt_mark, chain_length))
def _benchmarkFilters(self, chain_length, optimize_dataset): with ops.Graph().as_default(): dataset = dataset_ops.Dataset.from_tensors(5).repeat(None) for _ in range(chain_length): dataset = dataset.filter(lambda x: math_ops.greater_equal(x - 5, 0)) if optimize_dataset: dataset = dataset.apply(optimization.optimize(["filter_fusion"])) iterator = dataset.make_one_shot_iterator() next_element = iterator.get_next() with session.Session() as sess: for _ in range(10): self.evaluate(next_element.op) deltas = [] for _ in range(100): start = time.time() for _ in range(100): self.evaluate(next_element.op) end = time.time() deltas.append(end - start) median_wall_time = np.median(deltas) / 100 opt_mark = "opt" if optimize_dataset else "no-opt" print("Filter dataset {} chain length: {} Median wall time: {}".format( opt_mark, chain_length, median_wall_time)) self.report_benchmark( iters=1000, wall_time=median_wall_time, name="benchmark_filter_dataset_chain_latency_{}_{}".format( opt_mark, chain_length))
def _sort_and_normalize(labels, logits, weights=None): """Sorts `labels` and `logits` and normalize `weights`. Args: labels: A `Tensor` of the same shape as `logits` representing graded relevance. logits: A `Tensor` with shape [batch_size, list_size]. Each value is the ranking score of the corresponding item. weights: A scalar, a `Tensor` with shape [batch_size, 1], or a `Tensor` with the same shape as `labels`. Returns: A tuple of (sorted_labels, sorted_logits, sorted_weights). """ labels = ops.convert_to_tensor(labels) logits = ops.convert_to_tensor(logits) logits.get_shape().assert_has_rank(2) logits.get_shape().assert_is_compatible_with(labels.get_shape()) weights = 1.0 if weights is None else ops.convert_to_tensor(weights) weights = array_ops.ones_like(labels) * weights _, topn = array_ops.unstack(array_ops.shape(logits)) # Only sort entries with valid labels that are >= 0. scores = array_ops.where( math_ops.greater_equal(labels, 0.), logits, -1e-6 * array_ops.ones_like(logits) + math_ops.reduce_min(logits, axis=1, keepdims=True)) sorted_labels, sorted_logits, sorted_weights = utils.sort_by_scores( scores, [labels, logits, weights], topn=topn) return sorted_labels, sorted_logits, sorted_weights
def mean_reciprocal_rank(labels, predictions, weights=None, name=None): """Computes mean reciprocal rank (MRR). Args: labels: A `Tensor` of the same shape as `predictions`. A value >= 1 means a relevant example. predictions: A `Tensor` with shape [batch_size, list_size]. Each value is the ranking score of the corresponding example. weights: A `Tensor` of the same shape of predictions or [batch_size, 1]. The former case is per-example and the latter case is per-list. name: A string used as the name for this metric. Returns: A metric for the weighted mean reciprocal rank of the batch. """ with ops.name_scope(name, 'mean_reciprocal_rank', (labels, predictions, weights)): _, list_size = array_ops.unstack(array_ops.shape(predictions)) labels, predictions, weights, topn = _prepare_and_validate_params( labels, predictions, weights, list_size) sorted_labels, = utils.sort_by_scores(predictions, [labels], topn=topn) # Relevance = 1.0 when labels >= 1.0 to accommodate graded relevance. relevance = math_ops.to_float( math_ops.greater_equal(sorted_labels, 1.0)) reciprocal_rank = 1.0 / math_ops.to_float(math_ops.range(1, topn + 1)) # MRR has a shape of [batch_size, 1] mrr = math_ops.reduce_max(relevance * reciprocal_rank, axis=1, keepdims=True) return math_ops.reduce_mean(mrr * array_ops.ones_like(weights) * weights)
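# A hedged numeric sketch (illustrative, not from the original source) of the
# core MRR computation above, using public ops on one made-up, already-sorted
# list of labels: relevance is labels >= 1, and MRR is max(relevance / rank).
import tensorflow as tf

sorted_labels = tf.constant([[0., 1., 0., 2.]])        # sorted by score
relevance = tf.cast(tf.greater_equal(sorted_labels, 1.0), tf.float32)
reciprocal_rank = 1.0 / tf.cast(tf.range(1, 5), tf.float32)   # [1, 1/2, 1/3, 1/4]
mrr = tf.reduce_max(relevance * reciprocal_rank, axis=1, keepdims=True)
print(mrr.numpy())  # [[0.5]] -- the first relevant item sits at rank 2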
def _GatherDropNegatives(params,
                         ids,
                         zero_clipped_indices=None,
                         is_positive=None):
  """Helper function for unsorted segment ops.

  Gathers params for positive segment ids and gathers 0 for inputs with
  negative segment id. Also returns the clipped indices and a boolean mask
  with the same shape as ids where a positive id is masked as true. With
  this, the latter two can be passed as arguments to this function to reuse
  them.
  """
  if zero_clipped_indices is None:
    zero_clipped_indices = math_ops.maximum(ids, array_ops.zeros_like(ids))
  gathered = array_ops.gather(params, zero_clipped_indices)
  if is_positive is None:
    is_positive = math_ops.greater_equal(ids, 0)
    # tf.where(condition, x, y) requires condition to have the same shape as
    # x and y.
    # todo(philjd): remove this if tf.where supports broadcasting (#9284)
    for _ in range(gathered.shape.ndims - is_positive.shape.ndims):
      is_positive = array_ops.expand_dims(is_positive, -1)
    is_positive = (
        is_positive & array_ops.ones_like(gathered, dtype=dtypes.bool))
  # replace gathered params of negative indices with 0
  zero_slice = array_ops.zeros_like(gathered)
  return (array_ops.where(is_positive, gathered, zero_slice),
          zero_clipped_indices, is_positive)
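# A hedged sketch (not from the original source) of the same "gather rows for
# non-negative ids, zeros otherwise" behaviour using public ops; params and
# ids are made up for illustration.
import tensorflow as tf

params = tf.constant([[1., 1.], [2., 2.], [3., 3.]])
ids = tf.constant([2, -1, 0])
clipped = tf.maximum(ids, tf.zeros_like(ids))           # [2, 0, 0]
gathered = tf.gather(params, clipped)
is_positive = tf.greater_equal(ids, 0)[:, tf.newaxis]   # [[True], [False], [True]]
result = tf.where(tf.broadcast_to(is_positive, tf.shape(gathered)),
                  gathered, tf.zeros_like(gathered))
print(result.numpy())  # [[3. 3.] [0. 0.] [1. 1.]]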
def __call__(self, w):
  trace = tf.linalg.trace(w)
  return w * (
      math_ops.cast(math_ops.greater_equal(trace, 1.), K.floatx()) *
      math_ops.cast(math_ops.less_equal(w - tf.linalg.diag_part(w), 0),
                    K.floatx()))  # + self.alpha * tf.norm(w, 1)
def fn(a, b):
  fn1 = lambda: computation_with_string_ops(a * 100)
  fn2 = lambda: computation_with_string_ops(a)
  pred = math_ops.greater_equal(a, b)
  result = array_ops.identity(
      control_flow_ops.cond(pred, fn1, fn2),
      name="uncompilable_control_flow")
  return result
def _prune_invalid_ids(sparse_ids, sparse_weights):
  """Prune invalid IDs (< 0) from the input ids and weights."""
  is_id_valid = math_ops.greater_equal(sparse_ids.values, 0)
  if sparse_weights is not None:
    is_id_valid = math_ops.logical_and(
        is_id_valid, math_ops.greater(sparse_weights.values, 0))
  sparse_ids = sparse_ops.sparse_retain(sparse_ids, is_id_valid)
  if sparse_weights is not None:
    sparse_weights = sparse_ops.sparse_retain(sparse_weights, is_id_valid)
  return sparse_ids, sparse_weights
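# A hedged sketch (not from the original source) of pruning negative ids from
# a SparseTensor with the public tf.sparse API; the example ids and shape are
# made up for illustration.
import tensorflow as tf

sparse_ids = tf.sparse.SparseTensor(
    indices=[[0, 0], [0, 1], [1, 0]], values=[3, -1, 7], dense_shape=[2, 2])
is_id_valid = tf.greater_equal(sparse_ids.values, 0)   # [True, False, True]
pruned = tf.sparse.retain(sparse_ids, is_id_valid)     # keeps values 3 and 7
print(tf.sparse.to_dense(pruned, default_value=0).numpy())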
def _accuracy_at_threshold(labels, predictions, weights, threshold, name=None):
  with ops.name_scope(name, 'accuracy_at_%s' % threshold,
                      (predictions, labels, weights, threshold)) as scope:
    threshold_predictions = math_ops.to_float(
        math_ops.greater_equal(predictions, threshold))
    return metrics_lib.accuracy(
        labels=labels,
        predictions=threshold_predictions,
        weights=weights,
        name=scope)
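# A hedged numeric sketch (illustrative only) of "accuracy at a threshold"
# using public ops; the labels, predictions and threshold below are made up.
import tensorflow as tf

labels = tf.constant([0., 1., 1., 0.])
predictions = tf.constant([0.2, 0.7, 0.4, 0.6])
threshold_predictions = tf.cast(tf.greater_equal(predictions, 0.5), tf.float32)
accuracy = tf.reduce_mean(
    tf.cast(tf.equal(labels, threshold_predictions), tf.float32))
print(accuracy.numpy())  # 0.5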
def call(self, inputs, count_weights=None): if isinstance(inputs, (list, np.ndarray)): inputs = ops.convert_to_tensor_v2_with_dispatch(inputs) if inputs.shape.rank == 1: inputs = array_ops.expand_dims(inputs, 1) if count_weights is not None and self.output_mode != COUNT: raise ValueError( "count_weights is not used in `output_mode='tf-idf'`, " "or `output_mode='binary'`. Please pass a single input.") self._called = True if self.max_tokens is None: raise RuntimeError( "If you construct a `CategoryEncoding` layer with " "`max_tokens=None`, you need to call `adapt()` " "on it before using it") else: out_depth = self.max_tokens if self.output_mode == TFIDF: # If the input is a sparse tensor, we densify it with the default value of # -1. Because -1 is ignored by one_hot, this effectively drops the non-set # positions from the output encoding. if self.sparse: raise ValueError("`sparse=True` with `output_mode=tfidf` " "is not supported.") if isinstance(inputs, sparse_tensor.SparseTensor): inputs = sparse_ops.sparse_tensor_to_dense(inputs, default_value=-1) one_hot_data = array_ops.one_hot(inputs, depth=out_depth) counts = math_ops.reduce_sum(one_hot_data, axis=1) tf_idf_data = math_ops.multiply(counts, self.tf_idf_weights) tf_idf_data.set_shape(tensor_shape.TensorShape((None, out_depth))) return tf_idf_data binary_output = (self.output_mode == BINARY) if isinstance(inputs, sparse_tensor.SparseTensor): max_value = math_ops.reduce_max(inputs.values) min_value = math_ops.reduce_min(inputs.values) else: max_value = math_ops.reduce_max(inputs) min_value = math_ops.reduce_min(inputs) condition = math_ops.logical_and( math_ops.greater(math_ops.cast(out_depth, max_value.dtype), max_value), math_ops.greater_equal(min_value, math_ops.cast(0, min_value.dtype))) control_flow_ops.Assert(condition, [ "Input values must be in the range 0 <= values < max_tokens" " with max_tokens={}".format(out_depth) ]) if self.sparse: return sparse_bincount(inputs, out_depth, binary_output, count_weights) else: return dense_bincount(inputs, out_depth, binary_output, count_weights)
def _labels_and_logits_metrics(self, labels, logits):
  """Returns metrics for labels and logits."""
  is_label_valid = array_ops.reshape(math_ops.greater_equal(labels, 0.), [-1])
  return {
      name: metrics_core.mean(
          array_ops.boolean_mask(
              array_ops.reshape(tensor, [-1]), is_label_valid))
      for name, tensor in [('labels_mean', labels), ('logits_mean', logits)]
  }
def _ssim_per_channel(img1, img2, max_val=1.0): filter_size = constant_op.constant(11, dtype=dtypes.int32) filter_sigma = constant_op.constant(1.5, dtype=img1.dtype) shape1, shape2 = array_ops.shape_n([img1, img2]) checks = [ control_flow_ops.Assert(math_ops.reduce_all( math_ops.greater_equal(shape1[-3:-1], filter_size)), [shape1, filter_size], summarize=8), control_flow_ops.Assert(math_ops.reduce_all( math_ops.greater_equal(shape2[-3:-1], filter_size)), [shape2, filter_size], summarize=8) ] # Enforce the check to run before computation. with ops.control_dependencies(checks): img1 = array_ops.identity(img1) kernel = _fspecial_gauss(filter_size, filter_sigma) kernel = array_ops.tile(kernel, multiples=[1, 1, shape1[-1], 1]) compensation = 1.0 def reducer(x): shape = array_ops.shape(x) x = array_ops.reshape(x, shape=array_ops.concat([[-1], shape[-3:]], 0)) y = nn.depthwise_conv2d(x, kernel, strides=[1, 1, 1, 1], padding='VALID') return array_ops.reshape( y, array_ops.concat([shape[:-3], array_ops.shape(y)[1:]], 0)) luminance, cs = _ssim_helper(img1, img2, reducer, max_val, compensation) # Average over the second and the third from the last: height, width. axes = constant_op.constant([-3, -2], dtype=dtypes.int32) ssim_val = math_ops.reduce_mean(luminance * cs, axes) cs = math_ops.reduce_mean(cs, axes) luminance = math_ops.reduce_mean(luminance, axes) return ssim_val, cs, luminance
def call(self, inputs, count_weights=None): if isinstance(inputs, (list, np.ndarray)): inputs = ops.convert_to_tensor_v2_with_dispatch(inputs) def expand_dims(inputs, axis): if tf_utils.is_sparse(inputs): return sparse_ops.sparse_expand_dims(inputs, axis) else: return array_ops.expand_dims(inputs, axis) original_shape = inputs.shape # In all cases, we should uprank scalar input to a single sample. if inputs.shape.rank == 0: inputs = expand_dims(inputs, -1) # One hot will unprank only if the final output dimension is not already 1. if self.output_mode == ONE_HOT: if inputs.shape[-1] != 1: inputs = expand_dims(inputs, -1) # TODO(b/190445202): remove output rank restriction. if inputs.shape.rank > 2: raise ValueError( "Received input shape {}, which would result in output rank {}. " "Currently only outputs up to rank 2 are supported.".format( original_shape, inputs.shape.rank)) if count_weights is not None and self.output_mode != COUNT: raise ValueError( "`count_weights` is not used when `output_mode` is not `'count'`. " "Received `count_weights={}`.".format(count_weights)) out_depth = self.num_tokens binary_output = self.output_mode in (MULTI_HOT, ONE_HOT) if isinstance(inputs, sparse_tensor.SparseTensor): max_value = math_ops.reduce_max(inputs.values) min_value = math_ops.reduce_min(inputs.values) else: max_value = math_ops.reduce_max(inputs) min_value = math_ops.reduce_min(inputs) condition = math_ops.logical_and( math_ops.greater(math_ops.cast(out_depth, max_value.dtype), max_value), math_ops.greater_equal(min_value, math_ops.cast(0, min_value.dtype))) assertion = control_flow_ops.Assert(condition, [ "Input values must be in the range 0 <= values < num_tokens" " with num_tokens={}".format(out_depth) ]) with ops.control_dependencies([assertion]): if self.sparse: return sparse_bincount(inputs, out_depth, binary_output, count_weights) else: return dense_bincount(inputs, out_depth, binary_output, count_weights)
def grow_not_in_mem(): """Accumulates the data and grows a layer when ready.""" accumulators = [] dependencies = [] for i, feature_ids in enumerate(feature_ids_list): stats_summaries = stats_summaries_list[i] accumulator = data_flow_ops.ConditionalAccumulator( dtype=dtypes.float32, # The stats consist of grads and hessians (the last dimension). shape=[ len(feature_ids), max_splits, bucket_size_list[i], 2 ], shared_name='numeric_stats_summary_accumulator_' + str(i)) accumulators.append(accumulator) apply_grad = accumulator.apply_grad( array_ops.stack(stats_summaries, axis=0), stamp_token) dependencies.append(apply_grad) def grow_tree_from_accumulated_summaries_fn(): """Updates tree with the best layer from accumulated summaries.""" # Take out the accumulated summaries from the accumulator and grow. stats_summaries_list = [] stats_summaries_list = [ array_ops.unstack(accumulator.take_grad(1), axis=0) for accumulator in accumulators ] grow_op = grow_tree_from_stats_summaries( stats_summaries_list, feature_ids_list) return grow_op with ops.control_dependencies(dependencies): if config.is_chief: min_accumulated = math_ops.reduce_min( array_ops.stack([ acc.num_accumulated() for acc in accumulators ])) grow_model = control_flow_ops.cond( math_ops.greater_equal(min_accumulated, n_batches_per_layer), grow_tree_from_accumulated_summaries_fn, control_flow_ops.no_op, name='wait_until_n_batches_accumulated') return grow_model else: return control_flow_ops.no_op()
def assert_rank_at_least(x, rank, data=None, summarize=None, name=None): """Assert `x` has rank equal to `rank` or higher. Example of adding a dependency to an operation: ```python with tf.control_dependencies([tf.assert_rank_at_least(x, 2)]): output = tf.reduce_sum(x) ``` Example of adding dependency to the tensor being checked: ```python x = tf.with_dependencies([tf.assert_rank_at_least(x, 2)], x) ``` Args: x: Numeric `Tensor`. rank: Scalar `Tensor`. data: The tensors to print out if the condition is False. Defaults to error message and first few entries of `x`. summarize: Print this many entries of each tensor. name: A name for this operation (optional). Defaults to "assert_rank_at_least". Returns: Op raising `InvalidArgumentError` unless `x` has specified rank or higher. Raises: ValueError: If static checks determine `x` has wrong rank. """ with ops.op_scope([x], name, 'assert_rank_at_least'): x = ops.convert_to_tensor(x, name='x') rank = ops.convert_to_tensor(rank, name='rank') # Attempt to statically defined rank. x_rank_static = x.get_shape().ndims rank_static = tensor_util.constant_value(rank) if x_rank_static is not None and rank_static is not None: if x_rank_static < rank_static: raise ValueError( 'Tensor %s must have rank %d. Received rank %d, shape %s' % (x.name, rank_static, x_rank_static, x.get_shape())) return control_flow_ops.no_op( name='static_checks_determined_all_ok') if data is None: data = [ 'Tensor %s must have rank at least' % x.name, rank, 'Received shape: ', array_ops.shape(x) ] condition = math_ops.greater_equal(array_ops.rank(x), rank) return logging_ops.Assert(condition, data, summarize=summarize)
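# A hedged usage sketch of the equivalent public TF 2.x API; the tensor and
# ranks are illustrative.
import tensorflow as tf

x = tf.zeros([4, 3])
tf.debugging.assert_rank_at_least(x, 2)    # passes silently
# tf.debugging.assert_rank_at_least(x, 3)  # would raise InvalidArgumentError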
def func(table, indices, min_idx, max_idx):
  # Do a serialized embedding lookup by adjusting the indices.
  adjusted_indices = indices - min_idx
  x = ipu.embedding_ops.embedding_lookup(table, adjusted_indices)
  # Mask out any outputs which are not in range [min_idx, max_idx).
  mask_max = math_ops.less(indices, max_idx)
  mask_min = math_ops.greater_equal(indices, min_idx)
  mask = math_ops.cast(math_ops.logical_and(mask_max, mask_min), np.float16)
  mask = array_ops.expand_dims(mask, 1)
  return x * mask
def _convert_to_sparse_inputs(self, inputs):
  if isinstance(inputs, sparse_tensor.SparseTensor):
    return inputs
  elif isinstance(inputs, ragged_tensor.RaggedTensor):
    return inputs.to_sparse()
  else:
    indices = array_ops.where_v2(
        math_ops.greater_equal(inputs, array_ops.constant(0, inputs.dtype)))
    values = array_ops.gather_nd(inputs, indices)
    shape = array_ops.shape(inputs, out_type=dtypes.int64)
    return sparse_tensor.SparseTensor(indices, values, shape)
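# A hedged sketch (not from the original source) of the dense branch above:
# a dense tensor is converted to a SparseTensor by keeping only entries >= 0.
# The input data is made up for illustration.
import tensorflow as tf

inputs = tf.constant([[3, -1], [5, 7]], dtype=tf.int64)
indices = tf.where(tf.greater_equal(inputs, tf.constant(0, inputs.dtype)))
values = tf.gather_nd(inputs, indices)
shape = tf.shape(inputs, out_type=tf.int64)
sp = tf.sparse.SparseTensor(indices, values, shape)   # keeps 3, 5 and 7
print(tf.sparse.to_dense(sp).numpy())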
def __init__(self, topn,
             positive_fn=lambda label: math_ops.greater_equal(label, 1.0)):
  """Constructor.

  Args:
    topn: (int) The K in Precision@K metric.
    positive_fn: (function) A function on `Tensor` that outputs boolean True
      for positive examples. The rest are negative examples.
  """
  self._topn = topn
  self._positive_fn = positive_fn
def precision(labels, predictions, weights=None, topn=None, name=None):
  """Computes precision as weighted average of relevant examples.

  Args:
    labels: A `Tensor` of the same shape as `predictions`. A value >= 1 means
      a relevant example.
    predictions: A `Tensor` with shape [batch_size, list_size]. Each value is
      the ranking score of the corresponding example.
    weights: A `Tensor` of the same shape of predictions or [batch_size, 1].
      The former case is per-example and the latter case is per-list.
    topn: A cutoff for how many examples to consider for this metric.
    name: A string used as the name for this metric.

  Returns:
    A metric for the weighted precision of the batch.
  """
  with ops.name_scope(name, 'precision', (labels, predictions, weights)):
    labels, predictions, weights, topn = _prepare_and_validate_params(
        labels, predictions, weights, topn)
    sorted_labels, sorted_weights = utils.sort_by_scores(
        predictions, [labels, weights], topn=topn)
    # Relevance = 1.0 when labels >= 1.0.
    # relevance = math_ops.to_float(math_ops.greater_equal(sorted_labels, 1.0))
    relevance = tf.cast(math_ops.greater_equal(sorted_labels, 1.0), tf.float32)
    per_list_precision = _safe_div(
        math_ops.reduce_sum(relevance * sorted_weights, 1, keepdims=True),
        math_ops.reduce_sum(
            array_ops.ones_like(relevance) * sorted_weights, 1,
            keepdims=True))
    # per_list_weights are computed from the whole list to avoid the problem
    # of 0 when there is no relevant example in topn.
    per_list_weights = _per_example_weights_to_per_list_weights(
        # weights, math_ops.to_float(math_ops.greater_equal(labels, 1.0)))
        weights, tf.cast(math_ops.greater_equal(labels, 1.0), tf.float32))
    return math_ops.reduce_mean(per_list_precision * per_list_weights)
def assert_greater_equal(x, y, data=None, summarize=None, message=None, name=None): """Assert the condition `x >= y` holds element-wise. Example of adding a dependency to an operation: ```python with tf.control_dependencies([tf.assert_greater_equal(x, y)]): output = tf.reduce_sum(x) ``` This condition holds if for every pair of (possibly broadcast) elements `x[i]`, `y[i]`, we have `x[i] >= y[i]`. If both `x` and `y` are empty, this is trivially satisfied. Args: x: Numeric `Tensor`. y: Numeric `Tensor`, same dtype as and broadcastable to `x`. data: The tensors to print out if the condition is False. Defaults to error message and first few entries of `x`, `y`. summarize: Print this many entries of each tensor. message: A string to prefix to the default message. name: A name for this operation (optional). Defaults to "assert_greater_equal" Returns: Op that raises `InvalidArgumentError` if `x >= y` is False. """ message = message or '' with ops.name_scope(name, 'assert_greater_equal', [x, y, data]): x = ops.convert_to_tensor(x, name='x') y = ops.convert_to_tensor(y, name='y') if context.executing_eagerly(): x_name = _shape_and_dtype_str(x) y_name = _shape_and_dtype_str(y) else: x_name = x.name y_name = y.name if data is None: data = [ message, 'Condition x >= y did not hold element-wise:' 'x (%s) = ' % x_name, x, 'y (%s) = ' % y_name, y ] condition = math_ops.reduce_all(math_ops.greater_equal(x, y)) return control_flow_ops.Assert(condition, data, summarize=summarize)
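# A hedged usage sketch of the equivalent public TF 2.x API; the values are
# illustrative.
import tensorflow as tf

x = tf.constant([2, 3, 4])
y = tf.constant([1, 3, 4])
tf.debugging.assert_greater_equal(x, y, message='x must dominate y')  # passes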
def cosine_decay_fn(global_step):
  if global_step is None:
    raise ValueError("global_step is required for cosine_decay.")
  global_step = math_ops.minimum(global_step, decay_steps)
  completed_fraction = math_ops.to_float(global_step) / math_ops.to_float(
      decay_steps)
  fraction = 2.0 * num_periods * completed_fraction
  decayed = 0.5 * (
      1.0 + math_ops.cos(constant_op.constant(math.pi) * fraction))
  if zero_after is not None:
    decayed = array_ops.where(
        math_ops.greater_equal(fraction, 2 * zero_after), 0.0, decayed)
  return decayed
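# A hedged numeric sketch (plain Python, illustrative only) of the decay
# schedule above: decayed = 0.5 * (1 + cos(pi * f)) with
# f = 2 * num_periods * step / decay_steps, forced to 0 once
# f >= 2 * zero_after. All parameter values below are made up.
import math

def cosine_decay(step, decay_steps=100, num_periods=1.0, zero_after=1):
  step = min(step, decay_steps)
  fraction = 2.0 * num_periods * step / decay_steps
  decayed = 0.5 * (1.0 + math.cos(math.pi * fraction))
  return 0.0 if fraction >= 2 * zero_after else decayed

print([round(cosine_decay(s), 3) for s in (0, 25, 50, 100)])  # [1.0, 0.5, 0.0, 0.0]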
def _ssim_per_channel(img1, img2, alpha, beta_gamma, max_val=1.0): filter_size = constant_op.constant(11, dtype=dtypes.int32) filter_sigma = constant_op.constant(1.5, dtype=img1.dtype) (shape1, shape2) = array_ops.shape_n([img1, img2]) checks = [ control_flow_ops.Assert(math_ops.reduce_all( math_ops.greater_equal(shape1[-3:-1], filter_size)), [shape1, filter_size], summarize=8), control_flow_ops.Assert(math_ops.reduce_all( math_ops.greater_equal(shape2[-3:-1], filter_size)), [shape2, filter_size], summarize=8) ] with ops.control_dependencies(checks): img1 = array_ops.identity(img1) kernel = _fspecial_gauss(filter_size, filter_sigma) kernel = array_ops.tile(kernel, multiples=[1, 1, shape1[-1], 1]) compensation = 1.0 def reducer(x): shape = array_ops.shape(x) x = array_ops.reshape(x, shape=array_ops.concat([[-1], shape[-3:]], 0)) y = nn.depthwise_conv2d(x, kernel, strides=[1, 1, 1, 1], padding='VALID') return array_ops.reshape( y, array_ops.concat([shape[:-3], array_ops.shape(y)[1:]], 0)) (luminance, cs) = _ssim_helper(img1, img2, reducer, max_val, alpha, beta_gamma, compensation) axes = constant_op.constant([-3, -2], dtype=dtypes.int32) ssim_val = math_ops.reduce_mean(luminance * cs, axes) cs = math_ops.reduce_mean(cs, axes) return (ssim_val, cs)
def _insert_fixed_quant_op(context, name, producer, consumers, init_min=-6.0, init_max=6.0, quant_delay=None): """Adds a fake quant op with fixed ranges. Args: context: The parent scope of the op to be quantized. name: The name of the fake quant op. producer: The producer op to be quantized. consumers: The consumer ops to the producer op. init_min: The minimum range for the fake quant op. init_max: The maximum range for the fake quant op. quant_delay: Number of steps to wait before activating the fake quant op. Raises: ValueError: When producer operation is not directly connected to the consumer operation. """ name_prefix = name if not context else context + '/' + name inputs = producer.outputs[0] quant = quant_ops.FixedQuantize(inputs, init_min=init_min, init_max=init_max, scope=name_prefix) if quant_delay and quant_delay > 0: activate_quant = math_ops.greater_equal( common.CreateOrGetQuantizationStep(), quant_delay, name=name_prefix + '/activate_quant') quant = control_flow_ops.cond(activate_quant, lambda: quant, lambda: inputs, name=name_prefix + '/delayed_quant') if consumers: tensors_modified_count = common.RerouteTensor(quant, inputs, can_modify=consumers) # Some operations can have multiple output tensors going to the same # consumer. Since consumers is a set, we need to ensure that # tensors_modified_count is greater than or equal to the length of the set # of consumers. if tensors_modified_count < len(consumers): raise ValueError( 'No inputs quantized for ops: [%s]' % ', '.join([consumer.name for consumer in consumers]))
def _make_logistic_eval_metric_ops(labels, predictions, thresholds): """Returns a dictionary of evaluation metric ops for logistic regression. Args: labels: The labels `Tensor`, or a dict with only one `Tensor` keyed by name. predictions: The predictions `Tensor`. thresholds: List of floating point thresholds to use for accuracy, precision, and recall metrics. Returns: A dict of metric results keyed by name. """ # If labels is a dict with a single key, unpack into a single tensor. labels_tensor = labels if isinstance(labels, dict) and len(labels) == 1: labels_tensor = labels.values()[0] metrics = {} metrics[metric_key.MetricKey.PREDICTION_MEAN] = metrics_lib.streaming_mean( predictions) metrics[metric_key.MetricKey.LABEL_MEAN] = metrics_lib.streaming_mean( labels_tensor) # Also include the streaming mean of the label as an accuracy baseline, as # a reminder to users. metrics[metric_key.MetricKey. ACCURACY_BASELINE] = metrics_lib.streaming_mean(labels_tensor) metrics[metric_key.MetricKey.AUC] = metrics_lib.streaming_auc( labels=labels_tensor, predictions=predictions) for threshold in thresholds: predictions_at_threshold = math_ops.cast( math_ops.greater_equal(predictions, threshold), dtypes.float32, name='predictions_at_threshold_%f' % threshold) metrics[metric_key.MetricKey.ACCURACY_MEAN % threshold] = (metrics_lib.streaming_accuracy( labels=labels_tensor, predictions=predictions_at_threshold)) # Precision for positive examples. metrics[metric_key.MetricKey.PRECISION_MEAN % threshold] = (metrics_lib.streaming_precision( labels=labels_tensor, predictions=predictions_at_threshold)) # Recall for positive examples. metrics[metric_key.MetricKey.RECALL_MEAN % threshold] = (metrics_lib.streaming_recall( labels=labels_tensor, predictions=predictions_at_threshold)) return metrics
def assert_rank_at_least(x, rank, data=None, summarize=None, name=None): """Assert `x` has rank equal to `rank` or higher. Example of adding a dependency to an operation: ```python with tf.control_dependencies([tf.assert_rank_at_least(x, 2)]): output = tf.reduce_sum(x) ``` Example of adding dependency to the tensor being checked: ```python x = tf.with_dependencies([tf.assert_rank_at_least(x, 2)], x) ``` Args: x: Numeric `Tensor`. rank: Scalar `Tensor`. data: The tensors to print out if the condition is False. Defaults to error message and first few entries of `x`. summarize: Print this many entries of each tensor. name: A name for this operation (optional). Defaults to "assert_rank_at_least". Returns: Op raising `InvalidArgumentError` unless `x` has specified rank or higher. Raises: ValueError: If static checks determine `x` has wrong rank. """ with ops.op_scope([x], name, "assert_rank_at_least"): x = ops.convert_to_tensor(x, name="x") rank = ops.convert_to_tensor(rank, name="rank") # Attempt to statically defined rank. x_rank_static = x.get_shape().ndims rank_static = tensor_util.constant_value(rank) if x_rank_static is not None and rank_static is not None: if x_rank_static < rank_static: raise ValueError( "Tensor %s must have rank %d. Received rank %d, shape %s" % (x.name, rank_static, x_rank_static, x.get_shape()) ) return control_flow_ops.no_op(name="static_checks_determined_all_ok") if data is None: data = ["Tensor %s must have rank at least" % x.name, rank, "Received shape: ", array_ops.shape(x)] condition = math_ops.greater_equal(array_ops.rank(x), rank) return logging_ops.Assert(condition, data, summarize=summarize)
def _update_mask(self, weights, threshold):
  """Updates the mask for a given weight tensor.

  This function first computes the cdf of the weight tensor, and estimates
  the threshold value such that 'desired_sparsity' fraction of weights have
  magnitude less than the threshold.

  Args:
    weights: The weight tensor that needs to be masked.
    threshold: The current threshold value. The function will compute a new
      threshold and return the exponential moving average using the current
      value of threshold.

  Returns:
    new_threshold: The new value of the threshold based on weights, and
      sparsity at the current global_step.
    new_mask: A numpy array of the same size and shape as weights containing
      0 or 1 to indicate which of the values in weights falls below the
      threshold.

  Raises:
    ValueError: if sparsity is not defined.
  """
  if self._sparsity is None:
    raise ValueError('Sparsity variable undefined')

  sparsity = self._get_sparsity(weights.op.name)
  with ops.name_scope(weights.op.name + '_pruning_ops'):
    abs_weights = math_ops.abs(weights)
    k = math_ops.cast(
        math_ops.round(
            math_ops.cast(array_ops.size(abs_weights), dtypes.float32) *
            (1 - sparsity)), dtypes.int32)
    # Sort the entire array.
    values, _ = nn_ops.top_k(
        array_ops.reshape(abs_weights, [-1]), k=array_ops.size(abs_weights))
    # Grab the (k-1)-th value.
    current_threshold = array_ops.gather(values, k - 1)
    smoothed_threshold = math_ops.add_n([
        math_ops.multiply(current_threshold, 1 - self._spec.threshold_decay),
        math_ops.multiply(threshold, self._spec.threshold_decay)
    ])
    new_mask = math_ops.cast(
        math_ops.greater_equal(abs_weights, smoothed_threshold),
        dtypes.float32)
  return smoothed_threshold, new_mask
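# A hedged, self-contained sketch (not the original implementation) of the
# same magnitude-pruning idea with public TF 2.x ops: keep the largest
# (1 - sparsity) fraction of weights by absolute value. No threshold smoothing
# is applied here, and the weights and sparsity are made up.
import tensorflow as tf

def magnitude_mask(weights, sparsity):
  abs_weights = tf.abs(weights)
  k = tf.cast(
      tf.round(tf.cast(tf.size(abs_weights), tf.float32) * (1 - sparsity)),
      tf.int32)
  values, _ = tf.math.top_k(
      tf.reshape(abs_weights, [-1]), k=tf.size(abs_weights))
  threshold = tf.gather(values, k - 1)   # the (k-1)-th largest magnitude
  return tf.cast(tf.greater_equal(abs_weights, threshold), tf.float32)

w = tf.constant([[0.1, -0.8], [0.05, 0.4]])
print(magnitude_mask(w, sparsity=0.5).numpy())  # keeps -0.8 and 0.4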
def maybe_update_masks(): with ops.name_scope(self._spec.name): is_step_within_pruning_range = math_ops.logical_and( math_ops.greater_equal(self._global_step, self._spec.begin_pruning_step), # If end_pruning_step is negative, keep pruning forever! math_ops.logical_or( math_ops.less_equal(self._global_step, self._spec.end_pruning_step), math_ops.less(self._spec.end_pruning_step, 0))) is_pruning_step = math_ops.less_equal( math_ops.add(self._last_update_step, self._spec.pruning_frequency), self._global_step) return math_ops.logical_and(is_step_within_pruning_range, is_pruning_step)
def restart_decay_fn(global_step): if global_step is None: raise ValueError("global_step is required for cosine_decay.") global_step = math_ops.minimum(global_step, decay_steps) num = math_ops.mod(num_periods * math_ops.to_float(global_step), decay_steps) fraction = num / math_ops.to_float(decay_steps) decayed = 0.5 * ( 1.0 + math_ops.cos(constant_op.constant(math.pi) * fraction)) if zero_after is not None: tmp = math_ops.to_float( num_periods * global_step) / math_ops.to_float(decay_steps) decayed = array_ops.where( math_ops.greater_equal(tmp, zero_after), 0.0, decayed) return decayed
def grow_tree(self, stats_summaries_list, feature_ids_list, last_layer_nodes_range): # For not in memory situation, we need to accumulate enough of batches first # before proceeding with building a tree layer. max_splits = _get_max_splits(self._tree_hparams) # Prepare accumulators. accumulators = [] dependencies = [] for i, feature_ids in enumerate(feature_ids_list): stats_summaries = stats_summaries_list[i] accumulator = data_flow_ops.ConditionalAccumulator( dtype=dtypes.float32, # The stats consist of grads and hessians (the last dimension). shape=[len(feature_ids), max_splits, self._bucket_size_list[i], 2], shared_name='numeric_stats_summary_accumulator_' + str(i)) accumulators.append(accumulator) apply_grad = accumulator.apply_grad( array_ops.stack(stats_summaries, axis=0), self._stamp_token) dependencies.append(apply_grad) # Grow the tree if enough batches is accumulated. with ops.control_dependencies(dependencies): if not self._is_chief: return control_flow_ops.no_op() min_accumulated = math_ops.reduce_min( array_ops.stack([acc.num_accumulated() for acc in accumulators])) def grow_tree_from_accumulated_summaries_fn(): """Updates tree with the best layer from accumulated summaries.""" # Take out the accumulated summaries from the accumulator and grow. stats_summaries_list = [] stats_summaries_list = [ array_ops.unstack(accumulator.take_grad(1), axis=0) for accumulator in accumulators ] grow_op = self._grow_tree_from_stats_summaries( stats_summaries_list, feature_ids_list, last_layer_nodes_range) return grow_op grow_model = control_flow_ops.cond( math_ops.greater_equal(min_accumulated, self._n_batches_per_layer), grow_tree_from_accumulated_summaries_fn, control_flow_ops.no_op, name='wait_until_n_batches_accumulated') return grow_model
def dropped_inputs(inputs=inputs, rate=self.rate, seed=self.seed):  # pylint: disable=missing-docstring
  alpha = 1.6732632423543772848170429916717
  scale = 1.0507009873554804934193349852946
  alpha_p = -alpha * scale

  kept_idx = math_ops.greater_equal(
      K.random_uniform(noise_shape, seed=seed), rate)
  kept_idx = math_ops.cast(kept_idx, K.floatx())

  # Get affine transformation params
  a = ((1 - rate) * (1 + rate * alpha_p**2))**-0.5
  b = -a * alpha_p * rate

  # Apply mask
  x = inputs * kept_idx + alpha_p * (1 - kept_idx)

  # Do affine transformation
  return a * x + b
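# A hedged, self-contained sketch (not the Keras implementation itself) of the
# SELU "alpha dropout" transform above, written with public TF 2.x ops; the
# inputs and rate are made up for illustration.
import tensorflow as tf

def alpha_dropout(inputs, rate, seed=None):
  alpha, scale = 1.6732632423543772, 1.0507009873554805
  alpha_p = -alpha * scale
  kept_idx = tf.cast(
      tf.greater_equal(tf.random.uniform(tf.shape(inputs), seed=seed), rate),
      inputs.dtype)
  a = ((1 - rate) * (1 + rate * alpha_p**2)) ** -0.5
  b = -a * alpha_p * rate
  x = inputs * kept_idx + alpha_p * (1 - kept_idx)   # dropped units -> alpha_p
  return a * x + b                                   # restore mean/variance

print(alpha_dropout(tf.random.normal([2, 3]), rate=0.1).numpy())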
def assert_rank_at_least(x, rank, data=None, summarize=None, name=None): """Assert `x` has rank equal to `rank` or higher. Args: x: Numeric `Tensor`. rank: Scalar `Tensor`. data: The tensors to print out if the condition is False. Defaults to error message and first few entries of `x`. summarize: Print this many entries of each tensor. name: A name for this operation (optional). Defaults to "assert_rank_at_least". Returns: Op raising `InvalidArgumentError` unless `x` has specified rank or higher. Raises: ValueError: If static checks determine `x` has wrong rank. """ with ops.op_scope([x], name, 'assert_rank_at_least'): x = ops.convert_to_tensor(x, name='x') rank = ops.convert_to_tensor(rank, name='rank') # Attempt to statically defined rank. x_rank_static = x.get_shape().ndims rank_static = tensor_util.constant_value(rank) if x_rank_static is not None and rank_static is not None: if x_rank_static < rank_static: raise ValueError( 'Tensor %s must have rank %d. Received rank %d, shape %s' % (x.name, rank_static, x_rank_static, x.get_shape())) return control_flow_ops.no_op(name='static_checks_determined_all_ok') if data is None: data = [ 'Tensor %s must have rank at least' % x.name, rank, 'Received shape: ', array_ops.shape(x)] condition = math_ops.greater_equal(array_ops.rank(x), rank) return logging_ops.Assert(condition, data, summarize=summarize)
def center_bias(self, center_bias_var, gradients, hessians): # For not in memory situation, we need to accumulate enough of batches first # before proceeding with centering bias. # Create an accumulator. bias_dependencies = [] bias_accumulator = data_flow_ops.ConditionalAccumulator( dtype=dtypes.float32, # The stats consist of grads and hessians means only. # TODO(nponomareva): this will change for a multiclass shape=[2, 1], shared_name='bias_accumulator') grads_and_hess = array_ops.stack([gradients, hessians], axis=0) grads_and_hess = math_ops.reduce_mean(grads_and_hess, axis=1) apply_grad = bias_accumulator.apply_grad(grads_and_hess, self._stamp_token) bias_dependencies.append(apply_grad) # Center bias if enough batches were processed. with ops.control_dependencies(bias_dependencies): if not self._is_chief: return control_flow_ops.no_op() def center_bias_from_accumulator(): accumulated = array_ops.unstack(bias_accumulator.take_grad(1), axis=0) return self._center_bias_fn(center_bias_var, array_ops.expand_dims(accumulated[0], 0), array_ops.expand_dims(accumulated[1], 0)) center_bias_op = control_flow_ops.cond( math_ops.greater_equal(bias_accumulator.num_accumulated(), self._n_batches_per_layer), center_bias_from_accumulator, control_flow_ops.no_op, name='wait_until_n_batches_for_bias_accumulated') return center_bias_op
def AddBackPropCounterLoop(self, count): """Add the backprop loop that controls the iterations. This is added to the backprop loop. It is used to control the loop termination and the slice index. The pseudocode is: `n = count; while (n >= 1) { n--; }` Args: count: The number of iterations for backprop. Returns: always 0. """ one = constant_op.constant(1, name="b_count") self.Enter() self.AddName(count.name) enter_count = _Enter(count, self._name, is_constant=False, parallel_iterations=self._parallel_iterations, name="b_count") merge_count = merge([enter_count, enter_count])[0] self._pivot_for_pred = merge_count cond = math_ops.greater_equal(merge_count, one) self._pivot = loop_cond(cond, name="b_count") switch_count = switch(merge_count, self._pivot) # Add next_iteration right after Switch to match the gradient function. next_count = next_iteration(switch_count[1]) self._pivot_for_body = next_count self._index = math_ops.sub(next_count, one) merge_count.op._update_input(1, self._index) exit_count = exit(switch_count[0], name="b_count") self.Exit() return exit_count
def center_bias_not_in_mem(): """Accumulates the data and updates the logits bias, when ready.""" bias_dependencies = [] bias_accumulator = data_flow_ops.ConditionalAccumulator( dtype=dtypes.float32, # The stats consist of grads and hessians means only. # TODO(nponomareva): this will change for a multiclass shape=[2, 1], shared_name='bias_accumulator') grads_and_hess = array_ops.stack([gradients, hessians], axis=0) grads_and_hess = math_ops.reduce_mean(grads_and_hess, axis=1) apply_grad = bias_accumulator.apply_grad(grads_and_hess, stamp_token) bias_dependencies.append(apply_grad) def center_bias_from_accumulator(): accumulated = array_ops.unstack( bias_accumulator.take_grad(1), axis=0) return _center_bias_fn( array_ops.expand_dims(accumulated[0], 0), array_ops.expand_dims(accumulated[1], 0)) with ops.control_dependencies(bias_dependencies): if config.is_chief: center_bias_op = control_flow_ops.cond( math_ops.greater_equal(bias_accumulator.num_accumulated(), n_batches_per_layer), center_bias_from_accumulator, control_flow_ops.no_op, name='wait_until_n_batches_for_bias_accumulated') return center_bias_op else: return control_flow_ops.no_op()
def _accuracy_metric(predictions, target, weights=None):
  threshold_predictions = math_ops.to_float(
      math_ops.greater_equal(predictions, threshold))
  return metrics_lib.streaming_accuracy(
      predictions=threshold_predictions, labels=target, weights=weights)
def _ComputeBatchNormCorrections(context, match, freeze_batch_norm_delay, fused_batch_norm): """Computes batch norm correction params. Before batch normalization is frozen: We use batch statistics for batch norm. correction_scale = sigma_b/sigma_mv correction_recip = 1/correction_scale correction_offset = 0 After batch normalization is frozen: correction_scale = sigma_b/sigma_mv correction_recip = 1 correction_offset = gamma*(mu_b/sigma_b-mu_mv/sigma_mv). Batch norm is frozen if global_step > bn_freeze_delay. The corrections ensure that: a) The weights are quantized after scaling by gamma/sigma_mv. This enables smoother training as the scaling on the weights changes slowly, rather than jump across mini-batches b) Changing the values of the corrections allows for one to switch between using batch statistics to using moving mean and average, without requiring changes to batch_norm Args: context: The scope under which we look for batch norm params match: Object containing required batch norm tensors for correction computation. freeze_batch_norm_delay: Delay in steps at which computation switches from regular batch norm to frozen mean and variance. fused_batch_norm: Bool, true if fused batch norm is used. Returns: A tuple of correction_scale, correction_recip, correction_offset """ g = ops.get_default_graph() prefix = '' if not context else context + '/' with g.name_scope(prefix + 'batch_norm_correction'): recip_sigma_mv = math_ops.rsqrt( match.moving_variance_tensor + match.batch_epsilon) recip_sigma = math_ops.rsqrt(match.variance_tensor + match.batch_epsilon) correction_scale = math_ops.divide( recip_sigma_mv, recip_sigma, name='scale_compute') correction_scale = array_ops.identity( correction_scale, name='correction_scale') correction_recip = math_ops.reciprocal( correction_scale, name='reciprocal_compute') correction_offset = math_ops.multiply( match.gamma_tensor, match.mean_tensor * recip_sigma - match.moving_mean_tensor * recip_sigma_mv, name='offset_compute') if freeze_batch_norm_delay is not None: use_mv_avg = math_ops.greater_equal( common.CreateOrGetQuantizationStep(), freeze_batch_norm_delay, name='use_moving_average') else: use_mv_avg = False bn_decay_zero = 0.0 bn_decay_mean_consumers = list(match.bn_decay_mean_tensor.consumers()) bn_decay_var_consumers = list(match.bn_decay_mean_tensor.consumers()) bn_decay_mean_out = utils.smart_cond( use_mv_avg, lambda: bn_decay_zero, lambda: match.bn_decay_mean_tensor, name='freeze_moving_mean') graph_editor.reroute_ts( [bn_decay_mean_out], [match.bn_decay_mean_tensor], can_modify=bn_decay_mean_consumers) if fused_batch_norm is False: bn_decay_var_consumers = list(match.bn_decay_var_tensor.consumers()) bn_decay_var_out = utils.smart_cond( use_mv_avg, lambda: bn_decay_zero, lambda: match.bn_decay_var_tensor, name='freeze_moving_var') graph_editor.reroute_ts( [bn_decay_var_out], [match.bn_decay_var_tensor], can_modify=bn_decay_var_consumers) correction_recip = utils.smart_cond( use_mv_avg, lambda: array_ops.ones(correction_scale.shape), lambda: correction_recip, name='correction_recip') correction_offset = utils.smart_cond( use_mv_avg, lambda: correction_offset, lambda: array_ops.zeros(correction_offset.shape), name='correction_offset') return correction_scale, correction_recip, correction_offset
def get_binary_predictions_for_hinge(predictions):
  return math_ops.cast(
      math_ops.greater_equal(predictions, array_ops.zeros_like(predictions)),
      dtype=dtypes.int32)
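# A hedged numeric sketch (illustrative only): hinge-style scores become
# binary class predictions by thresholding at zero.
import tensorflow as tf

predictions = tf.constant([-1.2, 0.0, 0.7])
binary = tf.cast(
    tf.greater_equal(predictions, tf.zeros_like(predictions)), tf.int32)
print(binary.numpy())  # [0 1 1]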
def _streaming_metrics(predictions, labels):
  return streaming_metrics_fn(
      predictions=math_ops.to_float(
          math_ops.greater_equal(predictions, threshold)),
      labels=labels)
def train(train_op, logdir, train_step_fn=train_step, train_step_kwargs=_USE_DEFAULT, log_every_n_steps=1, graph=None, master='', is_chief=True, global_step=None, number_of_steps=None, init_op=_USE_DEFAULT, init_feed_dict=None, local_init_op=_USE_DEFAULT, init_fn=None, ready_op=_USE_DEFAULT, summary_op=_USE_DEFAULT, save_summaries_secs=600, summary_writer=_USE_DEFAULT, startup_delay_steps=0, saver=None, save_interval_secs=600, sync_optimizer=None, session_config=None, session_wrapper=None, trace_every_n_steps=None, ignore_live_threads=False): """Runs a training loop using a TensorFlow supervisor. When the sync_optimizer is supplied, gradient updates are applied synchronously. Otherwise, gradient updates are applied asynchronous. Args: train_op: A `Tensor` that, when executed, will apply the gradients and return the loss value. logdir: The directory where training logs are written to. If None, model checkpoints and summaries will not be written. train_step_fn: The function to call in order to execute a single gradient step. The function must have take exactly four arguments: the current session, the `train_op` `Tensor`, a global step `Tensor` and a dictionary. train_step_kwargs: A dictionary which is passed to the `train_step_fn`. By default, two `Boolean`, scalar ops called "should_stop" and "should_log" are provided. log_every_n_steps: The frequency, in terms of global steps, that the loss and global step are logged. graph: The graph to pass to the supervisor. If no graph is supplied the default graph is used. master: The address of the tensorflow master. is_chief: Specifies whether or not the training is being run by the primary replica during replica training. global_step: The `Tensor` representing the global step. If left as `None`, then training_util.get_or_create_global_step(), that is, tf.contrib.framework.global_step() is used. number_of_steps: The max number of gradient steps to take during training, as measured by 'global_step': training will stop if global_step is greater than 'number_of_steps'. If the value is left as None, training proceeds indefinitely. init_op: The initialization operation. If left to its default value, then the session is initialized by calling `tf.global_variables_initializer()`. init_feed_dict: A feed dictionary to use when executing the `init_op`. local_init_op: The local initialization operation. If left to its default value, then the session is initialized by calling `tf.local_variables_initializer()` and `tf.tables_initializer()`. init_fn: An optional callable to be executed after `init_op` is called. The callable must accept one argument, the session being initialized. ready_op: Operation to check if the model is ready to use. If left to its default value, then the session checks for readiness by calling `tf.report_uninitialized_variables()`. summary_op: The summary operation. save_summaries_secs: How often, in seconds, to save summaries. summary_writer: `SummaryWriter` to use. Can be `None` to indicate that no summaries should be written. If unset, we create a SummaryWriter. startup_delay_steps: The number of steps to wait for before beginning. Note that this must be 0 if a sync_optimizer is supplied. saver: Saver to save checkpoints. If None, a default one will be created and used. save_interval_secs: How often, in seconds, to save the model to `logdir`. sync_optimizer: an instance of tf.train.SyncReplicasOptimizer, or a list of them. If the argument is supplied, gradient updates will be synchronous. 
If left as `None`, gradient updates will be asynchronous. session_config: An instance of `tf.ConfigProto` that will be used to configure the `Session`. If left as `None`, the default will be used. session_wrapper: A function that takes a `tf.Session` object as the only argument and returns a wrapped session object that has the same methods that the original object has, or `None`. Iff not `None`, the wrapped object will be used for training. trace_every_n_steps: produce and save a `Timeline` in Chrome trace format and add it to the summaries every `trace_every_n_steps`. If None, no trace information will be produced or saved. ignore_live_threads: If `True` ignores threads that remain running after a grace period when stopping the supervisor, instead of raising a RuntimeError. Returns: the value of the loss function after training. Raises: ValueError: if `train_op` is empty or if `startup_delay_steps` is non-zero when `sync_optimizer` is supplied, if `number_of_steps` is negative, or if `trace_every_n_steps` is not `None` and no `logdir` is provided. """ if train_op is None: raise ValueError('train_op cannot be None.') if logdir is None: if summary_op != _USE_DEFAULT: raise ValueError('Cannot provide summary_op because logdir=None') if saver is not None: raise ValueError('Cannot provide saver because logdir=None') if trace_every_n_steps is not None: raise ValueError('Cannot provide trace_every_n_steps because ' 'logdir=None') if isinstance(sync_optimizer, sync_replicas_optimizer.SyncReplicasOptimizer): sync_optimizer = [sync_optimizer] if sync_optimizer is not None and startup_delay_steps > 0: raise ValueError( 'startup_delay_steps must be zero when sync_optimizer is supplied.') if number_of_steps is not None and number_of_steps <= 0: raise ValueError( '`number_of_steps` must be either None or a positive number.') graph = graph or ops.get_default_graph() with graph.as_default(): if global_step is None: global_step = training_util.get_or_create_global_step() saver = saver or tf_saver.Saver() if sync_optimizer is not None: for opt in sync_optimizer: if not isinstance(opt, sync_replicas_optimizer.SyncReplicasOptimizer): raise ValueError( '`sync_optimizer` must be a tf.train.SyncReplicasOptimizer.') with ops.name_scope('init_ops'): if init_op == _USE_DEFAULT: init_op = variables.global_variables_initializer() if ready_op == _USE_DEFAULT: ready_op = variables.report_uninitialized_variables() if local_init_op == _USE_DEFAULT: local_init_op = control_flow_ops.group( variables.local_variables_initializer(), lookup_ops.tables_initializer()) if sync_optimizer is not None and isinstance(sync_optimizer, list): with ops.control_dependencies([local_init_op] if local_init_op is not None else []): if is_chief: local_init_op = control_flow_ops.group( *[opt.chief_init_op for opt in sync_optimizer]) else: local_init_op = control_flow_ops.group( *[opt.local_step_init_op for opt in sync_optimizer]) ready_for_local_init_op = control_flow_ops.group( *[opt.ready_for_local_init_op for opt in sync_optimizer]) else: ready_for_local_init_op = None if summary_op == _USE_DEFAULT: summary_op = summary.merge_all() if summary_writer == _USE_DEFAULT: summary_writer = supervisor.Supervisor.USE_DEFAULT if is_chief and sync_optimizer is not None: # Need to create these BEFORE the supervisor finalizes the graph: init_tokens_op = [opt.get_init_tokens_op() for opt in sync_optimizer] chief_queue_runner = [ opt.get_chief_queue_runner() for opt in sync_optimizer] if train_step_kwargs == _USE_DEFAULT: with 
ops.name_scope('train_step'): train_step_kwargs = {} if number_of_steps: should_stop_op = math_ops.greater_equal(global_step, number_of_steps) else: should_stop_op = constant_op.constant(False) train_step_kwargs['should_stop'] = should_stop_op if log_every_n_steps > 0: train_step_kwargs['should_log'] = math_ops.equal( math_ops.mod(global_step, log_every_n_steps), 0) if is_chief and trace_every_n_steps is not None: train_step_kwargs['should_trace'] = math_ops.equal( math_ops.mod(global_step, trace_every_n_steps), 0) train_step_kwargs['logdir'] = logdir sv = supervisor.Supervisor( graph=graph, is_chief=is_chief, logdir=logdir, init_op=init_op, init_feed_dict=init_feed_dict, local_init_op=local_init_op, ready_for_local_init_op=ready_for_local_init_op, ready_op=ready_op, summary_op=summary_op, summary_writer=summary_writer, global_step=global_step, saver=saver, save_summaries_secs=save_summaries_secs, save_model_secs=save_interval_secs, init_fn=init_fn) if summary_writer is not None: train_step_kwargs['summary_writer'] = sv.summary_writer total_loss = None should_retry = True while should_retry: try: should_retry = False with sv.managed_session( master, start_standard_services=False, config=session_config) as sess: logging.info('Starting Session.') if session_wrapper is not None: logging.info( 'Wrapping session with wrapper function: %s', session_wrapper) sess = session_wrapper(sess) if is_chief: if logdir: sv.start_standard_services(sess) elif startup_delay_steps > 0: # (use sys.maxsize because sys.maxint doesn't exist in Python 3) _wait_for_step(sess, global_step, min(startup_delay_steps, number_of_steps or sys.maxsize)) threads = sv.start_queue_runners(sess) logging.info('Starting Queues.') if is_chief and sync_optimizer is not None: sv.start_queue_runners(sess, chief_queue_runner) sess.run(init_tokens_op) try: while not sv.should_stop(): total_loss, should_stop = train_step_fn( sess, train_op, global_step, train_step_kwargs) if should_stop: logging.info('Stopping Training.') sv.request_stop() break except errors.OutOfRangeError as e: # OutOfRangeError is thrown when epoch limit per # tf.train.limit_epochs is reached. logging.info('Caught OutOfRangeError. Stopping Training. %s', e) if logdir and sv.is_chief: logging.info('Finished training! Saving model to disk.') sv.saver.save(sess, sv.save_path, global_step=sv.global_step) sv.stop( threads, close_summary_writer=True, ignore_live_threads=ignore_live_threads) except errors.AbortedError: # Always re-run on AbortedError as it indicates a restart of one of the # distributed tensorflow servers. logging.info('Retrying training!') should_retry = True return total_loss
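# --- Usage sketch (assumed, not taken from train() above) --------------------
# train() stops by evaluating a `should_stop` op built from
# greater_equal(global_step, number_of_steps) and logs when
# mod(global_step, log_every_n_steps) == 0. A stripped-down sketch of that
# gating pattern outside the supervisor machinery; the step counts below are
# illustrative.
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

global_step = tf.train.get_or_create_global_step()
number_of_steps = 3
log_every_n_steps = 2
should_stop = tf.greater_equal(global_step, number_of_steps)
should_log = tf.equal(tf.mod(global_step, log_every_n_steps), 0)
increment_step = tf.assign_add(global_step, 1)

with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  while not sess.run(should_stop):
    sess.run(increment_step)
    if sess.run(should_log):
      print('global step:', sess.run(global_step))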
def train(train_op, logdir, metric_op=None, metric_collection_name=None, train_step_fn=train_step, train_step_kwargs=_USE_DEFAULT, log_every_n_steps=1, graph=None, master='', is_chief=True, global_step=None, number_of_samples=None, number_of_steps=None, number_of_epochs=None, batch_size=None, init_op=_USE_DEFAULT, init_feed_dict=None, local_init_op=_USE_DEFAULT, init_fn=None, ready_op=_USE_DEFAULT, summary_op=_USE_DEFAULT, save_summaries_secs=600, summary_writer=_USE_DEFAULT, startup_delay_steps=0, saver=None, save_interval_secs=600, sync_optimizer=None, session_config=None, trace_every_n_steps=None): """Runs a training loop using a TensorFlow supervisor. When the sync_optimizer is supplied, gradient updates are applied synchronously. Otherwise, gradient updates are applied asynchronous. Args: train_op: A `Tensor` that, when executed, will apply the gradients and return the loss value. metric_op: A `Tensor` that, when executed, will update the streaming_metrics ops. metric_collection_name: The name associated with the metric_op. logdir: The directory where training logs are written to. If None, model checkpoints and summaries will not be written. train_step_fn: The function to call in order to execute a single gradient step. The function must have take exactly four arguments: the current session, the `train_op` `Tensor`, a global step `Tensor` and a dictionary. train_step_kwargs: A dictionary which is passed to the `train_step_fn`. By default, two `Boolean`, scalar ops called "should_stop" and "should_log" are provided. log_every_n_steps: The frequency, in terms of global steps, that the loss and global step and logged. graph: The graph to pass to the supervisor. If no graph is supplied the default graph is used. master: The address of the tensorflow master. is_chief: Specifies whether or not the training is being run by the primary replica during replica training. global_step: The `Tensor` representing the global step. If left as `None`, then slim.variables.get_or_create_global_step() is used. number_of_steps: The max number of gradient steps to take during training, as measured by 'global_step': training will stop if global_step is greater than 'number_of_steps'. If the value is left as None, training proceeds indefinitely. number_of_epochs: The total number of epochs per training. batch_size: The number of samples in each batch. init_op: The initialization operation. If left to its default value, then the session is initialized by calling `tf.global_variables_initializer()`. init_feed_dict: A feed dictionary to use when executing the `init_op`. local_init_op: The local initialization operation. If left to its default value, then the session is initialized by calling `tf.local_variables_initializer()` and `tf.tables_initializer()`. init_fn: An optional callable to be executed after `init_op` is called. The callable must accept one argument, the session being initialized. ready_op: Operation to check if the model is ready to use. If left to its default value, then the session checks for readiness by calling `tf.report_uninitialized_variables()`. summary_op: The summary operation. save_summaries_secs: How often, in seconds, to save summaries. summary_writer: `SummaryWriter` to use. Can be `None` to indicate that no summaries should be written. If unset, we create a SummaryWriter. startup_delay_steps: The number of steps to wait for before beginning. Note that this must be 0 if a sync_optimizer is supplied. saver: Saver to save checkpoints. If None, a default one will be created and used. 
save_interval_secs: How often, in seconds, to save the model to `logdir`. sync_optimizer: an instance of tf.train.SyncReplicasOptimizer. If the argument is supplied, gradient updates will be synchronous. If left as `None`, gradient updates will be asynchronous. session_config: An instance of `tf.ConfigProto` that will be used to configure the `Session`. If left as `None`, the default will be used. trace_every_n_steps: produce and save a `Timeline` in Chrome trace format and add it to the summaries every `trace_every_n_steps`. If None, no trace information will be produced or saved. Returns: the value of the loss function after training. Raises: ValueError: if `train_op` is empty or if `startup_delay_steps` is non-zero when `sync_optimizer` is supplied, if `number_of_steps` is negative, or if `trace_every_n_steps` is not `None` and no `logdir` is provided. """ # Check if the calculation of some metrics is desired. if metric_op is not None: # Check the necessary requirements if metric_collection_name is None: raise ValueError('metric_collection_name must be fed and cannot be No') if number_of_samples is None: raise ValueError('number_of_samples must be fed and cannot be No') if number_of_steps is None: raise ValueError('number_of_steps must be fed and cannot be No') if number_of_epochs is None: raise ValueError('number_of_epochs must be fed and cannot be No') if batch_size is None: raise ValueError('batch_size must be fed and cannot be None') if train_op is None: raise ValueError('train_op cannot be None.') if logdir is None: if summary_op != _USE_DEFAULT: raise ValueError('Cannot provide summary_op because logdir=None') if saver is not None: raise ValueError('Cannot provide saver because logdir=None') if trace_every_n_steps is not None: raise ValueError('Cannot provide trace_every_n_steps because ' 'logdir=None') if sync_optimizer is not None and startup_delay_steps > 0: raise ValueError( 'startup_delay_steps must be zero when sync_optimizer is supplied.') if number_of_steps is not None and number_of_steps <= 0: raise ValueError( '`number_of_steps` must be either None or a positive number.') graph = graph or ops.get_default_graph() with graph.as_default(): if global_step is None: global_step = variables.get_or_create_global_step() saver = saver or tf_saver.Saver() with ops.name_scope('init_ops'): if init_op == _USE_DEFAULT: init_op = tf_variables.global_variables_initializer() if ready_op == _USE_DEFAULT: ready_op = tf_variables.report_uninitialized_variables() if local_init_op == _USE_DEFAULT: local_init_op = control_flow_ops.group( tf_variables.local_variables_initializer(), lookup_ops.tables_initializer()) if sync_optimizer is not None and isinstance( sync_optimizer, sync_replicas_optimizer.SyncReplicasOptimizer): with ops.control_dependencies([local_init_op] if local_init_op is not None else []): if is_chief: local_init_op = sync_optimizer.chief_init_op else: local_init_op = sync_optimizer.local_step_init_op ready_for_local_init_op = sync_optimizer.ready_for_local_init_op else: ready_for_local_init_op = None if summary_op == _USE_DEFAULT: summary_op = summary.merge_all() if summary_writer == _USE_DEFAULT: summary_writer = supervisor.Supervisor.USE_DEFAULT if is_chief and sync_optimizer is not None: if not isinstance(sync_optimizer, (sync_replicas_optimizer.SyncReplicasOptimizer)): raise ValueError( '`sync_optimizer` must be a tf.train.SyncReplicasOptimizer.') # Need to create these BEFORE the supervisor finalizes the graph: init_tokens_op = sync_optimizer.get_init_tokens_op() 
chief_queue_runner = sync_optimizer.get_chief_queue_runner() if train_step_kwargs == _USE_DEFAULT: with ops.name_scope('train_step'): train_step_kwargs = {} if number_of_steps: should_stop_op = math_ops.greater_equal(global_step, number_of_steps) else: should_stop_op = constant_op.constant(False) train_step_kwargs['should_stop'] = should_stop_op train_step_kwargs['should_log'] = math_ops.equal( math_ops.mod(global_step, log_every_n_steps), 0) if is_chief and trace_every_n_steps is not None: train_step_kwargs['should_trace'] = math_ops.equal( math_ops.mod(global_step, trace_every_n_steps), 0) train_step_kwargs['logdir'] = logdir if number_of_samples is not None and batch_size is not None: train_step_kwargs['num_batches_per_epoch'] = int(number_of_samples / float(batch_size)) if number_of_samples is not None and batch_size is not None: train_step_kwargs['num_steps_per_epoch'] = int(number_of_steps / float(number_of_epochs)) # If metric calculation is desired. if metric_op is not None: # The reset_op is defined for resetting the streaming_variables(streaming_acurracy,...) # The reason for defining it here is that the supervisor finalized the graph and the graph will be fixed after it. # By calling the reset_op in the train_step function, after each epoch the total & count variables will reset to zero. # This help to have the averaged accuracy per epoch which is useful to realize if we are getting to the highest accuracy in the training. stream_vars = [i for i in tf.local_variables() if i.name.split('/')[1] == metric_collection_name] reset_op = tf.variables_initializer(stream_vars) sv = supervisor.Supervisor( graph=graph, is_chief=is_chief, logdir=logdir, init_op=init_op, init_feed_dict=init_feed_dict, local_init_op=local_init_op, ready_for_local_init_op=ready_for_local_init_op, ready_op=ready_op, summary_op=summary_op, summary_writer=summary_writer, global_step=global_step, saver=saver, save_summaries_secs=save_summaries_secs, save_model_secs=save_interval_secs, init_fn=init_fn) if summary_writer is not None: train_step_kwargs['summary_writer'] = sv.summary_writer should_retry = True while should_retry: try: should_retry = False with sv.managed_session( master, start_standard_services=False, config=session_config) as sess: logging.info('Starting Session.') if is_chief: if logdir: sv.start_standard_services(sess) elif startup_delay_steps > 0: _wait_for_step(sess, global_step, min(startup_delay_steps, number_of_steps or sys.maxint)) sv.start_queue_runners(sess) logging.info('Starting Queues.') if is_chief and sync_optimizer is not None: sv.start_queue_runners(sess, [chief_queue_runner]) sess.run(init_tokens_op) try: while not sv.should_stop(): if metric_op is not None: total_loss, should_stop = train_step_fn( sess, train_op, global_step, train_step_kwargs, metric_op, reset_op) else: total_loss, should_stop = train_step_fn( sess, train_op, global_step, train_step_kwargs) if should_stop: logging.info('Stopping Training.') break except errors.OutOfRangeError: # OutOfRangeError is thrown when epoch limit per # tf.train.limit_epochs is reached. logging.info('Caught OutOfRangeError. Stopping Training.') if logdir and sv.is_chief: logging.info('Finished training! Saving model to disk.') sv.saver.save(sess, sv.save_path, global_step=sv.global_step) except errors.AbortedError: # Always re-run on AbortedError as it indicates a restart of one of the # distributed tensorflow servers. logging.info('Retrying training!') should_retry = True return total_loss
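# --- Usage sketch (assumed, not taken from the metric_op branch above) -------
# The metric_op path above builds a `reset_op` that re-initializes the
# streaming metric's local variables so the accumulated totals start from zero
# at each epoch. A stand-alone sketch of that reset pattern; the 'my_metrics'
# scope name, the metric and the example feeds are illustrative assumptions.
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

labels = tf.placeholder(tf.float32, [None])
predictions = tf.placeholder(tf.float32, [None])
with tf.variable_scope('my_metrics'):
  accuracy, update_op = tf.metrics.accuracy(labels, predictions)

# Collect the metric's total/count variables and build an op that zeroes them.
stream_vars = [v for v in tf.local_variables()
               if v.name.split('/')[0] == 'my_metrics']
reset_op = tf.variables_initializer(stream_vars)

with tf.Session() as sess:
  sess.run(tf.local_variables_initializer())
  sess.run(update_op, {labels: [1., 0.], predictions: [1., 1.]})
  print(sess.run(accuracy))  # 0.5 accumulated so far
  sess.run(reset_op)         # start the next epoch from a clean slate
  print(sess.run(accuracy))  # 0.0 after the reset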
def _InsertQuantOp(context,
                   name,
                   producer,
                   consumers,
                   is_training,
                   moving_avg=True,
                   init_min=-6.0,
                   init_max=6.0,
                   bits=8,
                   ema_decay=0.999,
                   quant_delay=None,
                   vars_collection=ops.GraphKeys.GLOBAL_VARIABLES,
                   narrow_range=False,
                   producer_scope=None,
                   consumer_scope=None):
  """Inserts a quant op between a producer op and (multiple) consumer ops.

  Args:
    context: Context where producer and consumer operations are nested.
    name: Name for the new quantization op within the context.
    producer: Producer operation of the pairs where quantization will be
      inserted.
    consumers: Consumer operations of the pairs.
    is_training: Whether quantizing training graph or eval graph.
    moving_avg: Specifies whether to use exponential moving average or just
      the last value seen.
    init_min: Starting minimum value for the new quantization op.
    init_max: Starting maximum value for the new quantization op.
    bits: Number of bits to use for quantization, must be between 2 and 8.
    ema_decay: (Optional) Float, EMA decay parameter. EMA is used to update
      quantization intervals for quantizing activations (see here about EMA:
      https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average).
    quant_delay: (Optional, default None) Int, count of global steps for which
      to delay quantization. This helps weights stabilize at the start of
      training.
    vars_collection: (Optional) Collection where to store the variables for
      quantization interval ends.
    narrow_range: Whether to use the narrow quantization range
      [1; 2^bits - 1] or wide range [0; 2^bits - 1].
    producer_scope: The restriction of producer scope. If not None, the new op
      will be inserted only when the producer is in this scope.
    consumer_scope: The restriction of consumer scope. If not None, the new op
      will be inserted only when all the consumers are in this scope.

  Raises:
    ValueError: When producer operation is not directly connected to the
      consumer operation.
  """
  if producer_scope and not producer.name.startswith(producer_scope):
    logging.info(
        '_InsertQuantOp ignores context="%s" name="%s" '
        'because producer "%s" is not in scope "%s"',
        context, name, producer.name, producer_scope)
    return

  if consumer_scope:
    consumers_in_scope = []
    for consumer in consumers:
      if consumer.name.startswith(consumer_scope):
        consumers_in_scope.append(consumer)
      else:
        logging.info(
            '_InsertQuantOp context="%s" name="%s" ignores '
            'consumer "%s" because it is not in scope "%s"',
            context, name, consumer.name, consumer_scope)
        return
    consumers = consumers_in_scope

  name_prefix = _AddContextToName(context, name)
  # This is needed on TPU where name_scope == 'TPUReplicate/loop', and
  # name_prefix starts with 'TPUReplicate/loop/'; without dropping it
  # variables are created as TPUReplicate/loop/TPUReplicate/loop/..., which
  # breaks things later.
  name_scope = ops.get_name_scope()
  if name_scope:
    name_prefix = common.DropStringPrefix(name_prefix, name_scope + '/')

  inputs = producer.outputs[0]
  # Prevent ops from being quantized multiple times. Bypass ops can sometimes
  # overlap between multiple matches, so we need to ensure that we don't
  # add duplicate FakeQuant operations.
  fake_quant_ops = set([
      'FakeQuantWithMinMaxVars',
      'FakeQuantWithMinMaxArgs'
  ])
  if fake_quant_ops.intersection(set([c.type for c in inputs.consumers()])):
    return

  if moving_avg:
    quant = (
        quant_ops.MovingAvgQuantize(
            inputs,
            init_min=init_min,
            init_max=init_max,
            ema_decay=ema_decay,
            is_training=is_training,
            num_bits=bits,
            narrow_range=narrow_range,
            vars_collection=vars_collection,
            name_prefix=name_prefix))
  else:
    quant = (
        quant_ops.LastValueQuantize(
            inputs,
            init_min=init_min,
            init_max=init_max,
            is_training=is_training,
            num_bits=bits,
            narrow_range=narrow_range,
            vars_collection=vars_collection,
            name_prefix=name_prefix))

  if quant_delay and quant_delay > 0:
    activate_quant = math_ops.greater_equal(
        common.CreateOrGetQuantizationStep(),
        quant_delay,
        name=name_prefix + '/activate_quant')
    quant = control_flow_ops.cond(
        activate_quant,
        lambda: quant,
        lambda: inputs,
        name=name_prefix + '/delayed_quant')

  if consumers:
    tensors_modified_count = graph_editor.reroute_ts(
        [quant], [inputs], can_modify=consumers)
    # Some operations can have multiple output tensors going to the same
    # consumer. Since consumers is a set, we need to ensure that
    # tensors_modified_count is greater than or equal to the length of the set
    # of consumers.
    if tensors_modified_count < len(consumers):
      raise ValueError('No inputs quantized for ops: [%s]' % ', '.join(
          [consumer.name for consumer in consumers]))
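# --- Usage sketch (assumed, not taken from _InsertQuantOp above) -------------
# The quant_delay branch above switches from the raw tensor to the fake-quant
# tensor once the global step reaches `quant_delay`, via
# cond(greater_equal(step, quant_delay), ...). A small sketch of the same
# gating with a plain fake_quant op standing in for quant_ops.MovingAvgQuantize;
# the 100-step delay and the [-6, 6] range are illustrative.
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

inputs = tf.placeholder(tf.float32, [None])
quant_delay = 100
global_step = tf.train.get_or_create_global_step()

quantized = tf.quantization.fake_quant_with_min_max_args(
    inputs, min=-6.0, max=6.0, num_bits=8)
activate_quant = tf.greater_equal(global_step, quant_delay,
                                  name='activate_quant')
outputs = tf.cond(activate_quant,
                  lambda: quantized,  # after the delay: fake-quantized values
                  lambda: inputs,     # before the delay: pass-through
                  name='delayed_quant')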
def _train_op_fn(loss): """Run one training iteration.""" if training_state_cache: train_op.append(training_state_cache.insert(tree_ids, node_ids, logits)) if closed_form_grad_and_hess_fn: gradients, hessians = closed_form_grad_and_hess_fn(logits, labels) else: gradients = gradients_impl.gradients(loss, logits, name='Gradients')[0] hessians = gradients_impl.gradients( gradients, logits, name='Hessians')[0] stats_summaries_list = [] for i, feature_ids in enumerate(feature_ids_list): num_buckets = bucket_size_list[i] summaries = [ array_ops.squeeze( boosted_trees_ops.make_stats_summary( node_ids=node_ids, gradients=gradients, hessians=hessians, bucketized_features_list=[input_feature_list[f]], max_splits=max_splits, num_buckets=num_buckets), axis=0) for f in feature_ids ] stats_summaries_list.append(summaries) accumulators = [] def grow_tree_from_stats_summaries(stats_summaries_list, feature_ids_list): """Updates ensemble based on the best gains from stats summaries.""" node_ids_per_feature = [] gains_list = [] thresholds_list = [] left_node_contribs_list = [] right_node_contribs_list = [] all_feature_ids = [] assert len(stats_summaries_list) == len(feature_ids_list) for i, feature_ids in enumerate(feature_ids_list): (numeric_node_ids_per_feature, numeric_gains_list, numeric_thresholds_list, numeric_left_node_contribs_list, numeric_right_node_contribs_list) = ( boosted_trees_ops.calculate_best_gains_per_feature( node_id_range=last_layer_nodes_range, stats_summary_list=stats_summaries_list[i], l1=tree_hparams.l1, l2=tree_hparams.l2, tree_complexity=tree_hparams.tree_complexity, min_node_weight=tree_hparams.min_node_weight, max_splits=max_splits)) all_feature_ids += feature_ids node_ids_per_feature += numeric_node_ids_per_feature gains_list += numeric_gains_list thresholds_list += numeric_thresholds_list left_node_contribs_list += numeric_left_node_contribs_list right_node_contribs_list += numeric_right_node_contribs_list grow_op = boosted_trees_ops.update_ensemble( # Confirm if local_tree_ensemble or tree_ensemble should be used. tree_ensemble.resource_handle, feature_ids=all_feature_ids, node_ids=node_ids_per_feature, gains=gains_list, thresholds=thresholds_list, left_node_contribs=left_node_contribs_list, right_node_contribs=right_node_contribs_list, learning_rate=tree_hparams.learning_rate, max_depth=tree_hparams.max_depth, pruning_mode=boosted_trees_ops.PruningMode.NO_PRUNING) return grow_op if train_in_memory and is_single_machine: train_op.append(distribute_lib.increment_var(global_step)) train_op.append( grow_tree_from_stats_summaries(stats_summaries_list, feature_ids_list)) else: dependencies = [] for i, feature_ids in enumerate(feature_ids_list): stats_summaries = stats_summaries_list[i] accumulator = data_flow_ops.ConditionalAccumulator( dtype=dtypes.float32, # The stats consist of grads and hessians (the last dimension). shape=[len(feature_ids), max_splits, bucket_size_list[i], 2], shared_name='numeric_stats_summary_accumulator_' + str(i)) accumulators.append(accumulator) apply_grad = accumulator.apply_grad( array_ops.stack(stats_summaries, axis=0), stamp_token) dependencies.append(apply_grad) def grow_tree_from_accumulated_summaries_fn(): """Updates the tree with the best layer from accumulated summaries.""" # Take out the accumulated summaries from the accumulator and grow. 
stats_summaries_list = [] stats_summaries_list = [ array_ops.unstack(accumulator.take_grad(1), axis=0) for accumulator in accumulators ] grow_op = grow_tree_from_stats_summaries(stats_summaries_list, feature_ids_list) return grow_op with ops.control_dependencies(dependencies): train_op.append(distribute_lib.increment_var(global_step)) if config.is_chief: min_accumulated = math_ops.reduce_min( array_ops.stack( [acc.num_accumulated() for acc in accumulators])) train_op.append( control_flow_ops.cond( math_ops.greater_equal(min_accumulated, n_batches_per_layer), grow_tree_from_accumulated_summaries_fn, control_flow_ops.no_op, name='wait_until_n_batches_accumulated')) return control_flow_ops.group(train_op, name='train_op')
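# --- Usage sketch (assumed, not taken from _train_op_fn above) ---------------
# The distributed branch above only grows the tree once a ConditionalAccumulator
# has seen at least n_batches_per_layer batches, via
# cond(greater_equal(num_accumulated, n_batches_per_layer), ...). A stripped-down
# sketch of that gating with scalar "stats"; the shape, the local_step value,
# the batch count and the `layers_grown` counter are illustrative assumptions.
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

n_batches_per_layer = 2
accumulator = tf.ConditionalAccumulator(
    dtype=tf.float32, shape=[], shared_name='toy_stats_accumulator')
apply_grad = accumulator.apply_grad(tf.constant(1.0), local_step=0)
layers_grown = tf.Variable(0, trainable=False, name='layers_grown')

def grow_fn():
  # Stand-in for growing a tree layer from the accumulated stats.
  stats = accumulator.take_grad(1)
  with tf.control_dependencies([stats]):
    return tf.assign_add(layers_grown, 1).op

maybe_grow = tf.cond(
    tf.greater_equal(accumulator.num_accumulated(), n_batches_per_layer),
    grow_fn, tf.no_op, name='wait_until_n_batches_accumulated')

with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  for _ in range(n_batches_per_layer):
    sess.run(apply_grad)
  sess.run(maybe_grow)           # fires only once enough batches accumulated
  print(sess.run(layers_grown))  # 1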
def train( train_op, logdir, log_every_n_steps=1, graph=None, master='', is_chief=True, global_step=None, number_of_steps=None, init_op=_USE_DEFAULT, init_feed_dict=None, init_fn=None, summary_op=_USE_DEFAULT, save_summaries_secs=600, startup_delay_steps=0, saver=None, save_interval_secs=600, sync_optimizer=None): """Runs a training loop using a TensorFlow supervisor. When the sync_optimizer is supplied, gradient updates are applied synchronously. Otherwise, gradient updates are applied asynchronous. Args: train_op: A `Tensor` that, when executed, will apply the gradients and return the loss value. logdir: the directory where training logs are written to. log_every_n_steps: The frequency, in terms of global steps, that the loss and global step and logged. graph: The graph to pass to the supervisor. If no graph is supplied the default graph is used. master: The BNS name of the tensorflow master. is_chief: Specifies whether or not the training is being run by the primary replica during replica training. global_step: The `Tensor` representing the global step. If left as `None`, then slim.variables.get_or_create_global_step() is used. number_of_steps: The max number of gradient steps to take during training. If the value is left as None, training proceeds indefinitely. init_op: The initialization operation. init_feed_dict: A feed dictionary to use when executing the `init_op`. init_fn: An optional callable to be executed after `init_op` is called. The callable must accept one argument, the session being initialized. summary_op: The summary operation. save_summaries_secs: How often, in seconds, to save summaries. startup_delay_steps: The number of steps to wait for before beginning. Note that this must be 0 if a sync_optimizer is supplied. saver: Saver to save checkpoints. If none, a default one will be created and used. save_interval_secs: How often, in seconds, to save the model to `logdir`. sync_optimizer: an instance of tf.train.SyncReplicasOptimizer. If the argument is supplied, gradient updates will be synchronous. If left as `None`, gradient updates will be asynchronous. Returns: the value of the loss function after training. Raises: ValueError: if `train_op` is empty or if `startup_delay_steps` is non-zero when `sync_optimizer` is supplied, or if `number_of_steps` is negative. 
""" if train_op is None: raise ValueError('train_op cannot be None.') if sync_optimizer and startup_delay_steps > 0: raise ValueError( 'startup_delay_steps must be zero when sync_optimizer is supplied.') if number_of_steps is not None and number_of_steps <= 0: raise ValueError( '`number_of_steps` must be either None or a positive number.') graph = graph or ops.get_default_graph() if global_step is None: global_step = variables.get_or_create_global_step() saver = saver or tf_saver.Saver() if init_op is None: init_op = control_flow_ops.group( tf_variables.initialize_all_variables(), tf_variables.initialize_local_variables(), tf_variables.initialize_all_tables()) if summary_op == _USE_DEFAULT: summary_op = logging_ops.merge_all_summaries() local_init_op = None cleanup_op = None if is_chief and sync_optimizer: if not isinstance(sync_optimizer, sync_replicas_optimizer.SyncReplicasOptimizer): raise ValueError( '`sync_optimizer` must be a tf.train.SyncReplicasOptimizer') # Need to create these BEFORE the supervisor finalizes the graph: local_init_op = sync_optimizer.get_init_tokens_op() chief_queue_runner = sync_optimizer.get_chief_queue_runner() cleanup_op = sync_optimizer.get_clean_up_op() if number_of_steps: should_stop_op = math_ops.greater_equal(global_step, number_of_steps) else: should_stop_op = constant_op.constant(False) should_log_op = math_ops.equal( math_ops.mod(global_step, log_every_n_steps), 0) sv = supervisor.Supervisor( graph=graph, is_chief=is_chief, logdir=logdir, init_op=init_op, init_feed_dict=init_feed_dict, local_init_op=local_init_op, summary_op=summary_op, global_step=global_step, saver=saver, save_summaries_secs=save_summaries_secs, save_model_secs=save_interval_secs, init_fn=init_fn) with sv.managed_session(master, start_standard_services=False) as sess: if is_chief: sv.start_standard_services(sess) elif not is_chief and startup_delay_steps > 0: _wait_for_step(sess, global_step, min(startup_delay_steps, number_of_steps or sys.maxint)) sv.start_queue_runners(sess) if is_chief and sync_optimizer: sv.start_queue_runners(sess, [chief_queue_runner]) total_loss = train_loop( sv, sess, train_op, should_stop_op, should_log_op, global_step, cleanup_op) # This waits for service threads to finish. sv.Stop() if sv.is_chief: logging.info('Finished training! Saving model to disk.') sv.saver.save(sess, sv.save_path, global_step=sv.global_step) return total_loss
def get_binary_predictions_for_logistic(predictions, cutoff=0.5):
  return math_ops.cast(
      math_ops.greater_equal(predictions,
                             array_ops.ones_like(predictions) * cutoff),
      dtype=dtypes.int32)
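# --- Usage sketch (assumed, not part of the helper above) --------------------
# greater_equal is inclusive, so a prediction exactly at the cutoff maps to 1.
# A tiny check of that behaviour; the input values are illustrative.
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

predictions = tf.constant([0.2, 0.5, 0.8])
binary = tf.cast(
    tf.greater_equal(predictions, tf.ones_like(predictions) * 0.5),
    dtype=tf.int32)

with tf.Session() as sess:
  print(sess.run(binary))  # [0 1 1]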