def _compute_auxiliary_structure(self, contents_and_mask):
    """Derive segment ids and in-segment positions for packed sequences.

    Args:
        contents_and_mask: 2-D tensor. The first `self._num_sequences`
            columns are treated as contents; the remaining columns are
            treated as start-of-segment flags (cast to `INDEX_DTYPE`).

    Returns:
        A `(segment, position)` pair, each shaped like the contents slice.
        Entries whose content equals 0 (padding) are zeroed in both.
    """
    num_seqs = self._num_sequences
    contents = contents_and_mask[:, :num_seqs]
    start_mask = tf.cast(contents_and_mask[:, num_seqs:], dtype=INDEX_DTYPE)

    # Running count of segment starts down each column gives 1-based ids.
    segment = tf.cumsum(start_mask, axis=0)
    uniform_count = tf.ones_like(segment[:, 0])

    def column_position(col):
        """Row index minus the accumulated segment lengths for one column."""
        segment_col = segment[:, col]
        counts = tf.math.segment_sum(uniform_count, segment_col)
        consumed = tf.cumsum(
            tf.gather(counts, segment_col - 1) * start_mask[:, col])
        return tf.range(self._packed_length) - consumed

    position = tf.concat(
        [column_position(col)[:, tf.newaxis] for col in range(num_seqs)],
        axis=1)

    # Padding tokens (content == 0) get segment 0 and position 0.
    pad_mask = tf.cast(tf.not_equal(contents, 0), dtype=INDEX_DTYPE)
    segment *= pad_mask
    position *= pad_mask
    return segment, position
def get_random_walk_noise_for_position_sequence(vel_sequence,
                                                noise_std_last_step):
    """Sample random-walk noise integrated along the time axis.

    Args:
        vel_sequence: Tensor whose axis 1 is the time axis; it is passed to
            `learned_simulator.time_diff` (presumably a first difference
            along axis 1 -- confirm against that helper).
        noise_std_last_step: Target standard deviation of the accumulated
            noise at the final step.

    Returns:
        A tensor with one more step along axis 1 than the differenced
        sequence; the first step carries zero noise.
    """
    acc_sequence = learned_simulator.time_diff(vel_sequence)

    # A random walk of `num_acc` i.i.d. steps has variance
    #   std_last_step**2 = num_acc * std_each_step**2,
    # so each step is drawn with std_last_step / sqrt(num_acc).
    num_acc = acc_sequence.shape.as_list()[1]
    per_step_std = noise_std_last_step / num_acc**0.5
    step_noise = tf.random.normal(
        tf.shape(acc_sequence), stddev=per_step_std, dtype=vel_sequence.dtype)

    # Accumulate the per-step noise into a random walk.
    walk_noise = tf.cumsum(step_noise, axis=1)

    # Integrate once more (Euler, dt = 1). The very first entry gets no
    # noise, since it only serves as the base for the first change.
    leading_zeros = tf.zeros_like(walk_noise[:, 0:1])
    integrated_noise = tf.cumsum(walk_noise, axis=1)
    return tf.concat([leading_zeros, integrated_noise], axis=1)
def get_random_walk_noise_for_position_sequence(position_sequence,
                                                noise_std_last_step):
    """Return position noise produced by a random walk on the velocities.

    Args:
        position_sequence: Tensor of positions with time on axis 1.
        noise_std_last_step: Standard deviation the velocity noise should
            have at the last step of the sequence.

    Returns:
        Noise tensor with one more step along axis 1 than the velocity
        sequence; its first step is all zeros.
    """
    # time_diff: input_sequence[:, 1:] - input_sequence[:, :-1]
    velocities = learned_simulator.time_diff(position_sequence)

    # The noise is composed step by step as a random walk, therefore
    #   std_last_step**2 = num_velocities * std_each_step**2
    # and each step must use std_last_step / sqrt(num_velocities).
    # TODO(alvarosg): Make sure this is consistent with the value and
    # description provided in the paper.
    num_velocities = velocities.shape.as_list()[1]
    std_each_step = noise_std_last_step / num_velocities**0.5
    noise = tf.random.normal(
        tf.shape(velocities),
        stddev=std_each_step,
        dtype=position_sequence.dtype)

    # Random walk over the velocity noise.
    noise = tf.cumsum(noise, axis=1)

    # Integrate velocity noise into position noise with an Euler integrator
    # and dt = 1. The first position stays noise-free because it is only
    # used to compute the first position change.
    no_noise_first_step = tf.zeros_like(noise[:, 0:1])
    accumulated = tf.cumsum(noise, axis=1)
    return tf.concat([no_noise_first_step, accumulated], axis=1)
def strategy(self, outputs, X, y, nump=False, cumulative=False):
    """Compute trading gains and transaction costs for a strategy.

    Args:
        outputs: Strategy values, indexed as [batch, step, asset]; must
            be multiplicable with the step-wise price changes of `X`.
        X: Prices, indexed as [batch, time, asset].
        y: Unused; kept for interface compatibility.
        nump: If True, `outputs` and `X` are NumPy arrays and NumPy ops
            are used; otherwise TensorFlow ops are used.
        cumulative: If True, return running totals along the step axis
            (summed over assets); otherwise return one total per batch
            element.

    Returns:
        A `(gains_of_trade, transaction_costs)` pair.
    """
    price_changes = X[:, 1:, :] - X[:, :-1, :]
    prices = X[:, 1:, :]

    # Strategy changes: outputs[:, 1], then the step-to-step differences
    # from step 2 on, then outputs[:, ttm - 2].
    inner_changes = (outputs[:, 1:, :] - outputs[:, :-1, :])[:, 1:, :]
    change_parts = [
        outputs[:, 1:2, :],
        inner_changes,
        outputs[:, self.ttm - 2:self.ttm - 1, :],
    ]

    if nump:
        # `price_changes` is a fresh array, so scaling its first step in
        # place does not touch the caller's `X`.
        price_changes[:, 0, :] = 0.05 * price_changes[:, 0, :]
        strategychanges = np.concatenate(change_parts, axis=1)
        gains = price_changes * outputs
        costs = np.abs(prices) * np.abs(strategychanges)
        if cumulative:
            return (np.cumsum(np.sum(gains, axis=2), axis=1),
                    np.cumsum(np.sum(costs, axis=2), axis=1))
        return (np.sum(np.sum(gains, axis=1), axis=1),
                np.sum(np.sum(costs, axis=1), axis=1))

    # NOTE(review): here the first step is 0.05 * X[:, 1], whereas the
    # NumPy path uses 0.05 * (X[:, 1] - X[:, 0]). Behaviour is preserved
    # as-is; confirm which one is intended.
    first_step = 0.05 * X[:, 1:2, :]
    price_changes = tf.concat([first_step, price_changes[:, 1:, :]], axis=1)
    strategychanges = tf.concat(change_parts, axis=1)
    gains = tf.multiply(price_changes, outputs)
    # tf.abs (not np.abs) keeps the computation inside TensorFlow in
    # both the cumulative and non-cumulative cases.
    costs = tf.multiply(tf.abs(prices), tf.abs(strategychanges))
    if cumulative:
        return (tf.cumsum(tf.reduce_sum(gains, axis=2), axis=1),
                tf.cumsum(tf.reduce_sum(costs, axis=2), axis=1))
    return (tf.reduce_sum(tf.reduce_sum(gains, axis=1), axis=1),
            tf.reduce_sum(tf.reduce_sum(costs, axis=1), axis=1))
def infer_step(result, length):
    """Run one guess-and-check decoding step.

    Relies on `self`, `features`, `block_size` and `decode_length` from the
    enclosing scope.

    Args:
        result: Current decoded tensor, indexed as [batch, length, 1, 1]
            (the final reshape assumes batch size 1).
        length: Length accepted so far; only used for logging.

    Returns:
        `(new_result, new_length)`: the accepted prefix followed by a new
        block of sampled tokens, and the number of accepted positions.
    """

    def print_info(result, length, new_length):
        """Log the suffix newly accepted in this step (runs via py_func)."""
        vocab = self.problem_hparams.vocabulary["targets"]
        suffix_ids = result[0, -block_size:, 0, 0][:new_length - length]
        new_suffix = str([
            vocab._subtoken_id_to_subtoken_string(index)  # pylint: disable=protected-access
            for index in suffix_ids
        ]).decode("unicode-escape")
        tf.logging.info(
            "length=%s new_length=%s length_diff=%s new_suffix=%s",
            length,
            new_length,
            new_length - length,
            new_suffix,
        )

    # Append one empty position so the model also predicts the next token.
    features["targets"] = tf.pad(result, [[0, 0], [0, 1], [0, 0], [0, 0]])
    samples, logits, losses = self.sample(features)  # pylint: disable=unused-variable

    # A guessed token is accepted when it is among the model's top-k.
    top_k = self._decode_hparams.guess_and_check_top_k
    _, top_k_indices = tf.nn.top_k(logits[:, :-1, :1, :, :], k=top_k)
    guess_matches = tf.equal(
        tf.to_int64(top_k_indices), tf.expand_dims(result, 4))
    in_top_k = tf.reduce_any(guess_matches, axis=4)

    # Positions strictly after the first EOS never count as correct.
    is_eos = tf.equal(result, text_encoder.EOS_ID)
    eos_cumsum = tf.cumsum(tf.to_int32(is_eos), axis=1)
    after_eos = tf.greater(common_layers.shift_right(eos_cumsum), 0)
    correct = tf.logical_and(in_top_k, tf.logical_not(after_eos))

    # The accepted prefix is the run of positions whose cumulative number
    # of correct guesses equals the position index (1-based).
    correct_cumsum = tf.cumsum(tf.to_int32(correct), axis=1)
    perfect_cumsum = 1 + tf.range(tf.shape(correct)[1])
    for axis in (0, 2, 3):
        perfect_cumsum = tf.expand_dims(perfect_cumsum, axis=axis)
    prefix_hits = tf.to_int32(tf.equal(correct_cumsum, perfect_cumsum))
    new_length = tf.reduce_sum(prefix_hits, axis=1)
    new_length = tf.squeeze(new_length, axis=[0, 1, 2])
    new_length = tf.minimum(new_length, decode_length)

    accepted_prefix = result[:, :new_length, :, :]
    next_block = tf.reshape(
        samples[:, new_length, :block_size, :], [1, block_size, 1, 1])
    new_result = tf.concat([accepted_prefix, next_block], axis=1)

    # Tie the logging op to the output so it actually runs.
    log_op = tf.py_func(print_info, [result, length, new_length], [])
    with tf.control_dependencies([log_op]):
        new_result = tf.identity(new_result)
    return new_result, new_length
def _lovasz_grad(gt_sorted):
    """Gradient of the Lovasz extension w.r.t. sorted errors.

    See Alg. 1 in the paper.

    Args:
        gt_sorted: 1-D tensor of ground-truth labels, ordered by error.

    Returns:
        1-D tensor: the first Jaccard loss value followed by the successive
        differences of the Jaccard loss along the sorted order.
    """
    total_positives = tf.reduce_sum(gt_sorted)
    intersection = total_positives - tf.cumsum(gt_sorted)
    union = total_positives + tf.cumsum(1. - gt_sorted)
    jaccard = 1. - intersection / union

    first_value = jaccard[0:1]
    increments = jaccard[1:] - jaccard[:-1]
    return tf.concat((first_value, increments), 0)
def mean_average_precision(labels,
                           predictions,
                           weights=None,
                           topn=None,
                           name=None):
    """Computes mean average precision (MAP).

    The implementation of MAP is based on Equation (1.7) in the following:
    Liu, T-Y "Learning to Rank for Information Retrieval" found at
    https://www.nowpublishers.com/article/DownloadSummary/INR-016

    Args:
        labels: A `Tensor` of the same shape as `predictions`. A value >= 1
            means a relevant example.
        predictions: A `Tensor` with shape [batch_size, list_size]. Each
            value is the ranking score of the corresponding example.
        weights: A `Tensor` of the same shape of predictions or
            [batch_size, 1]. The former case is per-example and the latter
            case is per-list.
        topn: A cutoff for how many examples to consider for this metric.
        name: A string used as the name for this metric.

    Returns:
        A metric for the mean average precision.
    """
    # Use the caller-supplied `name` for the scope; the previous code read
    # `metric.name`, but no `metric` exists in this function.
    with tf.compat.v1.name_scope(name, 'mean_average_precision',
                                 (labels, predictions, weights)):
        labels, predictions, weights, topn = _prepare_and_validate_params(
            labels, predictions, weights, topn)
        sorted_labels, sorted_weights = utils.sort_by_scores(
            predictions, [labels, weights], topn=topn)

        # Relevance = 1.0 when labels >= 1.0.
        sorted_relevance = tf.cast(
            tf.greater_equal(sorted_labels, 1.0), dtype=tf.float32)

        # Precision at each cutoff k = (#relevant in top k) / k.
        per_list_relevant_counts = tf.cumsum(sorted_relevance, axis=1)
        per_list_cutoffs = tf.cumsum(tf.ones_like(sorted_relevance), axis=1)
        per_list_precisions = tf.math.divide_no_nan(
            per_list_relevant_counts, per_list_cutoffs)

        # Weighted average of precision over the relevant positions only.
        total_precision = tf.reduce_sum(
            input_tensor=per_list_precisions * sorted_weights *
            sorted_relevance,
            axis=1,
            keepdims=True)
        total_relevance = tf.reduce_sum(
            input_tensor=sorted_weights * sorted_relevance,
            axis=1,
            keepdims=True)
        per_list_map = tf.math.divide_no_nan(total_precision,
                                             total_relevance)

        # per_list_weights are computed from the whole list to avoid the
        # problem of 0 when there is no relevant example in topn.
        per_list_weights = _per_example_weights_to_per_list_weights(
            weights,
            tf.cast(tf.greater_equal(labels, 1.0), dtype=tf.float32))
        return tf.compat.v1.metrics.mean(per_list_map, per_list_weights)
def _word_span_mask(inputs, tgt_len, num_predict, boundary, stride=1):
    """Sample whole word spans as prediction targets.

    Args:
        inputs: Forwarded unchanged to `_idx_pair_to_mask`.
        tgt_len: Target length; also forwarded to `_idx_pair_to_mask`.
        num_predict: Number of spans to sample (an over-sampling; spans
            falling out of range are dropped).
        boundary: 1-D tensor mapping word-boundary index to token index.
        stride: Reduces the usable length to `tgt_len + 1 - stride`.

    Returns:
        Whatever `_idx_pair_to_mask` returns for the sampled spans.
    """
    # Note: 1.2 is roughly the token-to-word ratio.
    non_pad_len = tgt_len + 1 - stride
    chunk_len_fp = non_pad_len / num_predict / 1.2

    def round_to_int(x):
        return tf.cast(tf.round(x), tf.int64)

    # Span lengths follow a zipf-like distribution: p(len) ~ 1 / (len + 1).
    candidate_lens = np.arange(FLAGS.min_word, FLAGS.max_word + 1)
    probs = 1.0 / (candidate_lens + 1)
    probs = probs / np.sum(probs)
    logits = tf.constant(np.log(probs), dtype=tf.float32)

    # Sample `num_predict` words here: note that this is over sampling.
    sampled_offsets = tf.random.categorical(
        logits=logits[None],
        num_samples=num_predict,
        dtype=tf.int64,
    )[0]
    span_lens = sampled_offsets + FLAGS.min_word

    # Sample the ratio [0.0, 1.0) of left context lengths.
    span_lens_fp = tf.cast(span_lens, tf.float32)
    left_ratio = tf.random.uniform(
        shape=[num_predict], minval=0.0, maxval=1.0)
    left_ctx_len = round_to_int(
        left_ratio * span_lens_fp * (chunk_len_fp - 1))
    right_offset = round_to_int(span_lens_fp * chunk_len_fp) - left_ctx_len

    # Span i begins after all earlier chunks plus its own left context.
    beg_indices = (tf.cumsum(left_ctx_len) +
                   tf.cumsum(right_offset, exclusive=True))
    end_indices = beg_indices + span_lens

    # Remove out of range `boundary` indices.
    max_boundary_index = tf.cast(tf.shape(boundary)[0] - 1, tf.int64)
    in_range = end_indices < max_boundary_index
    beg_indices = tf.boolean_mask(beg_indices, in_range)
    end_indices = tf.boolean_mask(end_indices, in_range)

    # Translate word-boundary indices into token positions.
    beg_indices = tf.gather(boundary, beg_indices)
    end_indices = tf.gather(boundary, end_indices)

    # Shuffle valid `position` indices.
    num_valid = tf.cast(tf.shape(beg_indices)[0], tf.int64)
    order = tf.random.shuffle(tf.range(num_valid, dtype=tf.int64))
    beg_indices = tf.gather(beg_indices, order)
    end_indices = tf.gather(end_indices, order)

    return _idx_pair_to_mask(beg_indices, end_indices, inputs, tgt_len,
                             num_predict)
def make_att_mask_from_breakpoints(att_breakpoints: tf.Tensor,
                                   use_starting_breakpoints: bool = False,
                                   name: Optional[Text] = None) -> tf.Tensor:
    """Builds a self-attention mask that blocks attention across segments.

    Breakpoints mark segment ends by default, or segment starts when
    `use_starting_breakpoints` is True.

    Args:
        att_breakpoints: <int32>[batch_size, seq_len] Tensor containing only
            0 and 1 values, where each "1" marks the end of a segment (or
            the start, depending on `use_starting_breakpoints`).
        use_starting_breakpoints: If True, breakpoints represent starts of
            segments rather than ends of segments. Default False.
        name: A name for the operation (optional).

    Returns:
        <int32>[batch_size, seq_len, seq_len] attention mask.

    Raises:
        ValueError: If `att_breakpoints` is not a 2-D tensor.
    """
    with tf.name_scope(name or 'make_att_mask_from_breakpoints'):
        att_breakpoints = tf.convert_to_tensor(att_breakpoints)
        if att_breakpoints.shape.rank != 2:
            raise ValueError('`att_breakpoints` must be a 2-D tensor.')

        if use_starting_breakpoints:
            segment_starts = att_breakpoints
        else:
            # An end marker at position i means a new segment starts at
            # i + 1, so shift the markers one step to the right.
            segment_starts = tensor_utils.shift_elements_right(
                att_breakpoints, axis=-1, amount=1)

        # Counting the starts seen so far yields a per-token segment id.
        segment_ids = tf.cumsum(segment_starts, axis=1)
        return make_segmented_att_mask(segment_ids)
def graves_attn(
    src: tf.Tensor, tgt: tf.Tensor, nk: int, scope: str, w_stddev: float = 0.02
) -> tuple:
    """Compute context vector and attention matrix.

    See also: Eq. (46-51) A. Graves https://arxiv.org/pdf/1308.0850.pdf

    Args:
        src: float tensor [nb, nsrc, nh]
        tgt: float tensor [nb, ntgt, ng]
        nk: number of Gaussians in attention
        scope: variable scope name passed to `custom_variable_scope`
        w_stddev: stddev value forwarded to `linear` for the projection
            (presumably the weight-initialisation stddev -- confirm
            against `linear`)

    Returns:
        A `(w, attn)` tuple:
            w: float tensor [nb, ntgt, nh], the context vectors
            attn: float tensor [nb, ntgt, nsrc], the attention weights
    """
    with custom_variable_scope(scope):
        # One projection yields the three mixture parameter groups
        # (alpha, beta, kappa); exp keeps all of them positive.
        abk = tf.exp(linear(tgt, nk * 3, "linear_abk", w_stddev))

        # [nb, ntgt, 1, nk]
        b = abk[:, :, tf.newaxis, nk : 2 * nk]
        # kappa accumulates over target steps.
        # TODO: be careful for one step version
        k = tf.cumsum(abk[:, :, tf.newaxis, 2 * nk :], axis=1)

        nsrc = shape(src)[1]
        u = tf.reshape(tf.range(nsrc, dtype=k.dtype), (1, 1, nsrc, 1))
        # [nb, ntgt, nsrc, nk]
        e = tf.exp(-b * (k - u))
        # [nb, ntgt, nk, 1]
        a = abk[:, :, :nk, tf.newaxis]

        # [nb, ntgt, nsrc]
        attn = tf.squeeze(tf.matmul(e, a, name="mm_attn"), axis=-1)
        # [nb, ntgt, nh]
        w = tf.matmul(attn, src, name="mm_context")
    return w, attn
def aggregate(tensor):
    """Gather a tensor from every replica, concatenated on the first axis.

    Outside a replica context the input is returned unchanged.

    Args:
        tensor: The per-replica tensor to aggregate.

    Returns:
        A single tensor holding the values of all replicas, concatenated
        along the first axis in replica-id order.
    """
    replica_ctx = tf.distribute.get_replica_context()
    if not replica_ctx:
        return tensor

    # Publish this replica's row count in its own slot; summing the slot
    # vectors across replicas gives every replica the full list of counts.
    local_count = tf.shape(tensor)[0:1]
    count_slots = _pad(local_count, replica_ctx.num_replicas_in_sync,
                       replica_ctx.replica_id_in_sync_group)
    all_counts = replica_ctx.all_reduce('sum', count_slots)

    # Exclusive prefix sum of the counts = this replica's output offset.
    offsets = tf.cumsum(tf.concat([[0], all_counts], axis=0))
    local_offset = tf.gather(offsets, replica_ctx.replica_id_in_sync_group)
    total_count = tf.reduce_sum(all_counts)

    # Place the local rows at their offset, then sum across replicas.
    placed = _pad(tensor, total_count, local_offset)
    return replica_ctx.all_reduce('sum', placed)
def specgrams_to_melspecgrams(self, specgrams):
    """Map linear-frequency specgrams onto the mel frequency scale.

    Args:
        specgrams: Tensor of log magnitudes and instantaneous frequencies,
            shape [batch, time, freq, 2].

    Returns:
        melspecgrams: Tensor of log magnitudes and instantaneous
        frequencies, shape [batch, time, freq, 2], mel scaling of
        frequencies. The input is returned as-is when no mel downscale is
        configured.
    """
    if self._mel_downscale is None:
        return specgrams

    log_magnitude = specgrams[:, :, :, 0]
    inst_freq = specgrams[:, :, :, 1]

    # Squared magnitude, and phase recovered by integrating the
    # instantaneous frequency along axis -2.
    squared_magnitude = tf.exp(2.0 * log_magnitude)
    phase_angle = tf.cumsum(inst_freq * np.pi, axis=-2)

    linear_to_mel = tf.to_float(self._linear_to_mel_matrix())
    mel_log_mag2 = self._safe_log(
        tf.tensordot(squared_magnitude, linear_to_mel, 1))
    mel_phase_angle = tf.tensordot(phase_angle, linear_to_mel, 1)
    mel_inst_freq = spectral_util.instantaneous_frequency(mel_phase_angle)

    channels = [
        mel_log_mag2[:, :, :, tf.newaxis],
        mel_inst_freq[:, :, :, tf.newaxis],
    ]
    return tf.concat(channels, axis=-1)
def boolean_mask(boxlist, indicator, fields=None, scope=None,
                 use_static_shapes=False, indicator_sum=None):
    """Return a new BoxList holding the boxes flagged True by `indicator`.

    By default all extra fields stored in the boxlist are filtered along
    with the boxes (indexing into the first dimension); a subset of fields
    can be requested instead.

    Args:
        boxlist: BoxList holding N boxes.
        indicator: a rank-1 boolean tensor.
        fields: (optional) list of fields to also gather from. If None
            (default), all fields are gathered from. Pass an empty fields
            list to only gather the box coordinates.
        scope: name scope.
        use_static_shapes: Whether to use an implementation with static
            shape guarantees.
        indicator_sum: An integer containing the sum of the `indicator`
            vector. Only required if `use_static_shapes` is True.

    Returns:
        subboxlist: a BoxList corresponding to the subset of the input
        BoxList specified by indicator.

    Raises:
        ValueError: if `indicator` is not a rank-1 boolean tensor, if
            `use_static_shapes` is set without a non-zero int
            `indicator_sum`, or if a requested field is missing.
    """
    with tf.name_scope(scope, 'BooleanMask'):
        if indicator.shape.ndims != 1:
            raise ValueError('indicator should have rank 1')
        if indicator.dtype != tf.bool:
            raise ValueError('indicator should be a boolean tensor')

        if not use_static_shapes:
            subboxlist = box_list.BoxList(
                tf.boolean_mask(boxlist.get(), indicator))
            selected_fields = (
                boxlist.get_extra_fields() if fields is None else fields)
            for field in selected_fields:
                if not boxlist.has_field(field):
                    raise ValueError(
                        'boxlist must contain all specified fields')
                subboxlist.add_field(
                    field,
                    tf.boolean_mask(boxlist.get_field(field), indicator))
            return subboxlist

        if not (indicator_sum and isinstance(indicator_sum, int)):
            raise ValueError('`indicator_sum` must be a of type int')

        # Number the selected entries 1..K (unselected entries stay 0) ...
        selected_positions = tf.cast(indicator, dtype=tf.float32)
        running_rank = tf.cumsum(selected_positions)
        indexed_positions = tf.cast(
            tf.multiply(running_rank, selected_positions), dtype=tf.int32)
        # ... so that rank - 1 one-hot encodes the output slot; unselected
        # entries map to -1, which one_hot turns into an all-zero row.
        one_hot_selector = tf.one_hot(
            indexed_positions - 1, indicator_sum, dtype=tf.float32)
        # Contract source positions with the selector to obtain, for each
        # output slot, the index of the box that fills it.
        source_positions = tf.cast(
            tf.range(tf.shape(indicator)[0]), dtype=tf.float32)
        sampled_indices = tf.cast(
            tf.tensordot(source_positions, one_hot_selector, axes=[0, 0]),
            dtype=tf.int32)
        return gather(boxlist, sampled_indices, use_static_shapes=True)
def melspecgrams_to_specgrams(self, melspecgrams):
    """Map mel-scaled specgrams back onto the linear frequency scale.

    Args:
        melspecgrams: Tensor of log magnitudes and instantaneous
            frequencies, shape [batch, time, freq, 2], mel scaling of
            frequencies.

    Returns:
        specgrams: Tensor of log magnitudes and instantaneous frequencies,
        shape [batch, time, freq, 2]. The input is returned as-is when no
        mel downscale is configured.
    """
    if self._mel_downscale is None:
        return melspecgrams

    mel_log_mag2 = melspecgrams[:, :, :, 0]
    mel_inst_freq = melspecgrams[:, :, :, 1]

    mel_to_linear = tf.to_float(self._mel_to_linear_matrix())

    # Squared magnitude goes through the matrix; halving the log afterwards
    # recovers the log magnitude.
    squared_magnitude = tf.tensordot(
        tf.exp(mel_log_mag2), mel_to_linear, 1)
    log_magnitude = 0.5 * self._safe_log(squared_magnitude)

    # Integrate instantaneous frequency to phase, project, differentiate.
    mel_phase_angle = tf.cumsum(mel_inst_freq * np.pi, axis=-2)
    phase_angle = tf.tensordot(mel_phase_angle, mel_to_linear, 1)
    inst_freq = spectral_util.instantaneous_frequency(phase_angle)

    channels = [
        log_magnitude[:, :, :, tf.newaxis],
        inst_freq[:, :, :, tf.newaxis],
    ]
    return tf.concat(channels, axis=-1)
def offsets_to_segment_ids(offsets):
    '''Transforms per-segment counts to segment_ids.

    The segment_ids can be used in tf.segment_sum/segment_mean. Entry i of
    `offsets` is the number of elements in segment i; a segment with count
    0 contributes no ids:

        [3, 0, 1, 2] -> [0, 0, 0, 2, 3, 3]

    Args:
        offsets: 1-D integer tensor of per-segment element counts. Must be
            non-empty, since the last cumulative sum is read.

    Returns:
        1-D tensor of length sum(offsets) with the segment id of each
        element.
    '''
    # c[i] is the exclusive end position of segment i. For a position p,
    # the number of ends <= p (side='right') is the id of its segment.
    c = tf.cumsum(offsets)
    return tf.searchsorted(c, tf.range(c[-1]), side='right')
def unwrap(p, discont=np.pi, axis=-1):
    """Unwrap a cyclical phase tensor.

    Mirrors `numpy.unwrap`: jumps between consecutive values along `axis`
    whose absolute size is at least `discont` are replaced by their 2*pi
    complement; smaller jumps are left untouched.

    Args:
        p: Phase tensor.
        discont: Float, size of the cyclic discontinuity.
        axis: Axis of which to unwrap.

    Returns:
        unwrapped: Unwrapped tensor of same size as input.
    """
    dd = diff(p, axis=axis)
    # Wrap the differences into [-pi, pi).
    ddmod = tf.mod(dd + np.pi, 2.0 * np.pi) - np.pi
    # A positive jump that wrapped exactly onto -pi is mapped to +pi.
    at_minus_pi = tf.logical_and(tf.equal(ddmod, -np.pi), tf.greater(dd, 0))
    ddmod = tf.where(at_minus_pi, tf.ones_like(ddmod) * np.pi, ddmod)
    ph_correct = ddmod - dd
    # Jumps smaller than `discont` need no correction. Previously this
    # mask was written into `ddmod`, which is never read again, so
    # `discont` had no effect.
    small_jump = tf.less(tf.abs(dd), discont)
    ph_correct = tf.where(small_jump, tf.zeros_like(ph_correct), ph_correct)
    ph_cumsum = tf.cumsum(ph_correct, axis=axis)

    # Prepend a zero so the first element is passed through unchanged.
    shape = p.get_shape().as_list()
    shape[axis] = 1
    ph_cumsum = tf.concat(
        [tf.zeros(shape, dtype=p.dtype), ph_cumsum], axis=axis)
    unwrapped = p + ph_cumsum
    return unwrapped
def _get_values_from_start_and_end(self, input_tensor, num_start_samples,
                                   num_end_samples, total_num_samples):
    """Pick the first and last few entries of a 1-D tensor.

    Args:
        input_tensor: An int32 tensor of shape [N] to be sliced.
        num_start_samples: Number of examples to be sliced from the
            beginning of the input tensor.
        num_end_samples: Number of examples to be sliced from the end of
            the input tensor.
        total_num_samples: Sum of num_start_samples and num_end_samples.
            This should be a scalar.

    Returns:
        A tensor containing the first num_start_samples and last
        num_end_samples from input_tensor.
    """
    input_length = tf.shape(input_tensor)[0]
    in_head = tf.less(tf.range(input_length), num_start_samples)
    in_tail = tf.greater_equal(
        tf.range(input_length), input_length - num_end_samples)
    keep = tf.cast(tf.logical_or(in_head, in_tail), tf.float32)

    # Number the kept entries 1..K (dropped entries stay 0); rank - 1 then
    # one-hot encodes the output slot, and dropped entries (-1) become
    # all-zero rows.
    kept_rank = tf.multiply(tf.cumsum(keep), keep)
    slot_selector = tf.one_hot(
        tf.cast(kept_rank, tf.int32) - 1, total_num_samples,
        dtype=tf.float32)

    # Contracting over the input axis routes each kept value to its slot.
    routed = tf.tensordot(
        tf.cast(input_tensor, tf.float32), slot_selector, axes=[0, 0])
    return tf.cast(routed, tf.int32)
def clip_eta(eta, ord, eps):
    """
    Clip a perturbation so that it lies inside the epsilon norm ball.

    :param eta: A tensor with the current perturbation.
    :param ord: Order of the norm (mimics Numpy).
                Possible values: np.inf, 1 or 2.
    :param eps: Epsilon, bound of the perturbation.
    :return: The clipped perturbation.
    :raises ValueError: if `ord` is not one of np.inf, 1, 2.
    """
    if ord not in [np.inf, 1, 2]:
        raise ValueError('ord must be np.inf, 1, or 2.')

    # All axes except the leading (batch) axis.
    reduc_ind = list(xrange(1, len(eta.get_shape())))
    avoid_zero_div = 1e-12

    if ord == np.inf:
        return clip_by_value(eta, -eps, eps)

    if ord == 1:
        # Projection onto the l1-ball following (Duchi et al. 2008), which
        # runs in O(d*log(d)) for input dimension d.
        # Paper link: https://dl.acm.org/citation.cfm?id=1390191
        eps = tf.cast(eps, eta.dtype)

        dim = tf.reduce_prod(tf.shape(eta)[1:])
        eta_flat = tf.reshape(eta, (-1, dim))
        abs_eta = tf.abs(eta_flat)

        # Magnitudes sorted in descending order.
        if 'sort' in dir(tf):
            mu = -tf.sort(-abs_eta, axis=-1)
        else:
            # `tf.sort` is only available in TF 1.13 onwards
            mu = tf.nn.top_k(abs_eta, k=dim, sorted=True)[0]
        cumsums = tf.cumsum(mu, axis=-1)
        js = tf.cast(tf.divide(1, tf.range(1, dim + 1)), eta.dtype)
        t = tf.cast(tf.greater(mu - js * (cumsums - eps), 0), eta.dtype)

        masked_cumsums = t * cumsums
        rho = tf.argmax(masked_cumsums, axis=-1)
        rho_val = tf.reduce_max(t * cumsums, axis=-1)
        theta = tf.divide(rho_val - eps, tf.cast(1 + rho, eta.dtype))

        # Soft-threshold the magnitudes by theta and restore the signs.
        eta_sgn = tf.sign(eta_flat)
        eta_proj = eta_sgn * tf.maximum(abs_eta - theta[:, tf.newaxis], 0)
        eta_proj = tf.reshape(eta_proj, tf.shape(eta))

        # Only project the examples that actually lie outside the ball.
        norm = tf.reduce_sum(tf.abs(eta), reduc_ind)
        return tf.where(tf.greater(norm, eps), eta_proj, eta)

    if ord == 2:
        # avoid_zero_div must go inside sqrt to avoid a divide by zero
        # in the gradient through this operation
        squared_norm = reduce_sum(tf.square(eta), reduc_ind, keepdims=True)
        norm = tf.sqrt(tf.maximum(avoid_zero_div, squared_norm))
        # We must *clip* to within the norm ball, not *normalize* onto the
        # surface of the ball
        factor = tf.minimum(1., div(eps, norm))
        eta = eta * factor
    return eta
def _subsample_selection_to_desired_neg_pos_ratio(
        self, indices, match, max_negatives_per_positive,
        min_negatives_per_image=0):
    """Subsample a collection of selected indices to a desired neg:pos ratio.

    Takes a subset of M indices (indexing into a larger collection of N
    anchors, M<N) labeled positive/negative through a Match object (matched
    indices are positive, unmatched indices are negative). All positives
    are retained together with up to the first K negatives, where
    K=floor(max_negatives_per_positive * num_positives), but never fewer
    than `min_negatives_per_image`.

    For example, if indices=[2, 4, 5, 7, 9, 10] (indexing into 12 anchors),
    with positives=[2, 5], negatives=[4, 7, 9, 10] and
    max_negatives_per_positive=1, the returned subset is [2, 4, 5, 7].

    Args:
        indices: An integer tensor of shape [M] representing a collection
            of selected anchor indices.
        match: A matcher.Match object encoding the match between anchors
            and groundtruth boxes for a given image, with rows of the Match
            objects corresponding to groundtruth boxes and columns
            corresponding to anchors.
        max_negatives_per_positive: (float) maximum number of negatives for
            each positive anchor.
        min_negatives_per_image: minimum number of negative anchors for a
            given image. Allows sampling negatives in images without any
            positive anchors.

    Returns:
        selected_indices: An integer tensor of shape [M'] representing a
            collection of selected anchor indices with M' <= M.
        num_positives: An integer tensor representing the number of
            positive examples in the selected set of indices.
        num_negatives: An integer tensor representing the number of
            negative examples in the selected set of indices.
    """
    is_positive = tf.gather(match.matched_column_indicator(), indices)
    is_negative = tf.gather(match.unmatched_column_indicator(), indices)

    num_positives = tf.reduce_sum(tf.cast(is_positive, dtype=tf.int32))
    ratio_budget = tf.cast(
        max_negatives_per_positive *
        tf.cast(num_positives, dtype=tf.float32),
        dtype=tf.int32)
    max_negatives = tf.maximum(min_negatives_per_image, ratio_budget)

    # An entry is within budget while the running count of negatives up to
    # and including it has not exceeded the allowance.
    negatives_so_far = tf.cumsum(tf.cast(is_negative, dtype=tf.int32))
    within_budget = tf.less_equal(negatives_so_far, max_negatives)

    kept_positions = tf.where(tf.logical_or(is_positive, within_budget))
    num_negatives = tf.size(kept_positions) - num_positives
    selected_indices = tf.reshape(tf.gather(indices, kept_positions), [-1])
    return (selected_indices, num_positives, num_negatives)
def _token_span_mask(inputs, tgt_len, num_predict, stride=1):
    """Sample token spans as prediction targets.

    Args:
        inputs: Forwarded unchanged to `_idx_pair_to_mask`.
        tgt_len: Target length; also forwarded to `_idx_pair_to_mask`.
        num_predict: Number of spans to sample; spans ending out of range
            are dropped.
        stride: Reduces the usable length to `tgt_len + 1 - stride`.

    Returns:
        Whatever `_idx_pair_to_mask` returns for the sampled spans.
    """
    non_pad_len = tgt_len + 1 - stride
    chunk_len_fp = non_pad_len / num_predict

    def round_to_int(x):
        return tf.cast(tf.round(x), tf.int64)

    # Span lengths follow a zipf-like distribution: p(len) ~ 1 / (len + 1).
    candidate_lens = np.arange(FLAGS.min_tok, FLAGS.max_tok + 1)
    probs = 1.0 / (candidate_lens + 1)
    probs = probs / np.sum(probs)
    logits = tf.constant(np.log(probs), dtype=tf.float32)

    sampled_offsets = tf.random.categorical(
        logits=logits[None],
        num_samples=num_predict,
        dtype=tf.int64,
    )[0]
    span_lens = sampled_offsets + FLAGS.min_tok

    # Sample the ratio [0.0, 1.0) of left context lengths.
    span_lens_fp = tf.cast(span_lens, tf.float32)
    left_ratio = tf.random.uniform(
        shape=[num_predict], minval=0.0, maxval=1.0)
    left_ctx_len = round_to_int(
        left_ratio * span_lens_fp * (chunk_len_fp - 1))

    # Offset from the left start of a chunk to its right end.
    right_offset = round_to_int(span_lens_fp * chunk_len_fp) - left_ctx_len

    # Span i begins after all earlier chunks plus its own left context.
    beg_indices = (tf.cumsum(left_ctx_len) +
                   tf.cumsum(right_offset, exclusive=True))
    end_indices = beg_indices + span_lens

    # Remove out of range indices.
    in_range = end_indices < non_pad_len
    beg_indices = tf.boolean_mask(beg_indices, in_range)
    end_indices = tf.boolean_mask(end_indices, in_range)

    # Shuffle valid indices.
    num_valid = tf.cast(tf.shape(beg_indices)[0], tf.int64)
    order = tf.random.shuffle(tf.range(num_valid, dtype=tf.int64))
    beg_indices = tf.gather(beg_indices, order)
    end_indices = tf.gather(end_indices, order)

    return _idx_pair_to_mask(beg_indices, end_indices, inputs, tgt_len,
                             num_predict)
def build_lut(histo, step):
    """Build a lookup table from a histogram.

    Args:
        histo: 1-D integer tensor of histogram counts.
        step: Integer normalisation step.

    Returns:
        1-D tensor of the same length as `histo`, with values clipped to
        [0, 255].
    """
    # Cumulative sum, shifted by step // 2 and then normalised by step.
    cumulative = tf.cumsum(histo)
    scaled = (cumulative + (step // 2)) // step
    # Shift the table right by one entry, prepending a 0.
    shifted = tf.concat([[0], scaled[:-1]], 0)
    # Clip the counts to be in range. This is done in the C code for
    # image.point.
    return tf.clip_by_value(shifted, 0, 255)
def top_p_logits(logits, p):
    """Mask logits outside the top-p (nucleus) set with -1e10.

    Args:
        logits: [batchsize, vocab] tensor of logits.
        p: Cumulative-probability threshold.

    Returns:
        Tensor shaped like `logits`; entries below the smallest logit kept
        in the nucleus are replaced by -1e10.
    """
    with tf.variable_scope('top_p_logits'):
        sorted_logits = tf.sort(logits, direction='DESCENDING')
        sorted_probs = tf.nn.softmax(sorted_logits)
        # Exclusive cumsum: probability mass strictly before each entry,
        # so the entry that crosses `p` is still kept.
        mass_before = tf.cumsum(sorted_probs, axis=1, exclusive=True)

        # Entries outside the nucleus get a large sentinel so they cannot
        # win the minimum below.  [batchsize, vocab]
        sentinel = tf.ones_like(sorted_logits)*1000
        kept_logits = tf.where(mass_before < p, sorted_logits, sentinel)
        # Smallest logit that is still inside the nucleus.  [batchsize, 1]
        cutoff = tf.reduce_min(kept_logits, axis=1, keepdims=True)

        suppressed = tf.ones_like(logits, dtype=logits.dtype) * -1e10
        return tf.where(logits < cutoff, suppressed, logits)
def verb_refs_to_lengths(task, verb_refs, include_eos=True):
    """Computes the length of each sequence from its EOS reference.

    Args:
        task: 2-D tensor; the column index of every entry equal to 1 is
            taken as an EOS position (presumably exactly one per row --
            confirm against callers).
        verb_refs: Tensor indexed as [batch, step, 2] holding
            (start, end) references.
        include_eos: Whether the EOS step counts towards the length.

    Returns:
        Float tensor with one length per batch row.
    """
    eos_columns = tf.where(tf.equal(task, 1))[:, 1]
    eos_positions = tf.to_int32(tf.expand_dims(eos_columns, 1))

    # A step is the EOS step when it refers to (eos, eos + 1).
    is_eos_step = tf.logical_and(
        tf.equal(verb_refs[:, :, 0], eos_positions),
        tf.equal(verb_refs[:, :, 1], eos_positions + 1))

    # Once the EOS step has been seen the running count is non-zero, so the
    # mask keeps only the steps strictly before it.
    eos_seen = tf.cast(
        tf.cumsum(tf.to_int32(is_eos_step), axis=-1), tf.bool)
    seq_mask = tf.logical_not(eos_seen)

    lengths = tf.reduce_sum(tf.to_float(seq_mask), axis=-1)
    if include_eos:
        lengths = lengths + 1
    return lengths
def sequence_accuracy(gt_seqs, decode_seqs, gt_seq_lengths, pr_seq_lengths,
                      debug=False, name=""):
    """Computes the complete and the partial sequence accuracy.

    Args:
        gt_seqs: Ground-truth sequences, indexed as [batch, step, depth].
        decode_seqs: Decoded sequences, indexed as [batch, step, depth].
        gt_seq_lengths: Per-example ground-truth lengths.
        pr_seq_lengths: Per-example predicted lengths.
        debug: If True, print the mismatching sequences.
        name: Prefix for the debug print message.

    Returns:
        `(mean_complete_accuracy, mean_partial_accuracy)`: the fraction of
        fully correct sequences, and the mean fraction of leading steps
        that are correct.
    """
    gt_shape = common_layers.shape_list(gt_seqs)
    pr_shape = common_layers.shape_list(decode_seqs)
    batch_size = gt_shape[0]
    depth = gt_shape[-1]
    gt_len = gt_shape[1]
    pr_len = pr_shape[1]
    max_len = tf.maximum(gt_len, pr_len)

    # Bring both inputs to a common number of steps.
    gt_seqs = tf.pad(gt_seqs, [[0, 0], [0, max_len - gt_len], [0, 0]])
    decode_seqs = tf.pad(decode_seqs,
                         [[0, 0], [0, max_len - pr_len], [0, 0]])

    def fill_beyond_length(seqs, lengths):
        """Overwrite the steps at or past `lengths` with -1."""
        valid = tf.tile(
            tf.expand_dims(tf.sequence_mask(lengths, maxlen=max_len), 2),
            [1, 1, depth])
        return tf.where(valid, seqs, tf.fill(tf.shape(seqs), -1))

    gt_seqs = fill_beyond_length(gt_seqs, gt_seq_lengths)
    decode_seqs = fill_beyond_length(decode_seqs, pr_seq_lengths)

    # [batch_size, decode_length]
    corrects = tf.reduce_all(tf.equal(gt_seqs, decode_seqs), -1)
    # [batch_size]
    correct_mask = tf.reduce_all(corrects, -1)

    if debug:
        incorrect_mask = tf.logical_not(correct_mask)
        incorrect_gt = tf.boolean_mask(gt_seqs, incorrect_mask)
        incorrect_pr = tf.boolean_mask(decode_seqs, incorrect_mask)
        print_op = tf.print(
            name + "_mismatch", incorrect_gt, incorrect_pr, summarize=1000)
        with tf.control_dependencies([print_op]):
            correct_mask = tf.identity(correct_mask)

    total_correct_seqs = tf.reduce_sum(tf.to_float(correct_mask))
    mean_complete_accuracy = total_correct_seqs / tf.to_float(batch_size)

    # Partial accuracy: a step counts only if it and every earlier step is
    # correct, i.e. the running number of errors is still zero.
    step_errors = tf.to_float(tf.logical_not(corrects))
    error_seen = tf.cast(tf.cumsum(step_errors, axis=-1), tf.bool)
    # [batch_size]
    correct_steps = tf.reduce_sum(
        tf.to_float(tf.logical_not(error_seen)), axis=-1)
    mean_partial_accuracy = tf.reduce_mean(
        tf.div(tf.minimum(correct_steps, gt_seq_lengths), gt_seq_lengths))
    return mean_complete_accuracy, mean_partial_accuracy
def generate_action_mask(features):
    """Computes the decode mask from "task" and "verb_refs".

    Args:
        features: Dict with a 2-D "task" tensor, where the column index of
            every entry equal to 1 is taken as an EOS position, and a
            "verb_refs" tensor indexed as [batch, step, 2].

    Returns:
        Boolean mask [batch, step] that is True for the steps before the
        EOS step of each row.
    """
    verb_refs = features["verb_refs"]
    eos_columns = tf.where(tf.equal(features["task"], 1))[:, 1]
    eos_positions = tf.to_int32(tf.expand_dims(eos_columns, 1))

    # A step is the EOS step when it refers to (eos, eos + 1).
    is_eos_step = tf.logical_and(
        tf.equal(verb_refs[:, :, 0], eos_positions),
        tf.equal(verb_refs[:, :, 1], eos_positions + 1))
    eos_count = tf.cumsum(tf.to_int32(is_eos_step), axis=-1)

    # Steps whose running EOS count is still zero precede the EOS step;
    # their number is the mask length.
    steps_before_eos = tf.reduce_sum(
        tf.to_int32(tf.less(eos_count, 1)), -1)
    return tf.sequence_mask(steps_before_eos, maxlen=tf.shape(eos_count)[1])
def ComputeChainStats(chain, target_mean, num_leapfrog_steps):
    """Compute convergence diagnostics for an MCMC chain.

    Args:
        chain: Tensor of samples, [num_steps, batch, num_dims].
        target_mean: Reference mean the chain is compared against.
        num_leapfrog_steps: Number of leapfrog steps per sample; used to
            normalise the effective sample size.

    Returns:
        A `ChainStats` holding bias/variance of the running mean,
        instantaneous bias/variance, squared error, ESS (plain and per
        gradient), R-hat, warm-up-discarding bias/variance and the
        autocorrelation of the second half of the chain.
    """
    # Chain is [num_steps, batch, num_dims]
    num_steps = tf.shape(chain)[0]

    # Running mean over steps, then bias/variance across the batch axis.
    counts = tf.to_float(tf.range(1, num_steps + 1))
    chain_mean = tf.cumsum(chain, 0) / counts[:, tf.newaxis, tf.newaxis]
    bias = target_mean - tf.reduce_mean(chain_mean, 1)
    centered_mean = chain_mean - tf.reduce_mean(
        chain_mean, 1, keep_dims=True)
    variance = tf.reduce_mean(tf.square(centered_mean), 1)

    # Per-step statistics without any averaging over time.
    inst_bias = target_mean - tf.reduce_mean(chain, 1)
    inst_variance = tf.reduce_mean(tf.square(target_mean - chain), 1)

    def reducer(_, idx):
        """Bias/variance of the mean over steps [idx // 2, idx)."""
        window_mean = tf.reduce_mean(chain[idx // 2:idx], 0)
        window_bias = tf.reduce_mean(target_mean - window_mean, 0)
        window_variance = tf.reduce_mean(
            tf.square(window_mean - tf.reduce_mean(window_mean, 0)), 0)
        return window_bias, window_variance

    indices = 1 + tf.range(num_steps)
    warmupped_bias, warmupped_variance = tf.scan(
        reducer, indices, initializer=(chain[0, 0], chain[0, 0]))

    # The remaining diagnostics use the second half of the chain only.
    half_steps = num_steps // 2
    half_chain = chain[half_steps:]

    error_sq = tf.reduce_mean(
        tf.square(tf.reduce_mean(half_chain, 0) - target_mean), 0)

    ess = utils.EffectiveSampleSize(half_chain) / tf.to_float(half_steps)
    ess_per_grad = ess / tf.to_float(num_leapfrog_steps)
    rhat = tfp.mcmc.potential_scale_reduction(half_chain)
    autocorr = tf.reduce_mean(
        utils.SanitizedAutoCorrelation(half_chain, 0, max_lags=300), 1)

    return ChainStats(
        bias=bias,
        variance=variance,
        error_sq=error_sq,
        inst_bias=inst_bias,
        inst_variance=inst_variance,
        ess=ess,
        ess_per_grad=ess_per_grad,
        rhat=rhat,
        warmupped_bias=warmupped_bias,
        warmupped_variance=warmupped_variance,
        autocorr=autocorr)
def top_p_logits(logits, p):
    """Nucleus sampling: mask logits outside the top-p set with -1e10.

    Args:
        logits: [batch, vocab] tensor with a statically known batch size.
        p: Cumulative-probability threshold.

    Returns:
        Tensor shaped like `logits`; every entry smaller than the row's
        cut-off logit is replaced by -1e10.
    """
    batch, _ = logits.shape.as_list()
    sorted_logits = tf.sort(logits, direction='DESCENDING', axis=-1)
    cumulative_probs = tf.cumsum(
        tf.nn.softmax(sorted_logits, axis=-1), axis=-1)
    indices = tf.stack([
        tf.range(0, batch),
        # number of indices to include (at least the top-1 entry)
        tf.maximum(
            tf.reduce_sum(tf.cast(cumulative_probs <= p, tf.int32), axis=-1)
            - 1, 0),
    ], axis=-1)
    # Keep a trailing axis so each row is compared against its own
    # cut-off. A bare [batch] vector would broadcast along the vocab axis,
    # which is only correct when batch == 1.
    min_values = tf.gather_nd(sorted_logits, indices)[:, tf.newaxis]
    return tf.where(
        logits < min_values,
        tf.ones_like(logits) * -1e10,
        logits,
    )
def _distributional_to_value(value_d, size, subscale, threshold):
    """Get a scalar value out of a value distribution in distributional RL.

    Args:
        value_d: Logits over value buckets on the last axis.
        size: Number of buckets; bucket centres span
            (-size // 2 + 0.5) .. (size // 2 - 0.5), scaled by `subscale`.
        subscale: Width of one bucket in value units.
        threshold: If non-zero, the lower buckets whose cumulative
            probability is below this threshold are discarded before
            taking the expectation.

    Returns:
        The (optionally truncated) expected value, reduced over the last
        axis.
    """
    half = size // 2
    value_range = (tf.to_float(tf.range(-half, half)) + 0.5) * subscale
    probs = tf.nn.softmax(value_d)

    if threshold != 0.0:
        # accumulated_probs[..., i] is the sum of probabilities in buckets
        # up to i, i.e. the probability that value <= i'th bucket value.
        accumulated_probs = tf.cumsum(probs, axis=-1)
        # Zero out all lower buckets until the threshold is reached ...
        below_threshold = accumulated_probs < threshold
        probs = tf.where(below_threshold, tf.zeros_like(probs), probs)
        # ... and re-normalize what is left.
        probs /= tf.reduce_sum(probs, axis=-1, keepdims=True)

    return tf.reduce_sum(probs * value_range, axis=-1)
def test_readme_example(self):
    """Round-trip random data through range_encode / range_decode."""
    data = tf.random.uniform((128, 128), 0, 10, dtype=tf.int32)

    # Build the CDF from the empirical histogram: inclusive cumulative
    # counts with a leading 0, reshaped to [1, 1, -1].
    histogram = tf.bincount(data, minlength=10, maxlength=10)
    cumulative = tf.cumsum(histogram, exclusive=False)
    cdf = tf.reshape(tf.pad(cumulative, [[1, 0]]), [1, 1, -1])

    data = tf.cast(data, tf.int16)
    encoded = range_coding_ops.range_encode(data, cdf, precision=14)
    decoded = range_coding_ops.range_decode(
        encoded, tf.shape(data), cdf, precision=14)

    with self.cached_session() as sess:
        original, roundtrip = sess.run((data, decoded))
        self.assertAllEqual(original, roundtrip)
def categorical_case(pmf, fns, rand=None):
    """Returns the outputs of fns[i] with probability pmf[i].

    Args:
        pmf: A 1-D tensor of probabilities, the probability mass function.
        fns: A list of callables that return tensors, same length as pmf.
        rand: An optional scalar between 0.0 and 1.0, the output of an RNG.

    Returns:
        A tensor, the output of fns[i] with probability pmf[i].
    """
    if rand is None:
        rand = tf.random_uniform([])

    # Cumulative mass with a leading 0: branch i owns the half-open
    # interval [bounds[i], bounds[i + 1]).
    cumulative = tf.pad(tf.cumsum(pmf), [(1, 0)])
    bounds = [cumulative[i] for i in range(len(fns) + 1)]

    pred_fn_pairs = []
    for lower, upper, fn in zip(bounds[:-1], bounds[1:], fns):
        in_interval = (rand >= lower) & (rand < upper)
        pred_fn_pairs.append((in_interval, fn))
    return tf.case(pred_fn_pairs, exclusive=True)