def _sparse_intersect_indices(sp_tensor, required_sp_tensor):
    """Restricts sp_tensor to the timesteps that occur in required_sp_tensor.

    Both inputs are first extended with each other's indices (filled with a
    placeholder value) so that they share one index set. Entries that are
    placeholders on either side are then dropped.

    Args:
        sp_tensor: SparseTensor whose entries are filtered.
        required_sp_tensor: SparseTensor whose indices decide what is kept.

    Returns:
        A SparseTensor holding the surviving entries of sp_tensor. When
        sp_tensor is not of string dtype, the values are parsed back to
        sp_tensor's dtype with tf.strings.to_number.
    """
    placeholder = 'n/a'
    # E.g. the dense view of one extended batch entry could look like
    # [placeholder, placeholder, 1].
    required_extended = _extend_with_dummy(
        sp_tensor, required_sp_tensor, placeholder)
    source_extended = _extend_with_dummy(
        required_sp_tensor, sp_tensor, placeholder)

    # Pass 1: drop every index at which the required tensor only holds the
    # placeholder, i.e. indices that were not required in the first place.
    required_is_real = tf.logical_not(
        tf.equal(required_extended.values, placeholder))
    in_required = tf.sparse_retain(source_extended, required_is_real)

    # Pass 2: remove empty timesteps so that the timesteps line up with the
    # original required_sp_tensor, then drop the indices at which the source
    # tensor itself only holds the placeholder.
    realigned = _remove_empty_timesteps(in_required)
    source_is_real = tf.logical_not(
        tf.equal(in_required.values, placeholder))
    in_required = tf.sparse_retain(realigned, source_is_real)

    if sp_tensor.values.dtype != tf.string:
        # The intermediate values are compared against a string placeholder,
        # so non-string inputs are converted back to their own dtype here.
        numeric_values = tf.strings.to_number(
            in_required.values, out_type=sp_tensor.values.dtype)
        in_required = tf.SparseTensor(
            indices=in_required.indices,
            dense_shape=in_required.dense_shape,
            values=numeric_values)
    return in_required
def get_scheduled_sample_inputs(self, done_warm_start, groundtruth_items,
                                generated_items, scheduled_sampling_func):
    """Chooses between ground-truth, generated and scheduled-sampled items.

    Args:
        done_warm_start: boolean tensor, whether warm start is over.
        groundtruth_items: list of ground truth items.
        generated_items: list of generated items.
        scheduled_sampling_func: callable taking (ground truth item,
            generated item) and returning the item to use.

    Returns:
        The ground truth items while warm start is still running; otherwise
        the generated items when `self.is_training` is false; otherwise the
        pairwise result of `scheduled_sampling_func`.
    """

    def _mix_items():
        """Applies the scheduled sampling function to each item pair."""
        with tf.variable_scope("scheduled_sampling", reuse=tf.AUTO_REUSE):
            mixed = []
            for item_gt, item_gen in zip(groundtruth_items, generated_items):
                mixed.append(scheduled_sampling_func(item_gt, item_gen))
            return mixed

    still_warming_up = tf.logical_not(done_warm_start)
    not_training = tf.logical_not(self.is_training)
    # Predicates are listed in priority order: warm start first, then eval.
    pred_fn_pairs = [
        (still_warming_up, lambda: groundtruth_items),
        (not_training, lambda: generated_items),
    ]
    return tf.case(pred_fn_pairs, default=_mix_items, strict=True)
def _get_triplet_mask(labels):
    """Builds a 3D boolean mask of valid (anchor, positive, negative) triplets.

    mask[a, p, n] is True iff
      - a, p and n are three distinct indices, and
      - labels[a] == labels[p] and labels[a] != labels[n].

    Args:
        labels: tf.int32 `Tensor` with shape [batch_size].

    Returns:
        A boolean tensor of shape [batch_size, batch_size, batch_size].
    """
    # Pairwise "different index" matrix, broadcast along each of the three
    # axes to express a != p, a != n and p != n.
    on_diagonal = tf.cast(tf.eye(tf.shape(labels)[0]), tf.bool)
    off_diagonal = tf.logical_not(on_diagonal)
    a_differs_from_p = tf.expand_dims(off_diagonal, 2)
    a_differs_from_n = tf.expand_dims(off_diagonal, 1)
    p_differs_from_n = tf.expand_dims(off_diagonal, 0)
    all_distinct = tf.logical_and(
        tf.logical_and(a_differs_from_p, a_differs_from_n),
        p_differs_from_n)

    # Pairwise "same label" matrix, broadcast to express
    # labels[a] == labels[p] and labels[a] != labels[n].
    same_label = tf.equal(
        tf.expand_dims(labels, 0), tf.expand_dims(labels, 1))
    a_matches_p = tf.expand_dims(same_label, 2)
    a_matches_n = tf.expand_dims(same_label, 1)
    labels_valid = tf.logical_and(a_matches_p, tf.logical_not(a_matches_n))

    return tf.logical_and(all_distinct, labels_valid)
def _loop_cond(i, unused_alive_seq, alive_log_probs, unused_finished_seq,
               finished_scores, finished_in_finished):
    """Decides whether the beam search loop runs another step.

    The loop stops once `decode_length` steps were taken, or once the lowest
    scoring finished sequence beats the best score any alive sequence could
    still reach (highest alive log prob divided by the maximum length
    penalty). If `minimum_score` is set, the loop also stops when all alive
    scores fall below log(minimum_score).

    `decode_length`, `alpha` and `minimum_score` are read from the enclosing
    scope.

    Args:
        i: loop index.
        unused_alive_seq: not used.
        alive_log_probs: probabilities of the beams. [batch_size, beam_size]
        unused_finished_seq: not used.
        finished_scores: scores for each of the finished sequences.
            [batch_size, beam_size]
        finished_in_finished: finished bools for each of these sequences.
            [batch_size, beam_size]

    Returns:
        True to continue the loop, False to stop.
    """
    max_length_penalty = tf.pow(
        ((5. + tf.to_float(decode_length)) / 6.), alpha)
    # Best score the most likely alive sequence can still end up with.
    lower_bound_alive_scores = alive_log_probs[:, 0] / max_length_penalty

    # Scores are all negative, so zeroing out unfinished entries and taking
    # the minimum yields the lowest score among the finished ones.
    finished_flags = tf.to_float(finished_in_finished)
    lowest_finished_score = tf.reduce_min(
        finished_scores * finished_flags, axis=1)

    # With nothing finished the minimum above is 0; swap it for negative
    # infinity so that the stopping bound cannot be met in that case.
    nothing_finished = tf.logical_not(
        tf.reduce_any(finished_in_finished, 1))
    lowest_finished_score = _apply_negative_infinity_mask(
        lowest_finished_score, nothing_finished)

    # Beam search terminates early when this is True.
    bound_is_met = tf.reduce_all(
        tf.greater(lowest_finished_score, lower_bound_alive_scores))

    if minimum_score:
        # Optional extra stop: every alive score is below the minimum.
        minimum_score_log = tf.log(minimum_score)
        all_alive_below_minimum = tf.reduce_all(
            tf.less(lower_bound_alive_scores, minimum_score_log))
        bound_is_met = tf.logical_or(bound_is_met, all_alive_below_minimum)

    within_decode_length = tf.less(i, decode_length)
    return tf.logical_and(within_decode_length, tf.logical_not(bound_is_met))
def sequence_accuracy(gt_seqs, decode_seqs, gt_seq_lengths, pr_seq_lengths,
                      debug=False, name=""):
    """Computes the complete and the partial sequence accuracy.

    Both sequence tensors are padded along axis 1 to a common length and every
    step beyond a sequence's own length is overwritten with -1 before the
    step-wise comparison.

    Args:
        gt_seqs: rank-3 ground truth tensor, indexed as
            [batch, step, depth] by this function.
        decode_seqs: rank-3 predicted tensor with the same layout.
        gt_seq_lengths: per-example lengths of the ground truth sequences.
        pr_seq_lengths: per-example lengths of the predicted sequences.
        debug: if True, the mismatching sequence pairs are printed.
        name: prefix for the debug print.

    Returns:
        A tuple (mean_complete_accuracy, mean_partial_accuracy): the fraction
        of examples whose sequences match at every step, and the mean ratio
        of correct leading steps (capped at the ground truth length) to the
        ground truth length.
    """
    gt_shape = common_layers.shape_list(gt_seqs)
    pr_shape = common_layers.shape_list(decode_seqs)
    batch_size, gt_len, depth = gt_shape[0], gt_shape[1], gt_shape[-1]
    pr_len = pr_shape[1]
    max_len = tf.maximum(gt_len, pr_len)

    def _pad_and_mask(seqs, cur_len, seq_lengths):
        """Pads seqs to max_len steps and sets out-of-length steps to -1."""
        padded = tf.pad(seqs, [[0, 0], [0, max_len - cur_len], [0, 0]])
        in_length = tf.tile(
            tf.expand_dims(tf.sequence_mask(seq_lengths, maxlen=max_len), 2),
            [1, 1, depth])
        return tf.where(in_length, padded, tf.fill(tf.shape(padded), -1))

    gt_seqs = _pad_and_mask(gt_seqs, gt_len, gt_seq_lengths)
    decode_seqs = _pad_and_mask(decode_seqs, pr_len, pr_seq_lengths)

    # [batch_size, decode_length]: step matches across the whole depth axis.
    corrects = tf.reduce_all(tf.equal(gt_seqs, decode_seqs), -1)
    # [batch_size]: every step matches.
    correct_mask = tf.reduce_all(corrects, -1)
    if debug:
        incorrect_mask = tf.logical_not(correct_mask)
        incorrect_gt = tf.boolean_mask(gt_seqs, incorrect_mask)
        incorrect_pr = tf.boolean_mask(decode_seqs, incorrect_mask)
        print_op = tf.print(
            name + "_mismatch", incorrect_gt, incorrect_pr, summarize=1000)
        with tf.control_dependencies([print_op]):
            correct_mask = tf.identity(correct_mask)
    total_correct_seqs = tf.reduce_sum(tf.to_float(correct_mask))
    mean_complete_accuracy = total_correct_seqs / tf.to_float(batch_size)

    # Partial accuracy: once a step is wrong, the running sum stays non-zero,
    # so only the steps before the first error count as correct.
    errors = tf.logical_not(corrects)
    seen_error = tf.cast(tf.cumsum(tf.to_float(errors), axis=-1), tf.bool)
    # [batch_size]
    correct_steps = tf.reduce_sum(
        tf.to_float(tf.logical_not(seen_error)), axis=-1)
    capped_steps = tf.minimum(correct_steps, gt_seq_lengths)
    mean_partial_accuracy = tf.reduce_mean(
        tf.div(capped_steps, gt_seq_lengths))
    return mean_complete_accuracy, mean_partial_accuracy
def preprocess_device_grads(self, device_grads):
    """All-reduces the per-device gradients and re-pairs them with variables.

    Args:
        device_grads: one list of (gradient, variable) pairs per device.

    Returns:
        A tuple (devices, reduced_device_grads) where `devices` is
        `self.benchmark_cnn.devices` and `reduced_device_grads` mirrors
        `device_grads` with each gradient replaced by its reduced version.

    Side effects:
        Sets `self._warmup_ops`, and, when auto loss scaling is enabled,
        `self.grad_has_inf_nan` (True if any reduced gradient is not finite).
    """
    params = self.benchmark_cnn.params
    compact_grads = (params.use_fp16 and
                     params.compact_gradient_transfer)
    defer_grads = (params.variable_consistency == 'relaxed')

    grads_to_reduce = []
    for grad_vars in device_grads:
        grads_to_reduce.append([grad for grad, _ in grad_vars])

    algorithm = batch_allreduce.algorithm_from_params(params)
    reduced_grads, self._warmup_ops = algorithm.batch_all_reduce(
        grads_to_reduce, params.gradient_repacking, compact_grads,
        defer_grads, params.xla_compile)

    if self.benchmark_cnn.enable_auto_loss_scale:
        # Check for infs or nans, one scalar flag per tower.
        is_finite_list = []
        with tf.name_scope('check_for_inf_and_nan'):
            for tower_grads in reduced_grads:
                with tf.colocate_with(tower_grads[0]):
                    # TODO(tanmingxing): Create fused op that takes in a list
                    # of tensors as input and returns scalar boolean True if
                    # there are any infs/nans.
                    per_grad_finite = [
                        tf.reduce_all(tf.is_finite(grad))
                        for grad in tower_grads
                    ]
                    is_finite_list.append(tf.reduce_all(per_grad_finite))
            all_finite = tf.reduce_all(is_finite_list)
            self.grad_has_inf_nan = tf.logical_not(all_finite)

    # Put each reduced gradient back next to the variable it belongs to.
    reduced_device_grads = []
    for grads, grad_vars in zip(reduced_grads, device_grads):
        paired = [(grad, var) for grad, (_, var) in zip(grads, grad_vars)]
        reduced_device_grads.append(paired)
    return self.benchmark_cnn.devices, reduced_device_grads
def _match(self, similarity_matrix, valid_rows):
    """Greedily bipartite-matches rows to columns.

    The rows are reordered so that all valid rows come first, and the matching
    is then run on the negated (distance) matrix restricted to that leading
    group of valid rows.

    TODO(rathodv): Add num_valid_columns options to match only that many
    columns with all the rows.

    Args:
        similarity_matrix: Float tensor of shape [N, M] with pairwise
            similarity where higher values mean more similar.
        valid_rows: A boolean tensor of shape [N] indicating the rows that
            are valid.

    Returns:
        match_results: int32 tensor of shape [M] with match_results[i]=-1
            meaning that column i is not matched and otherwise that it is
            matched to row match_results[i].
            NOTE(review): the matching runs on the reordered matrix, so row
            ids presumably refer to the valid-rows-first order - confirm.
    """
    valid_row_ids = tf.squeeze(tf.where(valid_rows), axis=-1)
    invalid_row_ids = tf.squeeze(
        tf.where(tf.logical_not(valid_rows)), axis=-1)
    reordered_similarity = tf.concat(
        [
            tf.gather(similarity_matrix, valid_row_ids),
            tf.gather(similarity_matrix, invalid_row_ids),
        ],
        axis=0)

    # The bipartite op looks for minimum distance matches, hence the sign
    # flip from similarity to distance.
    distance_matrix = -1 * reordered_similarity
    num_valid_rows = tf.reduce_sum(tf.cast(valid_rows, dtype=tf.float32))
    _, raw_matches = image_ops.bipartite_match(
        distance_matrix, num_valid_rows=num_valid_rows)
    return tf.cast(tf.reshape(raw_matches, [-1]), tf.int32)
def _compute_head_weights_with_time_prior(weights, paddings, time_deltas,
                                          num_heads, time_exp_base,
                                          overlapping_chunks):
    """Computes head-specific attention weights with a time prior.

    Each head only keeps the weights of items whose time delta falls inside
    that head's chunk; everywhere else the head's padding values are used.
    Chunk boundaries grow exponentially as pow(time_exp_base, i). Time delta
    values represent a number of days.

    Example 1 (overlapping_chunks=False, time_exp_base=3, num_heads=3):
        head 1 covers [0, pow(3,0)],
        head 2 covers (pow(3,0), pow(3,1)],
        head 3 (last) covers (pow(3,1), inf].

    Example 2 (overlapping_chunks=True, time_exp_base=3, num_heads=3):
        head 1 covers [0, pow(3,0)],
        head 2 covers [0, pow(3,1)],
        head 3 (last) covers [0, inf].

    Args:
        weights: A 3d tensor with shape of [h*N, T_q, T_k].
        paddings: A 3d tensor with shape of [h*N, T_q, T_k].
        time_deltas: A 3d tensor with shape of [N, T_q, T_k].
        num_heads: An integer denoting number of chunks.
        time_exp_base: A scalar. Base for exponential time intervals.
        overlapping_chunks: Boolean. Whether to use overlapping chunks.

    Returns:
        A list of h tensors (each shaped [N, T_q, T_k]) where tensors
        correspond to chunk specific weights.
    """
    tf.logging.info(
        "Computing with time_exp_base:{} and overlapping_chunks:{}".format(
            time_exp_base, overlapping_chunks))

    per_head_weights = tf.split(weights, num_heads, axis=0)
    per_head_paddings = tf.split(paddings, num_heads, axis=0)
    ones = tf.ones_like(time_deltas)  # (N, T_q, T_k)

    # Items excluded from the lower end of the current chunk. Initially only
    # negative deltas (future items) are excluded.
    below_chunk = time_deltas < 0  # (N, T_q, T_k)
    last_head = num_heads - 1

    head_outputs = []
    for head in range(num_heads):
        if head == last_head:
            # The last chunk takes every remaining item: all True.
            within_upper = tf.ones_like(time_deltas, dtype=bool)
        else:
            upper_bound = (time_exp_base**head) * ones
            within_upper = tf.math.less_equal(time_deltas, upper_bound)

        in_chunk = tf.logical_and(tf.logical_not(below_chunk), within_upper)
        head_outputs.append(
            tf.where(in_chunk, per_head_weights[head],
                     per_head_paddings[head]))  # (N, T_q, T_k)

        if not overlapping_chunks:
            # Non-overlapping chunks start where the previous one ended.
            below_chunk = within_upper
    return head_outputs
def __init__(self,
             sess,
             reward_scale,
             ipd_scale,
             observation_shape=NATURE_DQN_OBSERVATION_SHAPE,
             resize_shape=PSEUDO_COUNT_OBSERVATION_SHAPE,
             quantization_factor=PSEUDO_COUNT_QUANTIZATION_FACTOR,
             tf_device='/cpu:*',
             optimizer=tf.train.RMSPropOptimizer(
                 learning_rate=0.0001,
                 momentum=0.9,
                 epsilon=0.0001)):
    """Stores the configuration and builds the intrinsic reward graph.

    Args:
        sess: session object, stored as `self._sess`.
        reward_scale: stored as `self.reward_scale`.
        ipd_scale: stored as `self.ipd_scale`.
        observation_shape: tuple with the shape of a single observation; it
            is wrapped as (1,) + observation_shape + (1,) for the placeholder.
        resize_shape: shape handed to `self._preprocess`.
        quantization_factor: stored as `self.quantization_factor`.
        tf_device: device string under which the graph ops are created.
        optimizer: optimizer object, stored as `self.optimizer`. Note that
            the default instance is created once, when the function is
            defined, and is shared by every caller relying on the default.
    """
    self._sess = sess
    self.reward_scale = reward_scale
    self.ipd_scale = ipd_scale
    self.observation_shape = observation_shape
    self.resize_shape = resize_shape
    self.quantization_factor = quantization_factor
    self.optimizer = optimizer

    with tf.device(tf_device):
        with tf.name_scope('intrinsic_pixelcnn'):
            # Leading and trailing singleton dimensions around one frame.
            placeholder_shape = (1,) + observation_shape + (1,)
            self.obs_ph = tf.placeholder(
                tf.uint8, shape=placeholder_shape, name='obs_ph')
            self.preproccessed_obs = self._preprocess(
                self.obs_ph, resize_shape)
            self.iter_ph = tf.placeholder(
                tf.uint32, shape=[], name='iter_num')
            self.eval_ph = tf.placeholder(
                tf.bool, shape=[], name='eval_mode')
            self.network = tf.make_template(
                'PixelCNN', self._network_template)
            # Outside eval mode `update` is used, in eval mode
            # `virtual_update`.
            not_eval_mode = tf.logical_not(self.eval_ph)
            self.ipd = tf.cond(
                not_eval_mode, self.update, self.virtual_update)
            self.reward = self.ipd_to_reward(self.ipd, self.iter_ph)
def get_gan_loss(self, true_frames, gen_frames, name):
    """Get the discriminator + generator loss at every step.

    With `hparams.gan_optimization == "joint"` this performs a 1:1 update of
    the discriminator and generator at every step. Otherwise the two losses
    alternate: the generator loss is returned on odd iteration numbers and
    the discriminator loss on even ones.

    Args:
        true_frames: 5-D Tensor of shape (num_steps, batch_size, H, W, C)
            Assumed to be ground truth.
        gen_frames: 5-D Tensor of shape (num_steps, batch_size, H, W, C)
            Assumed to be fake.
        name: discriminator scope.

    Returns:
        loss: 0-D Tensor, with d_loss + g_loss in joint mode, or whichever of
            the two is scheduled for the current step otherwise.
    """
    # D - STEP
    with tf.variable_scope("%s_discriminator" % name, reuse=tf.AUTO_REUSE):
        gan_d_loss, _, fake_logits_stop = self.d_step(
            true_frames, gen_frames)

    # G - STEP (reuses the discriminator variables created above).
    with tf.variable_scope("%s_discriminator" % name, reuse=True):
        gan_g_loss_pos_d, gan_g_loss_neg_d = self.g_step(
            gen_frames, fake_logits_stop)
    gan_g_loss = gan_g_loss_pos_d + gan_g_loss_neg_d
    tf.summary.scalar("gan_loss_%s" % name, gan_g_loss_pos_d + gan_d_loss)

    if self.hparams.gan_optimization == "joint":
        gan_loss = gan_g_loss + gan_d_loss
    else:
        curr_step = self.get_iteration_num()
        # Compare with a TF op rather than Python `==`: in graph mode `==` on
        # a tensor is an object identity test, which would turn the predicate
        # into a constant and disable the alternation. tf.not_equal works for
        # both tensor and plain integer step values.
        is_odd_step = tf.not_equal(curr_step % 2, 0)
        gan_loss = tf.cond(
            is_odd_step, lambda: gan_g_loss, lambda: gan_d_loss)
    return gan_loss
def prune_completely_outside_window(boxes, window):
    """
    Drops the boxes that lie completely outside of the given window.
    Boxes that only partially overflow the window are kept and not clipped.

    Arguments:
        boxes: a float tensor with shape [M_in, 4].
        window: a float tensor with shape [4] representing
            [ymin, xmin, ymax, xmax] of the window.
    Returns:
        boxes: a float tensor with shape [M_out, 4] where 0 <= M_out <= M_in.
        valid_indices: a long tensor with shape [M_out] indexing the valid
            bounding boxes in the input 'boxes' tensor.
    """
    # Each box coordinate has shape [None, 1]; window coordinates are scalars.
    y_min, x_min, y_max, x_max = tf.split(
        boxes, num_or_size_splits=4, axis=1)
    win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)

    # A box is completely outside as soon as one of these holds.
    starts_below_window = tf.greater_equal(y_min, win_y_max)
    starts_right_of_window = tf.greater_equal(x_min, win_x_max)
    ends_above_window = tf.less_equal(y_max, win_y_min)
    ends_left_of_window = tf.less_equal(x_max, win_x_min)
    coordinate_violations = tf.concat(
        [
            starts_below_window,
            starts_right_of_window,
            ends_above_window,
            ends_left_of_window,
        ],
        axis=1)

    is_outside = tf.reduce_any(coordinate_violations, 1)
    valid_indices = tf.squeeze(tf.where(tf.logical_not(is_outside)), axis=1)
    return tf.gather(boxes, valid_indices), valid_indices
def prune_outside_window(boxlist, window, scope=None):
    """Drops every bounding box that is not fully inside the given window.

    A box is removed as soon as any part of it lies outside the window. See
    also clip_to_window, which only prunes bounding boxes that fall
    completely outside the window and clips the partially overflowing ones.

    Args:
        boxlist: a BoxList holding M_in boxes.
        window: a float tensor of shape [4] representing
            [ymin, xmin, ymax, xmax] of the window
        scope: name scope.

    Returns:
        A tuple of the result of `gather(boxlist, valid_indices)` (the boxes
        that survive, M_out <= M_in of them) and valid_indices, a tensor with
        shape [M_out] indexing the valid bounding boxes in the input.
    """
    with tf.name_scope(scope, 'PruneOutsideWindow'):
        corners = boxlist.get()
        y_min, x_min, y_max, x_max = tf.split(
            value=corners, num_or_size_splits=4, axis=1)
        win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)

        # Any single overflow past a window edge disqualifies the box.
        overflows_top = tf.less(y_min, win_y_min)
        overflows_left = tf.less(x_min, win_x_min)
        overflows_bottom = tf.greater(y_max, win_y_max)
        overflows_right = tf.greater(x_max, win_x_max)
        coordinate_violations = tf.concat(
            [overflows_top, overflows_left, overflows_bottom,
             overflows_right],
            1)

        fully_inside = tf.logical_not(
            tf.reduce_any(coordinate_violations, 1))
        valid_indices = tf.reshape(tf.where(fully_inside), [-1])
        return gather(boxlist, valid_indices), valid_indices
def prune_completely_outside_window(boxlist, window, scope=None):
    """Drops the bounding boxes that lie completely outside of the window.

    Unlike clip_to_window, which also clips partially overflowing boxes, this
    function leaves the boxes it keeps untouched.

    Args:
        boxlist: a BoxList holding M_in boxes.
        window: a float tensor of shape [4] representing
            [ymin, xmin, ymax, xmax] of the window
        scope: name scope.

    Returns:
        pruned_boxlist: a new BoxList with all bounding boxes partially or
            fully in the window.
        valid_indices: a tensor with shape [M_out] indexing the valid
            bounding boxes in the input tensor.
    """
    with tf.name_scope(scope, 'PruneCompleteleyOutsideWindow'):
        corners = boxlist.get()
        y_min, x_min, y_max, x_max = tf.split(
            value=corners, num_or_size_splits=4, axis=1)
        win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)

        # A box has no overlap with the window if it starts at or beyond the
        # far edge, or ends at or before the near edge, on either axis.
        starts_past_bottom = tf.greater_equal(y_min, win_y_max)
        starts_past_right = tf.greater_equal(x_min, win_x_max)
        ends_before_top = tf.less_equal(y_max, win_y_min)
        ends_before_left = tf.less_equal(x_max, win_x_min)
        coordinate_violations = tf.concat(
            [starts_past_bottom, starts_past_right, ends_before_top,
             ends_before_left],
            1)

        touches_window = tf.logical_not(
            tf.reduce_any(coordinate_violations, 1))
        valid_indices = tf.reshape(tf.where(touches_window), [-1])
        return gather(boxlist, valid_indices), valid_indices
def filter_groundtruth_with_nan_box_coordinates(tensor_dict):
    """Filters out groundtruth whose box has a NaN coordinate.

    Args:
        tensor_dict: a dictionary of following groundtruth tensors -
            fields.InputDataFields.groundtruth_boxes
            fields.InputDataFields.groundtruth_classes
            fields.InputDataFields.groundtruth_confidences
            fields.InputDataFields.groundtruth_keypoints
            fields.InputDataFields.groundtruth_instance_masks
            fields.InputDataFields.groundtruth_is_crowd
            fields.InputDataFields.groundtruth_area
            fields.InputDataFields.groundtruth_label_types

    Returns:
        The result of `retain_groundtruth` for the rows whose box contains
        no NaN value, i.e. a dictionary of tensors restricted to the
        groundtruth with usable bounding boxes.
    """
    boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]

    # Count the NaN coordinates per box; a box is bad if the count is > 0.
    nan_flags = tf.cast(tf.is_nan(boxes), dtype=tf.int32)
    nan_count_per_box = tf.reduce_sum(nan_flags, reduction_indices=[1])
    has_nan = tf.greater(nan_count_per_box, 0)

    valid_indices = tf.where(tf.logical_not(has_nan))
    return retain_groundtruth(tensor_dict, valid_indices)
def correct_keypoints(image_shape, keypoints):
    """
    Zeroes the third keypoint component of every keypoint whose (y, x)
    position lies outside of the image; y and x themselves are unchanged.

    Arguments:
        image_shape: an int tensor with shape [3].
        keypoints: an int tensor with shape [num_persons, 17, 3].
    Returns:
        an int tensor with shape [num_persons, 17, 3].
    """
    y, x, v = tf.split(keypoints, 3, axis=2)
    height, width = image_shape[0], image_shape[1]

    # shape [num_persons, 17, 4]: one flag per violated image border.
    coordinate_violations = tf.concat(
        [
            tf.less(y, 0),
            tf.less(x, 0),
            tf.greater_equal(y, height),
            tf.greater_equal(x, width),
        ],
        axis=2)
    outside_image = tf.reduce_any(coordinate_violations, axis=2)

    # shape [num_persons, 17, 1], 1 for keypoints inside the image.
    inside_image = tf.expand_dims(tf.logical_not(outside_image), 2)
    v = v * tf.to_int32(inside_image)
    return tf.concat([y, x, v], axis=2)
def call(self, similarity, mask=None):
    """Adds a large negative bias to the masked-out similarity entries.

    Args:
        similarity: a Tensor with shape
            [batch_size, heads (optional), q/k_length, q/k_length]
        mask: a Tensor with shape [batch_size, q/k_length, q/k_length].
            Entries where the mask is False receive the bias. When None,
            `similarity` is returned unchanged.

    Returns:
        masked_similarity: a Tensor with shape
            [batch_size, heads (optional), q/k_length, q/k_length]
    """
    if mask is None:
        return similarity

    # Masks get built in many different ways, so no particular construction
    # is inferred here; only the ranks are validated.
    rank_checks = [
        tf.assert_rank_in(similarity, (3, 4)),
        tf.assert_rank(mask, 3),
    ]
    with tf.control_dependencies(rank_checks):
        same_rank = len(mask.shape) == len(similarity.shape)
        if same_rank:
            similarity.shape.assert_is_compatible_with(mask.shape)
        else:
            # A rank mismatch means similarity was split into heads for
            # multi-headed attention; broadcast the mask over the head axis.
            similarity[:, 0].shape.assert_is_compatible_with(mask.shape)
            mask = mask[:, None]

        # The result goes through a softmax later, so adding a relatively
        # large negative value is enough to mask an entry and avoids a
        # hadamard product.
        masked_out = tf.cast(tf.logical_not(mask), tf.float32)
        bias = -1e9 * masked_out
        masked_similarity = similarity + bias
        return masked_similarity
def cond(ctx, cache, probs):
    """Loop predicate for generation: True while sampling should continue.

    Generation stops when either
      - the last token of every row in `ctx` equals `eos_token` and the
        length has reached `min_len`, or
      - the length has reached `max_len`.
    The length is taken from axis 1 of `probs`. `eos_token`, `min_len` and
    `max_len` are read from the enclosing scope.

    Args:
        ctx: token tensor; only its last position along axis 1 is inspected.
        cache: not used by the predicate.
        probs: tensor whose axis-1 size is used as the current length.

    Returns:
        A scalar boolean tensor, True to keep looping.
    """
    last_is_eos = tf.reduce_any(tf.equal(ctx[:, -1:], eos_token), axis=1)
    is_eos = tf.reduce_all(last_is_eos)

    cur_len = get_shape_list(probs)[1]
    is_max_len = tf.greater_equal(cur_len, max_len)
    is_min_len = tf.greater_equal(cur_len, min_len)

    eos_after_min_len = tf.logical_and(is_eos, is_min_len)
    # The max-length check used to be computed but never applied, so the
    # predicate could only stop on EOS.
    should_stop = tf.logical_or(eos_after_min_len, is_max_len)
    return tf.logical_not(should_stop)
def cond_sufficient_descent(learning_rate_action,
                            cond_sufficient_descent,
                            cost_perturbed):
    """Loop predicate: True while the learning rate search should go on.

    Args:
        learning_rate_action: current candidate learning rate.
        cond_sufficient_descent: boolean tensor, whether the sufficient
            descent condition already holds.
        cost_perturbed: unused.

    Returns:
        A boolean tensor that is True while the candidate rate is still
        greater than `self.learning_rate_action` and sufficient descent has
        not been reached.
    """
    del cost_perturbed
    rate_still_above_base = tf.math.greater(
        learning_rate_action, self.learning_rate_action)
    descent_not_reached = tf.logical_not(cond_sufficient_descent)
    return tf.math.logical_and(rate_still_above_base, descent_not_reached)
def get_attention_bias(sequence_length):
    """Creates an attention bias so that padding positions are not attended.

    Args:
        sequence_length: tensor with the length of every sequence.

    Returns:
        The output of `common_attention.attention_bias_ignore_padding` for a
        float indicator that is 1.0 at positions past each sequence's length.
    """
    within_sequence = tf.sequence_mask(sequence_length)
    padding_indicator = tf.to_float(tf.logical_not(within_sequence))
    # attention_bias: [batch, 1, 1, memory_length]
    return common_attention.attention_bias_ignore_padding(padding_indicator)
def subsample(self, indicator, batch_size, labels, scope=None):
    """Returns subsampled minibatch.

    Args:
        indicator: boolean tensor of shape [N] whose True entries can be
            sampled.
        batch_size: desired batch size. If None, keeps all positive samples
            and randomly selects negative samples so that the positive sample
            fraction matches self._positive_fraction. It cannot be None if
            is_static is True.
        labels: boolean tensor of shape [N] denoting positive(=True) and
            negative (=False) examples.
        scope: name scope.

    Returns:
        sampled_idx_indicator: boolean tensor of shape [N], True for entries
            which are sampled.

    Raises:
        ValueError: if labels and indicator are not 1D boolean tensors.
    """
    indicator_rank = len(indicator.get_shape().as_list())
    if indicator_rank != 1:
        raise ValueError('indicator must be 1 dimensional, got a tensor of '
                         'shape %s' % indicator.get_shape())
    labels_rank = len(labels.get_shape().as_list())
    if labels_rank != 1:
        raise ValueError('labels must be 1 dimensional, got a tensor of '
                         'shape %s' % labels.get_shape())
    if labels.dtype != tf.bool:
        raise ValueError('labels should be of type bool. Received: %s' %
                         labels.dtype)
    if indicator.dtype != tf.bool:
        raise ValueError('indicator should be of type bool. Received: %s' %
                         indicator.dtype)

    with tf.name_scope(scope, 'BalancedPositiveNegativeSampler'):
        if self._is_static:
            return self._static_subsample(indicator, batch_size, labels)

        # Only entries flagged by the indicator are candidates.
        positive_idx = tf.logical_and(labels, indicator)
        negative_idx = tf.logical_and(tf.logical_not(labels), indicator)
        keep_all_positives = batch_size is None

        # Positives and negatives are sampled separately.
        if keep_all_positives:
            max_num_pos = tf.reduce_sum(tf.to_int32(positive_idx))
        else:
            max_num_pos = int(self._positive_fraction * batch_size)
        sampled_pos_idx = self.subsample_indicator(positive_idx, max_num_pos)
        num_sampled_pos = tf.reduce_sum(tf.cast(sampled_pos_idx, tf.int32))

        if keep_all_positives:
            # Scale the negatives to the positives actually sampled.
            negative_positive_ratio = (
                1 - self._positive_fraction) / self._positive_fraction
            max_num_neg = tf.to_int32(
                negative_positive_ratio * tf.to_float(num_sampled_pos))
        else:
            # Fill the rest of the batch with negatives.
            max_num_neg = batch_size - num_sampled_pos
        sampled_neg_idx = self.subsample_indicator(negative_idx, max_num_neg)

        return tf.logical_or(sampled_pos_idx, sampled_neg_idx)
def next_inputs(self, time, outputs, state, sample_ids, name=None):
    """Computes the next decoder inputs, mixing in sampled outputs.

    The parent helper's `next_inputs` supplies the base next inputs. Batch
    entries whose `sample_ids` value is truthy have their input replaced by
    the current `outputs` (optionally transformed by `self._next_inputs_fn`
    and concatenated with auxiliary inputs); all other entries keep the base
    input.

    Args:
        time: current time step; auxiliary inputs are read at `time + 1`.
        outputs: outputs of the current step.
        state: decoder state, passed through the parent helper.
        sample_ids: per-entry sampling decisions, cast to bool here.
        name: optional name scope, also forwarded to the parent helper.

    Returns:
        A tuple (finished, next_inputs, state).
    """
    with tf.name_scope(name, "ScheduledOutputTrainingHelperNextInputs",
                       [time, outputs, state, sample_ids]):
        (finished, base_next_inputs, state) = (
            super(ScheduledOutputTrainingHelper, self).next_inputs(
                time=time,
                outputs=outputs,
                state=state,
                sample_ids=sample_ids,
                name=name))
        # From here on sample_ids is a boolean "use the output" mask.
        sample_ids = tf.cast(sample_ids, tf.bool)

        def maybe_sample():
            """Perform scheduled sampling."""

            def maybe_concatenate_auxiliary_inputs(outputs_, indices=None):
                """Concatenate outputs with auxiliary inputs, if they exist."""
                if self._auxiliary_input_tas is None:
                    return outputs_

                # Auxiliary inputs for the *next* step are read from the
                # stored tensor arrays.
                next_time = time + 1
                auxiliary_inputs = tf.nest.map_structure(
                    lambda ta: ta.read(next_time), self._auxiliary_input_tas)
                if indices is not None:
                    # Restrict to the same batch entries as outputs_.
                    auxiliary_inputs = tf.gather_nd(auxiliary_inputs, indices)
                return tf.nest.map_structure(
                    lambda x, y: tf.concat((x, y), -1),
                    outputs_, auxiliary_inputs)

            if self._next_inputs_fn is None:
                # No transformation: pick per entry between output and base
                # input.
                return tf.where(
                    sample_ids, maybe_concatenate_auxiliary_inputs(outputs),
                    base_next_inputs)

            # With a transformation, only the sampled entries are gathered
            # and passed through it.
            where_sampling = tf.cast(
                tf.where(sample_ids), tf.int32)
            where_not_sampling = tf.cast(
                tf.where(tf.logical_not(sample_ids)), tf.int32)
            outputs_sampling = tf.gather_nd(outputs, where_sampling)
            inputs_not_sampling = tf.gather_nd(base_next_inputs,
                                               where_not_sampling)
            sampled_next_inputs = maybe_concatenate_auxiliary_inputs(
                self._next_inputs_fn(outputs_sampling), where_sampling)

            # The two index sets are complementary, so summing the two
            # scatters reassembles the full batch.
            base_shape = tf.shape(base_next_inputs)
            return (tf.scatter_nd(indices=where_sampling,
                                  updates=sampled_next_inputs,
                                  shape=base_shape)
                    + tf.scatter_nd(indices=where_not_sampling,
                                    updates=inputs_not_sampling,
                                    shape=base_shape))

        # Skip the sampling branch when it cannot change anything.
        all_finished = tf.reduce_all(finished)
        no_samples = tf.logical_not(tf.reduce_any(sample_ids))
        next_inputs = tf.cond(
            tf.logical_or(all_finished, no_samples),
            lambda: base_next_inputs, maybe_sample)
        return (finished, next_inputs, state)
def lengths_to_area_mask(feature_length, length, max_area_size):
    """Generates a non-padding mask for areas based on lengths.

    An area counts as non-padding when the padding indicators summed over it
    by `compute_area_features` are zero.

    Args:
        feature_length: a tensor of [batch_size]
        length: the length of the batch
        max_area_size: the maximum area size considered

    Returns:
        mask: a tensor in shape of [batch_size, num_areas]
    """
    is_padding = tf.logical_not(
        tf.sequence_mask(feature_length, maxlen=length))
    paddings = tf.cast(tf.expand_dims(is_padding, 2), tf.float32)

    # Only the third output (the per-area sum) is needed.
    _, _, area_sum, _, _ = compute_area_features(
        paddings, max_area_width=max_area_size)

    area_has_padding = tf.cast(area_sum, tf.bool)
    mask = tf.squeeze(tf.logical_not(area_has_padding), [2])
    return mask
def build_graph(self):
    """Builds the Q-learning graph: placeholders, networks, loss and ops.

    Creates the input placeholders, a non-trainable network under the
    "fixed" variable scope (used for the targets) and a trainable one under
    "train", and sets `self.best_action`, `self.train_op` and
    `self.copy_network_ops` (assignments copying the "train" variables into
    the "fixed" ones).
    """
    stacked_frames_dim = (self.num_frame_stack,) + self.pic_size
    input_dim_with_batch = (self.batchsize,) + stacked_frames_dim
    input_dim_general = (None,) + stacked_frames_dim

    self.input_prev_state = tf.placeholder(
        tf.float32, input_dim_general, "prev_state")
    self.input_next_state = tf.placeholder(
        tf.float32, input_dim_with_batch, "next_state")
    self.input_reward = tf.placeholder(
        tf.float32, self.batchsize, "reward")
    self.input_actions = tf.placeholder(
        tf.int32, self.batchsize, "actions")
    self.input_done_mask = tf.placeholder(
        tf.int32, self.batchsize, "done_mask")

    # State action values for all states; the targets come from the fixed
    # network, the estimates from the one being trained.
    with tf.variable_scope("fixed"):
        qsa_targets = self.create_network(
            self.input_next_state, trainable=False)
    with tf.variable_scope("train"):
        qsa_estimates = self.create_network(
            self.input_prev_state, trainable=True)

    self.best_action = tf.argmax(qsa_estimates, axis=1)

    # Transitions flagged as done contribute only their reward.
    is_done = tf.cast(self.input_done_mask, "bool")
    not_done = tf.cast(tf.logical_not(is_done), "float32")
    best_next_value = tf.reduce_max(qsa_targets, -1)
    q_target = best_next_value * self.gamma * not_done + self.input_reward

    # Select the chosen action from each row; in numpy this is
    # qsa_estimates[range(batchsize), self.input_actions].
    row_ids = tf.range(0, self.batchsize)
    action_slice = tf.stack([row_ids, self.input_actions], axis=1)
    q_estimates_for_input_action = tf.gather_nd(qsa_estimates, action_slice)

    td_error = q_target - q_estimates_for_input_action
    training_loss = tf.nn.l2_loss(td_error) / self.batchsize

    optimizer = tf.train.AdamOptimizer(**(self.optimizer_params))
    reg_loss = tf.add_n(tf.losses.get_regularization_losses())
    self.train_op = optimizer.minimize(reg_loss + training_loss)

    train_params = self.get_variables("train")
    fixed_params = self.get_variables("fixed")
    assert (len(train_params) == len(fixed_params))

    copy_ops = []
    for train_v, fixed_v in zip(train_params, fixed_params):
        copy_ops.append(tf.assign(fixed_v, train_v))
    self.copy_network_ops = copy_ops
def infer_step(result, length):
    """Runs one guess-and-check inference step.

    The current `result` is scored by the model; the longest prefix whose
    tokens are all within the model's top-k guesses (and not located after an
    EOS) is kept, and a fresh block of sampled tokens is appended after it.
    `features`, `block_size`, `decode_length` and `self` come from the
    enclosing scope.

    Args:
        result: current decoded tensor, indexed with four axes here.
        length: length accepted so far; only used for logging.

    Returns:
        A tuple (new_result, new_length): the accepted prefix followed by
        `block_size` newly sampled positions, and the accepted prefix length.
    """

    def print_info(result, length, new_length):
        """Logs the accepted length and the newly accepted suffix."""
        vocab = self.problem_hparams.vocabulary["targets"]
        # NOTE(review): `str(...).decode` only exists on Python 2 - on
        # Python 3 this call would raise; confirm the target runtime.
        tf.logging.info(
            "length=%s new_length=%s length_diff=%s new_suffix=%s",
            length,
            new_length,
            new_length - length,
            str([
                vocab._subtoken_id_to_subtoken_string(index)  # pylint: disable=protected-access
                for index in result[0, -block_size:, 0, 0][:new_length - length]
            ]).decode("unicode-escape"),
        )

    # Pad one extra position along axis 1 and score/sample with the model.
    features["targets"] = tf.pad(result, [[0, 0], [0, 1], [0, 0], [0, 0]])
    samples, logits, losses = self.sample(features)  # pylint: disable=unused-variable

    # For every position (the padded last one excluded), check whether the
    # token in `result` is among the k most likely ones.
    _, top_k_indices = tf.nn.top_k(
        logits[:, :-1, :1, :, :],
        k=self._decode_hparams.guess_and_check_top_k)
    in_top_k = tf.reduce_any(tf.equal(tf.to_int64(top_k_indices),
                                      tf.expand_dims(result, 4)), axis=4)

    # Positions that come after an EOS token never count as correct.
    eos_cumsum = tf.cumsum(tf.to_int32(
        tf.equal(result, text_encoder.EOS_ID)), axis=1)
    after_eos = tf.greater(common_layers.shift_right(eos_cumsum), 0)

    correct = tf.logical_and(in_top_k, tf.logical_not(after_eos))
    # The running count of correct positions equals 1, 2, 3, ... exactly as
    # long as no position has been wrong yet.
    correct_cumsum = tf.cumsum(tf.to_int32(correct), axis=1)
    perfect_cumsum = 1 + tf.range(tf.shape(correct)[1])
    for axis in [0, 2, 3]:
        perfect_cumsum = tf.expand_dims(perfect_cumsum, axis=axis)

    # Number of positions where both counts agree.
    new_length = tf.reduce_sum(tf.to_int32(
        tf.equal(correct_cumsum, perfect_cumsum)), axis=1)
    new_length = tf.squeeze(new_length, axis=[0, 1, 2])
    new_length = tf.minimum(new_length, decode_length)

    # Keep the accepted prefix and append a new block of samples taken at
    # the first unaccepted position.
    new_result = tf.concat([
        result[:, :new_length, :, :],
        tf.reshape(samples[:, new_length, :block_size, :],
                   [1, block_size, 1, 1])
    ], axis=1)

    # Tie the logging call to the result so that it actually runs.
    with tf.control_dependencies(
        [tf.py_func(print_info, [result, length, new_length], [])]):
        new_result = tf.identity(new_result)

    return new_result, new_length
def get_cross_block_att(block_ids, block_pos, all_block_ids, all_block_pos,
                        cross_block_attention_mode, cast_to_int32=True):
    """Computes attention mask between blocks based on their document IDs.

    A pair (local block, global block) is only ever allowed when both block
    IDs are non-zero. On top of that, the mode adds:
      - "doc": both blocks have the same ID.
      - "block": same ID and same position.
      - "other_blocks": same ID but not the same position.
      - "batch": no further restriction.

    Args:
        block_ids: IDs of the local blocks, [batch_size].
        block_pos: positions of the local blocks, [batch_size].
        all_block_ids: IDs of all blocks, [global_batch_size].
        all_block_pos: positions of all blocks, [global_batch_size].
        cross_block_attention_mode: one of the mode strings listed above.
        cast_to_int32: whether to return int32 instead of bool.

    Returns:
        A [batch_size, global_batch_size] mask tensor.

    Raises:
        ValueError: if the mode is not one of the known strings.
    """
    local_ids = tf.expand_dims(block_ids, 1)  # [batch_size, 1]
    global_ids = tf.expand_dims(all_block_ids, 0)  # [1, global_batch_size]
    local_pos = tf.expand_dims(block_pos, 1)  # [batch_size, 1]
    global_pos = tf.expand_dims(all_block_pos, 0)  # [1, global_batch_size]

    # [batch_size, global_batch_size]; presumably ID 0 marks padding blocks.
    both_non_zero = tf.logical_and(
        tf.not_equal(local_ids, 0), tf.not_equal(global_ids, 0))

    mode = cross_block_attention_mode
    if mode == "batch":
        cross_block_attention = both_non_zero
    elif mode == "doc":
        cross_block_attention = tf.logical_and(
            tf.equal(local_ids, global_ids), both_non_zero)
    elif mode == "block":
        same_doc = tf.logical_and(
            tf.equal(local_ids, global_ids), both_non_zero)
        cross_block_attention = tf.logical_and(
            tf.equal(local_pos, global_pos), same_doc)
    elif mode == "other_blocks":
        same_doc = tf.equal(local_ids, global_ids)
        same_block = tf.logical_and(
            tf.equal(local_pos, global_pos), same_doc)
        same_doc_other_block = tf.logical_and(
            same_doc, tf.logical_not(same_block))
        cross_block_attention = tf.logical_and(
            same_doc_other_block, both_non_zero)
    else:
        raise ValueError("Unknown cross_block_attention_mode: " +
                         cross_block_attention_mode)

    if cast_to_int32:
        cross_block_attention = tf.cast(cross_block_attention, dtype=tf.int32)
    return cross_block_attention
def verb_refs_to_lengths(task, verb_refs, include_eos=True):
    """Computes the length of each sequence of verb references.

    A step of `verb_refs` is treated as the end marker when its two entries
    are `(eos, eos + 1)`, where `eos` is the column of the entry equal to 1
    in the matching row of `task`. The length is the number of steps before
    the first end marker.

    NOTE(review): this presumably expects exactly one entry equal to 1 per
    row of `task` (otherwise `eos_positions` no longer lines up with the
    rows of `verb_refs`), and `verb_refs` of rank 3 with a last axis holding
    (start, end) -- confirm against the caller.

    Args:
      task: rank-2 tensor that is searched for entries equal to 1.
      verb_refs: tensor indexed as `[:, :, 0]` and `[:, :, 1]`.
      include_eos: if True, the end-marker step is counted as well.

    Returns:
      A float tensor with one length per row.
    """
    # Column index of every entry of `task` that equals 1, as an int32 column.
    eos_columns = tf.where(tf.equal(task, 1))[:, 1]
    eos_positions = tf.to_int32(tf.expand_dims(eos_columns, 1))

    starts_at_eos = tf.equal(verb_refs[:, :, 0], eos_positions)
    ends_after_eos = tf.equal(verb_refs[:, :, 1], eos_positions + 1)
    is_end_marker = tf.logical_and(starts_at_eos, ends_after_eos)

    # The running count turns non-zero at the first end marker and stays so,
    # which masks out that step and everything after it.
    seen_end_marker = tf.cast(
        tf.cumsum(tf.to_int32(is_end_marker), axis=-1), tf.bool)
    seq_mask = tf.logical_not(seen_end_marker)

    lengths = tf.reduce_sum(tf.to_float(seq_mask), axis=-1)
    return lengths + 1 if include_eos else lengths
def _top_p_sample(logits, ignore_ids=None, num_samples=1, p=0.9):
    """Does top-p (nucleus) sampling.

    Tokens are sorted by probability and only the shortest prefix whose
    cumulative probability reaches `p` is kept (the most likely token is
    always kept); samples are drawn from the logits of that prefix. If
    `ignore_ids` is given, 1e10 is subtracted from the flagged logits both
    when computing the probabilities and when sampling, so those ids are
    never predicted.

    TODO: figure out how to do this on TPUs. It is very slow right now,
    probably because of the argsort.

    :param logits: [batch_size, vocab_size] tensor
    :param ignore_ids: [vocab_size] indicator vector, non-zero at the indices
        we'd like to ignore and never predict (like padding maybe)
    :param num_samples: number of samples to draw per batch entry
    :param p: top-p threshold to use, either a Python number or a
        [batch_size] vector. A Python number above 0.999999 disables top-p
        filtering and samples from the full distribution.
    :return: dict with 'probs', the [batch_size, vocab_size] probabilities,
        and 'sample', the [batch_size, num_samples] sampled ids
    """
    with tf.variable_scope('top_p_sample'):
        _, vocab_size = get_shape_list(logits, expected_rank=2)

        # Push ignored ids to (effectively) zero probability. These masked
        # logits are used everywhere below so ignored ids stay unsampleable.
        if ignore_ids is None:
            masked_logits = logits
        else:
            masked_logits = logits - tf.cast(ignore_ids[None], tf.float32) * 1e10
        probs = tf.nn.softmax(masked_logits, axis=-1)

        # Python ints (e.g. p=1) are scalars too; only tensors get indexed.
        p_is_scalar = isinstance(p, (int, float))

        if p_is_scalar and p > 0.999999:
            # Don't do top-p sampling in this case
            print("Top-p sampling DISABLED", flush=True)
            return {
                'probs': probs,
                'sample': tf.random.categorical(
                    logits=masked_logits,
                    num_samples=num_samples,
                    dtype=tf.int32),
            }

        # [batch_size, vocab_perm]
        indices = tf.argsort(probs, direction='DESCENDING')
        cumulative_probabilities = tf.math.cumsum(
            tf.batch_gather(probs, indices), axis=-1, exclusive=False)

        # Find the top-p index to cut off at. Careful: we don't want to cut
        # off everything, so the first (most likely) entry is always kept.
        # Result is [batch_size, vocab_perm].
        p_expanded = p if p_is_scalar else p[:, None]
        exclude_mask = tf.logical_not(
            tf.logical_or(cumulative_probabilities < p_expanded,
                          tf.range(vocab_size)[None] < 1))

        # Sample in the sorted space, then map back to vocabulary ids.
        # Gathering the masked logits (not the raw ones) keeps ignored ids
        # out even when rounding leaves their cumulative probability below p.
        # (The alternative is to un-sort the mask first and sample in
        # vocabulary order.)
        logits_to_use = (tf.batch_gather(masked_logits, indices)
                         - tf.cast(exclude_mask, tf.float32) * 1e10)
        sample_perm = tf.random.categorical(
            logits=logits_to_use, num_samples=num_samples)
        sample = tf.batch_gather(indices, sample_perm)

    return {
        'probs': probs,
        'sample': sample,
    }
def metric_fn(query_mask, block_mask, labels, predictions, mask_query):
    """Builds the evaluation metrics.

    Accuracy is reported twice: once weighted by `mask_query` and once
    weighted by its negation. The means of the three masks are reported as
    well.

    Args:
      query_mask: tensor whose mean is reported as `query_non_padding`.
      block_mask: tensor whose mean is reported as `block_non_padding`.
      labels: ground-truth labels.
      predictions: predicted labels.
      mask_query: boolean tensor (it is passed to `tf.logical_not`) selecting
        the examples counted by `masked_accuracy`.

    Returns:
      A dict mapping metric names to the values returned by `tf.metrics`.
    """
    # The metric ops are created in a fixed order (accuracies first, then
    # the means) so their auto-generated variable names stay stable.
    accuracy_on_masked = tf.metrics.accuracy(
        labels=labels, predictions=predictions, weights=mask_query)
    not_masked = tf.logical_not(mask_query)
    accuracy_on_unmasked = tf.metrics.accuracy(
        labels=labels, predictions=predictions, weights=not_masked)

    metrics = {}
    metrics["query_non_padding"] = tf.metrics.mean(query_mask)
    metrics["block_non_padding"] = tf.metrics.mean(block_mask)
    metrics["actual_mask_ratio"] = tf.metrics.mean(mask_query)
    metrics["masked_accuracy"] = accuracy_on_masked
    metrics["unmasked_accuracy"] = accuracy_on_unmasked
    return metrics
def expand_labels(relation_tensor, confidence_value):
    """Expands labels along `relation_tensor` for one confidence value.

    Labels whose confidence equals `confidence_value` are replaced by every
    class that `relation_tensor` relates to at least one of them; all other
    labels are kept unchanged. The added classes get `confidence_value` as
    their confidence.

    Reads `image_classes`, `image_confidences` and `_LABEL_OFFSET` from the
    enclosing scope.

    Args:
      relation_tensor: tensor whose row `c - _LABEL_OFFSET` has positive
        entries at the (offset-free) classes related to class `c`
        (presumably an ancestor or descendant lookup -- confirm with the
        caller).
      confidence_value: the confidence selecting which labels to expand.

    Returns:
      A `(classes, confidences)` pair of the expanded labels.
    """
    selected = tf.equal(image_confidences, confidence_value)
    unselected = tf.logical_not(selected)

    # Rows of the relation table for the selected classes; a class is added
    # when any selected row marks it.
    selected_classes = tf.boolean_mask(image_classes, selected)
    related_rows = tf.gather(
        relation_tensor, selected_classes - _LABEL_OFFSET, axis=0)
    is_related = tf.reduce_any(related_rows > 0, axis=0)
    expanded_indices = tf.where(is_related)[:, 0] + _LABEL_OFFSET

    kept_classes = tf.boolean_mask(image_classes, unselected)
    new_groundtruth_image_classes = tf.concat(
        [kept_classes, expanded_indices], axis=0)

    kept_confidences = tf.boolean_mask(image_confidences, unselected)
    num_expanded = tf.shape(expanded_indices)[0]
    expanded_confidences = (
        tf.ones([num_expanded], dtype=image_confidences.dtype)
        * confidence_value)
    new_groundtruth_image_confidences = tf.concat(
        [kept_confidences, expanded_confidences], axis=0)

    return new_groundtruth_image_classes, new_groundtruth_image_confidences
def do_process_boundary(start_points, end_points, input_length, t1_id, t2_id,
                        all_tokenized_diag):
    """Holds the majority of the logic to process a boundary.

    Args:
      start_points: start offsets, turned into a sequence mask.
      end_points: end offsets, turned into a sequence mask.
      input_length: length of the sequence masks that are built.
      t1_id: first id searched for in `all_tokenized_diag`.
      t2_id: second id searched for in `all_tokenized_diag`.
      all_tokenized_diag: tensor of ids (presumably the tokenized dialogue,
        with `t1_id` / `t2_id` marking the turns -- confirm with the caller).

    Returns:
      A `(mask1, mask2, turn_point)` tuple: `mask1` is True at positions that
      fall between the start and the end of at least one (start, end) pair,
      `mask2` is its negation, and `turn_point` is a float32 tensor that is
      1.0 where `all_tokenized_diag` equals `t1_id` or `t2_id`.
    """
    # The two prefix masks differ exactly on the positions between each
    # start and its end; reducing over axis 0 merges all the pairs.
    up_to_start = tf.sequence_mask(start_points, input_length)
    up_to_end = tf.sequence_mask(end_points, input_length)
    mask1 = tf.reduce_any(tf.logical_xor(up_to_start, up_to_end), axis=0)
    mask2 = tf.logical_not(mask1)

    is_turn = tf.logical_or(
        tf.equal(all_tokenized_diag, t1_id),
        tf.equal(all_tokenized_diag, t2_id))
    turn_point = tf.cast(is_turn, dtype=tf.float32)

    return mask1, mask2, turn_point