def prune_completely_outside_window(boxes, window):
  """
  Prunes bounding boxes that fall completely outside of the given window.
  This function does not clip partially overflowing boxes.

  Arguments:
    boxes: a float tensor with shape [M_in, 4].
    window: a float tensor with shape [4] representing
      [ymin, xmin, ymax, xmax] of the window.
  Returns:
    boxes: a float tensor with shape [M_out, 4] where 0 <= M_out <= M_in.
    valid_indices: a long tensor with shape [M_out] indexing the valid
      bounding boxes in the input 'boxes' tensor.
  """
  y_min, x_min, y_max, x_max = tf.split(boxes, num_or_size_splits=4, axis=1)
  # they have shape [None, 1]
  win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
  # they have shape []

  coordinate_violations = tf.concat([
      tf.greater_equal(y_min, win_y_max), tf.greater_equal(x_min, win_x_max),
      tf.less_equal(y_max, win_y_min), tf.less_equal(x_max, win_x_min)
  ], axis=1)
  valid_indices = tf.squeeze(
      tf.where(tf.logical_not(tf.reduce_any(coordinate_violations, 1))),
      axis=1)
  boxes = tf.gather(boxes, valid_indices)
  return boxes, valid_indices
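# Usage sketch (an illustration, assuming TF 2.x eager execution and
# `import tensorflow as tf`): the second box lies entirely above the window
# and is pruned, while the first is kept untouched.
boxes = tf.constant([[0.2, 0.2, 0.5, 0.5],
                     [-0.4, 0.1, -0.1, 0.3]])
window = tf.constant([0.0, 0.0, 1.0, 1.0])
kept, idx = prune_completely_outside_window(boxes, window)
print(kept.numpy(), idx.numpy())  # [[0.2 0.2 0.5 0.5]] [0]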
def prune_completely_outside_window(boxlist, window, scope=None):
  """Prunes bounding boxes that fall completely outside of the given window.

  The function clip_to_window prunes bounding boxes that fall completely
  outside the window, but also clips any bounding boxes that partially
  overflow. This function does not clip partially overflowing boxes.

  Args:
    boxlist: a BoxList holding M_in boxes.
    window: a float tensor of shape [4] representing
      [ymin, xmin, ymax, xmax] of the window.
    scope: name scope.

  Returns:
    pruned_boxlist: a new BoxList with all bounding boxes partially or fully
      in the window.
    valid_indices: a tensor with shape [M_out] indexing the valid bounding
      boxes in the input tensor.
  """
  with tf.name_scope(scope, 'PruneCompletelyOutsideWindow'):
    y_min, x_min, y_max, x_max = tf.split(
        value=boxlist.get(), num_or_size_splits=4, axis=1)
    win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
    coordinate_violations = tf.concat([
        tf.greater_equal(y_min, win_y_max),
        tf.greater_equal(x_min, win_x_max),
        tf.less_equal(y_max, win_y_min),
        tf.less_equal(x_max, win_x_min)
    ], 1)
    valid_indices = tf.reshape(
        tf.where(tf.logical_not(tf.reduce_any(coordinate_violations, 1))),
        [-1])
    return gather(boxlist, valid_indices), valid_indices
def _divide_no_nan(x, y, epsilon=1e-8):
  """Equivalent to tf.math.divide_no_nan but supports bfloat16."""
  # Need a manual broadcast: replace near-zero denominators with 1 before
  # dividing, then zero out the corresponding quotients.
  safe_y = tf.where(
      tf.logical_and(tf.greater_equal(y, -epsilon), tf.less_equal(y, epsilon)),
      tf.ones_like(y), y)
  return tf.where(
      tf.logical_and(
          tf.greater_equal(tf.broadcast_to(y, x.get_shape()), -epsilon),
          tf.less_equal(tf.broadcast_to(y, x.get_shape()), epsilon)),
      tf.zeros_like(x), x / safe_y)
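# Quick check (an illustration, assuming TF 2.x eager mode with statically
# known shapes, since the broadcast uses x.get_shape()):
x = tf.constant([1.0, 2.0, 3.0])
y = tf.constant([2.0, 0.0, 1e-9])
print(_divide_no_nan(x, y).numpy())  # [0.5 0.  0. ] -- near-zero denominators yield 0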
def maybe_update_masks():
  with tf.name_scope(self._spec.name):
    is_step_within_pruning_range = tf.logical_and(
        tf.greater_equal(self._global_step, self._spec.begin_pruning_step),
        # If end_pruning_step is negative, keep pruning forever!
        tf.logical_or(
            tf.less_equal(self._global_step, self._spec.end_pruning_step),
            tf.less(self._spec.end_pruning_step, 0)))
    is_pruning_step = tf.less_equal(
        tf.add(self._last_update_step, self._spec.pruning_frequency),
        self._global_step)
    return tf.logical_and(is_step_within_pruning_range, is_pruning_step)
def images_have_overlap(trip, min_ratio, max_ratio):
  """Checks if images have any overlap."""
  # The y axis in image coordinates increases from top to bottom.
  mask1_in_2, mask2_in_1 = trip.mask[0], trip.mask[1]
  shape = mask1_in_2.shape.as_list()
  height, width = shape[0], shape[1]
  ratio1 = tf.reduce_sum(tf.cast(mask1_in_2, tf.float32)) / (height * width)
  ratio2 = tf.reduce_sum(tf.cast(mask2_in_1, tf.float32)) / (height * width)
  cond1 = tf.logical_and(tf.less_equal(ratio1, max_ratio),
                         tf.less_equal(ratio2, max_ratio))
  cond2 = tf.logical_and(tf.greater_equal(ratio1, min_ratio),
                         tf.greater_equal(ratio2, min_ratio))
  return tf.logical_and(cond1, cond2)
def maybe_apply_compression():
  """Decide whether global step is within compression range.

  Returns:
    is_step_within_compression_range: bool.
  """
  with tf.compat.v1.name_scope(self._spec.name):
    global_step = self._global_step

    def real_global_step_fn():
      return global_step

    def mock_global_step_fn():
      return self._spec.begin_compression_step

    global_step = tf.cond(
        tf.constant(global_step is None, dtype=tf.bool),
        mock_global_step_fn, real_global_step_fn)
    is_step_within_compression_range = tf.logical_and(
        tf.greater_equal(
            tf.cast(global_step, tf.int32),
            self._spec.begin_compression_step),
        tf.logical_or(
            tf.less_equal(
                tf.cast(global_step, tf.int32),
                self._spec.end_compression_step),
            tf.less(self._spec.end_compression_step, 0)))
    return is_step_within_compression_range
def oversample_classes(example):
  """Returns the number of copies of given example."""
  class_prob = example['class_prob']
  class_target_prob = example['class_target_prob']
  prob_ratio = tf.cast(class_target_prob / class_prob, dtype=tf.float32)
  # Soften the ratio: if oversampling_coef == 0 we recover the original
  # distribution.
  prob_ratio = prob_ratio**oversampling_coef
  # For classes with probability higher than class_target_prob we
  # want to return 1.
  prob_ratio = tf.maximum(prob_ratio, 1)
  # For low-probability classes this number will be very large.
  repeat_count = tf.floor(prob_ratio)
  # prob_ratio can be e.g. 1.9, which means there is still a 90% chance
  # that we should return 2 instead of 1.
  repeat_residual = prob_ratio - repeat_count  # a number between 0 and 1
  residual_acceptance = tf.less_equal(
      tf.random_uniform([], dtype=tf.float32), repeat_residual)

  residual_acceptance = tf.cast(residual_acceptance, tf.int64)
  repeat_count = tf.cast(repeat_count, dtype=tf.int64)

  return repeat_count + residual_acceptance
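# Typical usage with tf.data (a sketch, not from the original source): repeat
# each example according to its sampled copy count. Assumes a `dataset` whose
# elements are dicts containing 'class_prob' and 'class_target_prob', an
# `oversampling_coef` in scope, and TF 1.x (tf.random_uniform).
dataset = dataset.flat_map(
    lambda example: tf.data.Dataset.from_tensors(example).repeat(
        oversample_classes(example)))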
def area_range_to_index(area_range, length, max_area_width):
  """Computes the indices of each area in the area expansion.

  Args:
    area_range: tensor in shape of [batch_size, 2]
    length: a scalar tensor gives the length of the original feature space.
    max_area_width: a constant scalar.
  Returns:
    indices: area indices tensor in shape of [batch_size]
  """
  with tf.control_dependencies([
      tf.assert_equal(tf.rank(area_range), 2),
      tf.assert_equal(tf.shape(area_range)[1], 2)
  ]):
    area_range = tf.cast(area_range, tf.int32)
  target_size = area_range[:, 1] - area_range[:, 0]
  with tf.control_dependencies(
      [tf.assert_less(target_size, max_area_width + 1, summarize=100000)]):
    sizes = target_size - 1
    start_length = length
    pre_end_length = length - sizes + 1
    base = (start_length + pre_end_length) * (
        start_length - pre_end_length + 1) // 2
    base = tf.where(
        tf.less_equal(target_size, 1), tf.zeros_like(target_size), base)
    offset = area_range[:, 0]
    return base + offset
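# Worked example (an illustration, assuming TF 2.x eager mode): areas are
# enumerated by width, so with length=4 the four width-1 areas occupy indices
# 0..3 and the width-2 areas start at index 4. For area_range=[1, 3] (width 2,
# offset 1): base = (4 + 4) * (4 - 4 + 1) // 2 = 4, index = base + offset = 5.
print(area_range_to_index(tf.constant([[1, 3]]), 4, max_area_width=3))  # -> [5]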
def body_sufficient_descent(learning_rate_action,
                            cond_sufficient_descent,
                            cost_perturbed,
                            c_armijo=0.01,
                            c_goldstein=0.25,
                            lr_decay=0.1):
  """Function for sufficient descent."""
  del cond_sufficient_descent, cost_perturbed
  action_variable_perturbed_tensor = (
      self._action_variable_tensor -
      learning_rate_action * self.normalized_action_gradient)
  cost_perturbed = -tf.reduce_mean(
      self._build_q_function_net(self._state_tensor,
                                 action_variable_perturbed_tensor))
  # Here the negative gradient corresponds to maximization of the Q function.
  sufficient_descent = tf.reduce_sum(
      self.action_gradient * -self.normalized_action_gradient)
  goldstein_condition = tf.greater_equal(
      cost_perturbed,
      self.cost_now + c_goldstein * learning_rate_action * sufficient_descent)
  armijo_condition = tf.less_equal(
      cost_perturbed,
      self.cost_now + c_armijo * learning_rate_action * sufficient_descent)
  cond_sufficient_descent = tf.logical_and(goldstein_condition,
                                           armijo_condition)

  with tf.control_dependencies([cond_sufficient_descent]):
    learning_rate_action = learning_rate_action * lr_decay
  return learning_rate_action, cond_sufficient_descent, cost_perturbed
def lenpred_stats(targets_length_pred, targets_length):
  """Computes exact and within-5 accuracy of length predictions."""
  lenpred_diff = tf.abs(
      targets_length_pred - tf.cast(targets_length, tf.int32))
  lenpred_acc = tf.cast(tf.equal(lenpred_diff, 0), tf.float32)
  lenpred_acc = tf.reduce_mean(lenpred_acc)
  lenpred_acc5 = tf.cast(tf.less_equal(lenpred_diff, 5), tf.float32)
  lenpred_acc5 = tf.reduce_mean(lenpred_acc5)
  return lenpred_acc, lenpred_acc5
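# Quick numeric check (an illustration, assuming TF 2.x eager mode):
acc, acc5 = lenpred_stats(tf.constant([10, 8]), tf.constant([10, 2]))
print(acc.numpy(), acc5.numpy())  # 0.5 0.5 -- one exact hit; |8 - 2| = 6 misses the +/-5 band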
def maybe_apply_compression():
  """Decide whether global step is within compression range.

  Returns:
    is_step_within_compression_range: bool.
  """
  with tf.compat.v1.name_scope(self._spec.name):
    # Compress if the current step is more than begin_compression_step and
    # less than end_compression_step (unless it's negative).
    global_step = tf.train.get_global_step()

    def real_global_step_fn():
      return tf.cast(tf.train.get_global_step(), tf.int32)

    def mock_global_step_fn():
      return self._spec.begin_compression_step

    def is_global_step_none(global_step):
      return tf.constant(global_step is None, dtype=tf.bool)

    global_step = tf.cond(is_global_step_none(global_step),
                          mock_global_step_fn, real_global_step_fn)
    is_step_within_compression_range = tf.logical_and(
        tf.greater_equal(
            tf.cast(global_step, tf.int32),
            self._spec.begin_compression_step),
        tf.logical_or(
            tf.less_equal(
                tf.cast(global_step, tf.int32),
                self._spec.end_compression_step),
            tf.less(self._spec.end_compression_step, 0)))
    return is_step_within_compression_range
def sample(self, features, decode_step, cache, decoding_stats):
  """Sample step for infer."""
  with tf.variable_scope('sparse_transformer/body', reuse=tf.AUTO_REUSE):
    logits = self.body(features, decode_step, cache, decoding_stats)
  if not self.hparams.fast_decode:
    logits = tf.gather(logits, decode_step, axis=1)
  logits = tf.reshape(logits, [self.batch_size, self.vocab_size])
  # Should not use top_k and top_p together.
  # assert (self.hparams.sampling_keep_top_k *
  #         (1 - self.hparams.nucleus_sampling) == 0)
  if self.hparams.sampling_keep_top_k:
    tf.logging.info('Top-k sampling top_k = {}'.format(
        self.hparams.sampling_keep_top_k))
    values, _ = tf.math.top_k(logits, k=self.hparams.sampling_keep_top_k)
    k_largest = tf.reduce_min(values)
    # Mask out every logit at or below the k-th largest value.
    logits = tf.where(
        tf.less_equal(logits, k_largest),
        tf.ones_like(logits) * -1e9,
        logits,
    )
  if self.hparams.nucleus_sampling < 1:
    logits = self.nucleus_sampling(logits)
  sample = self.multinomial_squeeze(logits, self.hparams.sampling_temp)
  sample = tf.reshape(sample, [self.batch_size])
  return sample, logits
def update_masks():
  """Check whether all pruning conditions are met before pruning."""
  with tf.name_scope(self._spec.name):
    is_step_within_pruning_range = tf.logical_and(
        tf.greater_equal(self._global_step, self._spec.begin_pruning_step),
        # If end_pruning_step is negative, keep pruning forever!
        tf.logical_or(
            tf.less_equal(self._global_step, self._spec.end_pruning_step),
            tf.less(self._spec.end_pruning_step, 0)))
    is_pruning_step = tf.less_equal(
        tf.add(self._last_update_step, self._spec.pruning_frequency),
        self._global_step)
    return tf.logical_and(is_step_within_pruning_range, is_pruning_step)
def stochastic_sigmoid(x, args):
  """Samples a 0/1 tensor whose entries are 1 with probability sigmoid(x).

  Note: `args` is accepted for interface compatibility but unused.
  """
  shape_x = tf.shape(x)
  prob = tf.sigmoid(x)
  prob_mask = tf.less_equal(
      tf.random.uniform(shape_x, dtype=tf.float64), prob)
  return tf.where(prob_mask,
                  tf.ones(shape_x, dtype=tf.float64),
                  tf.zeros(shape_x, dtype=tf.float64))
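# Usage sketch (an illustration, assuming TF 2.x eager mode): each entry is 1
# with probability sigmoid(x), so large positive inputs are almost surely 1.
x = tf.constant([-4.0, 0.0, 4.0], dtype=tf.float64)
print(stochastic_sigmoid(x, None).numpy())  # e.g. [0. 1. 1.]; the middle entry is a fair coin flip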
def zero_out_non_min_distances(dist, n_boundaries):
  n_units = dist.shape[1]
  # Add tiny random noise to break ties, so that we don't count more than
  # k zeros in CNNs.
  dist = dist + 10**-5 * (np.random.rand(n_units) - 0.5)
  min_dist_rb = get_min_distances(dist, n_boundaries)  # bs x n_boundaries
  # Take the maximum distance over the min-k.
  th1 = tf.expand_dims(tf.reduce_max(min_dist_rb, axis=1), 1)  # bs x 1
  th1 = tf.tile(th1, [1, n_units])
  # Only entries that are <= the max over the min-k become 1, else 0.
  th = tf.cast(tf.less_equal(dist, th1), tf.float32)
  return th
def build_uda_cross_entropy(params, model, all_images, l_labels):
  """Compute the UDA loss."""
  train_batch_size = params.train_batch_size
  num_replicas = params.num_replicas
  uda_data = params.uda_data
  batch_size = train_batch_size // num_replicas

  labels = {}
  if l_labels.dtype == tf.int32:  # l_labels is sparse. turn into one_hot
    labels['l'] = tf.one_hot(l_labels, params.num_classes, dtype=tf.float32)
  else:
    labels['l'] = l_labels

  global_step = tf.train.get_or_create_global_step()

  masks = {}
  logits = {}
  cross_entropy = {}
  all_logits = model(all_images, training=True)

  logits['l'], logits['u_ori'], logits['u_aug'] = tf.split(
      all_logits,
      [batch_size, batch_size * uda_data, batch_size * uda_data], 0)

  # sup loss
  cross_entropy['l'] = tf.losses.softmax_cross_entropy(
      onehot_labels=labels['l'],
      logits=logits['l'],
      label_smoothing=params.label_smoothing,
      reduction=tf.losses.Reduction.NONE)
  probs = tf.nn.softmax(logits['l'], axis=-1)
  correct_probs = tf.reduce_sum(labels['l'] * probs, axis=-1)
  r = tf.cast(global_step, tf.float32) / float(params.num_train_steps)
  l_threshold = r * (1. - 1. / params.num_classes) + 1. / params.num_classes
  masks['l'] = tf.less_equal(correct_probs, l_threshold)
  masks['l'] = tf.cast(masks['l'], tf.float32)
  masks['l'] = tf.stop_gradient(masks['l'])
  cross_entropy['l'] = tf.reduce_sum(
      cross_entropy['l']) / float(train_batch_size)

  # unsup loss
  labels['u_ori'] = tf.nn.softmax(logits['u_ori'] / params.uda_temp, axis=-1)
  labels['u_ori'] = tf.stop_gradient(labels['u_ori'])
  cross_entropy['u'] = (labels['u_ori'] *
                        tf.nn.log_softmax(logits['u_aug'], axis=-1))
  largest_probs = tf.reduce_max(labels['u_ori'], axis=-1, keepdims=True)
  masks['u'] = tf.greater_equal(largest_probs, params.uda_threshold)
  masks['u'] = tf.cast(masks['u'], tf.float32)
  masks['u'] = tf.stop_gradient(masks['u'])
  cross_entropy['u'] = tf.reduce_sum(
      -cross_entropy['u'] * masks['u']) / float(train_batch_size * uda_data)
  return logits, labels, masks, cross_entropy
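# Supervised-mask schedule sketch (an illustration, not from the original
# source): the mask keeps only examples whose correct-class probability is
# still below a threshold that ramps linearly from 1/num_classes to 1 over
# training (a training-signal-annealing-style schedule). With num_classes=10:
for r in (0.0, 0.5, 1.0):
  print(r, r * (1. - 1. / 10) + 1. / 10)  # thresholds 0.1, 0.55, 1.0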
def _subsample_selection_to_desired_neg_pos_ratio(self,
                                                  indices,
                                                  match,
                                                  max_negatives_per_positive,
                                                  min_negatives_per_image=0):
  """Subsample a collection of selected indices to a desired neg:pos ratio.

  This function takes a subset of M indices (indexing into a large anchor
  collection of N anchors where M<N) which are labeled as positive/negative
  via a Match object (matched indices are positive, unmatched indices are
  negative). It returns a subset of the provided indices retaining all
  positives as well as up to the first K negatives, where:
    K = floor(num_negative_per_positive * num_positives).

  For example, if indices=[2, 4, 5, 7, 9, 10] (indexing into 12 anchors),
  with positives=[2, 5] and negatives=[4, 7, 9, 10] and
  num_negatives_per_positive=1, then the returned subset of indices
  is [2, 4, 5, 7].

  Args:
    indices: An integer tensor of shape [M] representing a collection
      of selected anchor indices
    match: A matcher.Match object encoding the match between anchors and
      groundtruth boxes for a given image, with rows of the Match objects
      corresponding to groundtruth boxes and columns corresponding to anchors.
    max_negatives_per_positive: (float) maximum number of negatives for
      each positive anchor.
    min_negatives_per_image: minimum number of negative anchors for a given
      image. Allow sampling negatives in image without any positive anchors.

  Returns:
    selected_indices: An integer tensor of shape [M'] representing a
      collection of selected anchor indices with M' <= M.
    num_positives: An integer tensor representing the number of positive
      examples in selected set of indices.
    num_negatives: An integer tensor representing the number of negative
      examples in selected set of indices.
  """
  positives_indicator = tf.gather(match.matched_column_indicator(), indices)
  negatives_indicator = tf.gather(match.unmatched_column_indicator(), indices)
  num_positives = tf.reduce_sum(tf.cast(positives_indicator, dtype=tf.int32))
  max_negatives = tf.maximum(
      min_negatives_per_image,
      tf.cast(max_negatives_per_positive *
              tf.cast(num_positives, dtype=tf.float32), dtype=tf.int32))
  topk_negatives_indicator = tf.less_equal(
      tf.cumsum(tf.cast(negatives_indicator, dtype=tf.int32)), max_negatives)
  subsampled_selection_indices = tf.where(
      tf.logical_or(positives_indicator, topk_negatives_indicator))
  num_negatives = tf.size(subsampled_selection_indices) - num_positives
  return (tf.reshape(tf.gather(indices, subsampled_selection_indices), [-1]),
          num_positives, num_negatives)
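# How the cumsum trick keeps "the first K negatives" (an illustration,
# assuming TF 2.x eager mode), using the docstring's example where
# max_negatives = 2 and indices = [2, 4, 5, 7, 9, 10]:
negatives = tf.constant([0, 1, 0, 1, 1, 1])  # negative indicator per index
print(tf.less_equal(tf.cumsum(negatives), 2).numpy())
# [ True  True  True  True False False]: the running count admits only the
# first two negatives (4 and 7); positives pass through via the logical_or.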
def maybe_update_gradients():
  with tf.name_scope(self._spec.name):
    is_step_within_pruning_range = tf.logical_and(
        tf.greater_equal(self._global_step, self._spec.begin_pruning_step),
        # If end_pruning_step is negative, keep pruning forever!
        tf.logical_or(
            tf.less_equal(self._global_step, self._spec.end_pruning_step),
            tf.less(self._spec.end_pruning_step, 0)))
    return is_step_within_pruning_range
def example_to_bucket_id(example_input, example_target):
  """Return int64 bucket id for this example, calculated based on length."""
  seq_length = _get_example_length((example_input, example_target))

  # TODO: investigate whether removing code branching improves performance.
  conditions_c = tf.logical_and(tf.less_equal(buckets_min, seq_length),
                                tf.less(seq_length, buckets_max))
  bucket_id = tf.reduce_min(tf.where(conditions_c))
  return bucket_id
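# Bucketing sketch (an illustration; buckets_min and buckets_max are closure
# variables in the original pipeline, so the boundaries below are assumed):
buckets_min = tf.constant([0, 8, 16], dtype=tf.int32)
buckets_max = tf.constant([8, 16, 1000], dtype=tf.int32)
seq_length = 10
conditions_c = tf.logical_and(tf.less_equal(buckets_min, seq_length),
                              tf.less(seq_length, buckets_max))
print(tf.reduce_min(tf.where(conditions_c)).numpy())  # 1: length 10 falls in [8, 16)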
def build_graph(parameters):
  """Build the less_equal op testing graph."""
  input_value1 = tf.compat.v1.placeholder(
      dtype=parameters["input_dtype"],
      name="input1",
      shape=parameters["input_shape_pair"][0])
  input_value2 = tf.compat.v1.placeholder(
      dtype=parameters["input_dtype"],
      name="input2",
      shape=parameters["input_shape_pair"][1])
  out = tf.less_equal(input_value1, input_value2)
  return [input_value1, input_value2], [out]
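# Usage sketch (an illustration; placeholders need graph mode, so in TF 2.x
# this must run inside an explicit graph with a v1 Session):
with tf.Graph().as_default():
  inputs, outputs = build_graph(
      {"input_dtype": tf.float32, "input_shape_pair": ([1, 3], [1, 3])})
  with tf.compat.v1.Session() as sess:
    print(sess.run(outputs[0], {inputs[0]: [[1., 2., 3.]],
                                inputs[1]: [[2., 2., 2.]]}))
    # [[ True  True False]]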
def body_inner(j, row, grad):
  assert isinstance(row, tf.TensorArray)
  grad_to_write = tf.transpose(grad, [1, 0])

  # Write grad to row
  row = row.write(
      j - 1,
      tf.cond(tf.less_equal(j, i),
              lambda: grad_to_write,
              lambda: tf.zeros_like(grad_to_write)))

  # Update grad and return
  grad = tf.matmul(dsds[j - 1], grad)
  return j - 1, row, grad
def body_gradient_ascent(itr, cond_terminate, lr_init=100.0):
  """Function for gradient ascent (implemented as descent on the negated Q)."""
  del cond_terminate
  if self.sufficient_ascent_flag:
    # First calculate sufficient descent.
    result_sufficient_descent = tf.while_loop(
        cond_sufficient_descent, body_sufficient_descent,
        [tf.constant(lr_init), tf.constant(False), tf.constant(np.inf)])
    lr_action = result_sufficient_descent[0]
    cost_perturbed = result_sufficient_descent[2]

    cond_terminate = tf.less_equal(
        tf.math.abs(cost_perturbed - self.cost_now),
        self._tolerance_tensor)
  else:
    # No sufficient descent step.
    lr_action = self.learning_rate_ga
    action_variable_perturbed_tensor = (
        self._action_variable_tensor -
        lr_action * self.normalized_action_gradient)
    cost_perturbed = -tf.reduce_mean(
        self._build_q_function_net(self._state_tensor,
                                   action_variable_perturbed_tensor))

    cond_terminate = tf.less_equal(
        tf.math.abs(cost_perturbed - self.cost_now),
        self._tolerance_tensor)

  train_op = tf.train.GradientDescentOptimizer(
      learning_rate=lr_action).apply_gradients(
          grads_and_vars=[(self.normalized_action_gradient,
                           self._action_variable_tensor)])
  # Ensure that the update is applied before continuing.
  with tf.control_dependencies([train_op]):
    itr = itr + 1

  return itr, cond_terminate
def maybe_update_alpha():
  """Operator to update alpha.

  Checks if global_step is between begin_compression_step and
  end_compression_step.
  """
  with tf.compat.v1.name_scope(self._spec.name):
    # Compress if the current step is more than begin_compression_step and
    # less than end_compression_step (unless it's negative).
    is_step_within_compression_range = tf.logical_and(
        tf.greater_equal(
            tf.cast(self._global_step, tf.int32),
            self._spec.begin_compression_step),
        tf.logical_or(
            tf.less_equal(
                tf.cast(self._global_step, tf.int32),
                self._spec.end_compression_step),
            tf.less(self._spec.end_compression_step, 0)))
    is_compression_step = tf.less_equal(
        tf.add(self._last_alpha_update_step,
               self._spec.compression_frequency),
        tf.cast(self._global_step, tf.int32))
    return tf.logical_and(is_step_within_compression_range,
                          is_compression_step)
def provide_dataset(self):
  """Provides dataset (audio, labels) of nsynth."""
  length = 64000
  channels = 1

  pitch_counts = self.get_pitch_counts()
  pitches = sorted(pitch_counts.keys())
  label_index_table = tf.lookup.StaticVocabularyTable(
      tf.lookup.KeyValueTensorInitializer(
          keys=pitches,
          values=np.arange(len(pitches)),
          key_dtype=tf.int64,
          value_dtype=tf.int64),
      num_oov_buckets=1)

  def _parse_nsynth(record):
    """Parsing function for NSynth dataset."""
    features = {
        'pitch': tf.FixedLenFeature([1], dtype=tf.int64),
        'audio': tf.FixedLenFeature([length], dtype=tf.float32),
        'qualities': tf.FixedLenFeature([10], dtype=tf.int64),
        'instrument_source': tf.FixedLenFeature([1], dtype=tf.int64),
        'instrument_family': tf.FixedLenFeature([1], dtype=tf.int64),
    }

    example = tf.parse_single_example(record, features)
    wave, label = example['audio'], example['pitch']
    wave = spectral_ops.crop_or_pad(wave[tf.newaxis, :, tf.newaxis],
                                    length, channels)[0]
    one_hot_label = tf.one_hot(
        label_index_table.lookup(label), depth=len(pitches))[0]
    return wave, one_hot_label, label, example['instrument_source']

  dataset = self._get_dataset_from_path()
  dataset = dataset.map(
      _parse_nsynth, num_parallel_calls=tf.data.experimental.AUTOTUNE)

  # Filter to just the specified instrument sources.
  def _is_wanted_source(s):
    return tf.reduce_any(
        list(map(lambda q: tf.equal(s, q)[0], self._instrument_sources)))

  dataset = dataset.filter(lambda w, l, p, s: _is_wanted_source(s))

  # Filter to just the specified pitches.
  dataset = dataset.filter(
      lambda w, l, p, s: tf.greater_equal(p, self._min_pitch)[0])
  dataset = dataset.filter(
      lambda w, l, p, s: tf.less_equal(p, self._max_pitch)[0])
  dataset = dataset.map(lambda w, l, p, s: (w, l))
  return dataset
def undersampling_filter(example):
  """Computes if given example is rejected or not."""
  class_prob = example['class_prob']
  class_target_prob = example['class_target_prob']
  prob_ratio = tf.cast(class_target_prob / class_prob, dtype=tf.float32)
  prob_ratio = prob_ratio**undersampling_coef
  prob_ratio = tf.minimum(prob_ratio, 1.0)

  acceptance = tf.less_equal(
      tf.random_uniform([], dtype=tf.float32), prob_ratio)
  return acceptance
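# Complementary usage to oversample_classes above (a sketch, not from the
# original source): randomly reject examples from over-represented classes.
# Assumes the same dict-valued `dataset`, an `undersampling_coef` in scope,
# and TF 1.x (tf.random_uniform).
dataset = dataset.filter(undersampling_filter)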
def _prec_rec(conf_gt, conf_out, reg_gt, reg_out, config):
  """Creates precision and recall metrics. Returns (precision, recall).

  Args:
    conf_gt: Ground truth confidence, i.e. 1 for close anchors, 0 for
      anchors that are too far off and -1 for anchors to be ignored.
      Must have shape (?, fh, fw, k).
    conf_out: PPN confidence output, must have shape (?, fh, fw, k).
    reg_gt: Ground truth point offsets, need only have valid values for
      the anchors with conf_gt of 1. Must have shape (?, fh, fw, 2k).
    reg_out: PPN anchor offset output, must have shape (?, fh, fw, 2k).
    config: The configuration dictionary. See ppn.config.ppn_config.
  """
  import tensorflow.compat.v1 as tf

  score_thr = config['score_thr']
  dist_thr = config['dist_thr']

  # Mask positive outputs and ground truths.
  gt_pos_mask = tf.equal(conf_gt, 1)
  gt_pos_count = tf.count_nonzero(gt_pos_mask, dtype=tf.int32)
  out_pos_mask = tf.greater_equal(conf_out, score_thr)
  out_pos_count = tf.count_nonzero(out_pos_mask, dtype=tf.int32)

  # Calculate regression distances.
  dist = reg_gt - reg_out
  dist *= dist
  dist = tf.reduce_sum(dist, axis=3)
  close_mask = tf.less_equal(dist, dist_thr * dist_thr)  # uses squared distance

  # Calculate the number of correct predictions.
  correct_mask = tf.logical_and(
      tf.logical_and(gt_pos_mask, out_pos_mask), close_mask)
  correct_count = tf.count_nonzero(correct_mask, dtype=tf.float32)

  # Guard each ratio against a zero denominator.
  precision = tf.where(tf.equal(out_pos_count, 0), 0.0,
                       correct_count / tf.cast(out_pos_count, tf.float32))
  recall = tf.where(tf.equal(gt_pos_count, 0), 0.0,
                    correct_count / tf.cast(gt_pos_count, tf.float32))
  return tf.stop_gradient(precision), tf.stop_gradient(recall)
def image_overlap(depth1, pose1_c2w, depth2, pose2_c2w, intrinsics):
  """Determines the overlap of two images."""
  pose1_w2c = tf.matrix_inverse(
      tf.concat([pose1_c2w, tf.constant([[0., 0., 0., 1.]])], 0))[:3]
  pose2_w2c = tf.matrix_inverse(
      tf.concat([pose2_c2w, tf.constant([[0., 0., 0., 1.]])], 0))[:3]
  p_world1 = camera_to_world_projection(depth1, intrinsics, pose1_c2w)
  p_image1_in_2, z1_c2 = world_to_camera_projection(
      p_world1, intrinsics, pose2_w2c)
  p_world2 = camera_to_world_projection(depth2, intrinsics, pose2_c2w)
  p_image2_in_1, z2_c1 = world_to_camera_projection(
      p_world2, intrinsics, pose1_w2c)

  shape = depth1.shape.as_list()
  height, width = shape[0], shape[1]
  height = tf.cast(height, tf.float32)
  width = tf.cast(width, tf.float32)
  mask_h2_in_1 = tf.logical_and(
      tf.less_equal(p_image2_in_1[:, :, 1], height),
      tf.greater_equal(p_image2_in_1[:, :, 1], 0.))
  mask_w2_in_1 = tf.logical_and(
      tf.less_equal(p_image2_in_1[:, :, 0], width),
      tf.greater_equal(p_image2_in_1[:, :, 0], 0.))
  mask2_in_1 = tf.logical_and(
      tf.logical_and(mask_h2_in_1, mask_w2_in_1), z2_c1 > 0)

  mask_h1_in_2 = tf.logical_and(
      tf.less_equal(p_image1_in_2[:, :, 1], height),
      tf.greater_equal(p_image1_in_2[:, :, 1], 0.))
  mask_w1_in_2 = tf.logical_and(
      tf.less_equal(p_image1_in_2[:, :, 0], width),
      tf.greater_equal(p_image1_in_2[:, :, 0], 0.))
  mask1_in_2 = tf.logical_and(
      tf.logical_and(mask_h1_in_2, mask_w1_in_2), z1_c2 > 0)
  return mask1_in_2, mask2_in_1
def filter_by_num_objects(self, d):
  if "visibility" not in d:
    return tf.constant(True)
  # When max_num_objects is set, both bounds collapse to that value, so the
  # filter keeps scenes with exactly that many visible objects; otherwise it
  # keeps scenes with 0 to 6 objects.
  min_num_objects = self.max_num_objects or 0
  max_num_objects = self.max_num_objects or 6
  min_predicate = tf.greater_equal(
      tf.reduce_sum(d["visibility"]),
      tf.constant(min_num_objects - 1e-5, dtype=tf.float32))
  max_predicate = tf.less_equal(
      tf.reduce_sum(d["visibility"]),
      tf.constant(max_num_objects + 1e-5, dtype=tf.float32))
  return tf.logical_and(min_predicate, max_predicate)
def p_sample_loop_trajectory(self, denoise_fn, *, shape,
                             noise_fn=tf.random_normal,
                             repeat_noise_steps=-1):
  """Generate samples, returning intermediate images.

  Useful for visualizing how denoised images evolve over time.

  Args:
    repeat_noise_steps (int): Number of denoising timesteps in which the same
      noise is used across the batch. If >= 0, the initial noise is the same
      for all batch elements.
  """
  i_0 = tf.constant(self.num_timesteps - 1, dtype=tf.int32)
  assert isinstance(shape, (tuple, list))
  img_0 = noise_like(shape, noise_fn, repeat_noise_steps >= 0)
  times = tf.Variable([i_0])
  imgs = tf.Variable([img_0])

  # Steps with repeated noise
  times, imgs = tf.while_loop(
      cond=lambda times_, _: tf.less_equal(
          self.num_timesteps - times_[-1], repeat_noise_steps),
      body=lambda times_, imgs_: [
          tf.concat([times_, [times_[-1] - 1]], 0),
          tf.concat([imgs_, [self.p_sample(denoise_fn=denoise_fn,
                                           x=imgs_[-1],
                                           t=tf.fill([shape[0]], times_[-1]),
                                           noise_fn=noise_fn,
                                           repeat_noise=True)]], 0)
      ],
      loop_vars=[times, imgs],
      shape_invariants=[tf.TensorShape([None, *i_0.shape]),
                        tf.TensorShape([None, *img_0.shape])],
      back_prop=False)

  # Steps with different noise for each batch element
  times, imgs = tf.while_loop(
      cond=lambda times_, _: tf.greater_equal(times_[-1], 0),
      body=lambda times_, imgs_: [
          tf.concat([times_, [times_[-1] - 1]], 0),
          tf.concat([imgs_, [self.p_sample(denoise_fn=denoise_fn,
                                           x=imgs_[-1],
                                           t=tf.fill([shape[0]], times_[-1]),
                                           noise_fn=noise_fn,
                                           repeat_noise=False)]], 0)
      ],
      loop_vars=[times, imgs],
      shape_invariants=[tf.TensorShape([None, *i_0.shape]),
                        tf.TensorShape([None, *img_0.shape])],
      back_prop=False)

  assert imgs[-1].shape == shape
  return times, imgs
def assert_box_normalized(boxes, maximum_normalized_coordinate=1.1):
  """Asserts the input box tensor is normalized.

  Args:
    boxes: a tensor of shape [N, 4] where N is the number of boxes.
    maximum_normalized_coordinate: Maximum coordinate value to be considered
      as normalized, default to 1.1.

  Returns:
    a tf.Assert op which fails when the input box tensor is not normalized.

  Raises:
    ValueError: When the input box tensor is not normalized.
  """
  box_minimum = tf.reduce_min(boxes)
  box_maximum = tf.reduce_max(boxes)
  return tf.Assert(
      tf.logical_and(
          tf.less_equal(box_maximum, maximum_normalized_coordinate),
          tf.greater_equal(box_minimum, 0)),
      [boxes])
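# Usage sketch (an illustration; in TF 1.x graph mode the assert only fires
# when placed on the execution path via control_dependencies):
boxes = tf.constant([[0.1, 0.2, 0.9, 1.0]])
assert_op = assert_box_normalized(boxes)
with tf.control_dependencies([assert_op]):
  boxes = tf.identity(boxes)  # downstream ops now depend on the check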