def map_box_encodings(i):
    """Build K-hot class and confidence vectors for the entries tagged ``i``.

    Reads ``unique_indices``, ``num_boxes``, ``classes``, ``confidences`` and
    ``num_classes`` from the enclosing scope.

    Args:
        i: Value compared against every element of ``unique_indices``; the
            entries that match are the ones encoded.

    Returns:
        A pair ``(class_encoding, confidence_encoding)``, both dense vectors of
        length ``num_classes``. The first holds an int64 ``1`` at each selected
        class id, the second holds the matching confidence at the same spot.
    """
    # Broadcast ``i`` to one value per box so it can be compared element-wise.
    wanted = i * tf.ones(num_boxes, dtype=tf.int64)
    selected = tf.reshape(tf.equal(unique_indices, wanted), [-1])

    picked_classes = tf.boolean_mask(classes, selected)
    picked_confidences = tf.boolean_mask(confidences, selected)

    def scatter(values):
        # Index validation is disabled, so the class ids may arrive in any
        # order.
        return tf.sparse_to_dense(
            picked_classes, [num_classes], values, validate_indices=False)

    k_hot = scatter(tf.constant(1, dtype=tf.int64))
    score_vector = scatter(picked_confidences)
    return k_hot, score_vector
def _count_matrix_input(self, filenames, submatrix_rows, submatrix_cols):
    """Build the ops that load one co-occurrence submatrix shard per read.

    Args:
        filenames: List of shard files. It is shuffled in place before being
            handed to the filename queue.
        submatrix_rows: Number of rows in every shard.
        submatrix_cols: Number of columns in every shard.

    Returns:
        A tuple ``(global_row, global_col, count)`` where the first two are the
        int64 id vectors stored in the shard and ``count`` is the dense
        ``[submatrix_rows, submatrix_cols]`` matrix rebuilt from the shard's
        sparse (row, col, value) triples.
    """
    random.shuffle(filenames)
    queue = tf.train.string_input_producer(filenames)
    # Each file is read whole and parsed as a single serialized example.
    _, serialized_example = tf.WholeFileReader().read(queue)

    feature_spec = {
        'global_row': tf.FixedLenFeature([submatrix_rows], dtype=tf.int64),
        'global_col': tf.FixedLenFeature([submatrix_cols], dtype=tf.int64),
        'sparse_local_row': tf.VarLenFeature(dtype=tf.int64),
        'sparse_local_col': tf.VarLenFeature(dtype=tf.int64),
        'sparse_value': tf.VarLenFeature(dtype=tf.float32)
    }
    parsed = tf.parse_single_example(serialized_example, features=feature_spec)

    local_rows = parsed['sparse_local_row'].values
    local_cols = parsed['sparse_local_col'].values
    cell_values = parsed['sparse_value'].values

    # Pair the row and column vectors into an [nnz, 2] coordinate list.
    row_column = tf.expand_dims(local_rows, 1)
    col_column = tf.expand_dims(local_cols, 1)
    coords = tf.concat(axis=1, values=[row_column, col_column])

    dense_counts = tf.sparse_to_dense(
        coords, [submatrix_rows, submatrix_cols], cell_values)
    return parsed['global_row'], parsed['global_col'], dense_counts
def build_graph(parameters):
    """Construct the graph used to exercise the sparse_to_dense op.

    Args:
        parameters: Mapping with the keys ``dense_shape``, ``value_is_scalar``,
            ``value_count``, ``value_dtype``, ``index_dtype`` and
            ``default_value``.

    Returns:
        A pair ``([value_placeholder], [output_tensor])``.
    """
    dense_shape = parameters["dense_shape"]

    # A scalar value placeholder is only used when exactly one value is
    # requested; every other combination gets a 1-D placeholder.
    use_scalar = parameters["value_is_scalar"] and parameters["value_count"] == 1
    value_shape = () if use_scalar else [parameters["value_count"]]
    value = tf.compat.v1.placeholder(
        name="value", dtype=parameters["value_dtype"], shape=value_shape)

    # Keep drawing until there are `value_count` distinct indices.
    unique_indices = set()
    while len(unique_indices) < parameters["value_count"]:
        unique_indices.add(generate_index(dense_shape))
    index_tensor = tf.constant(
        tuple(unique_indices), dtype=parameters["index_dtype"])

    # TODO(renjieliu): Add test for validate_indices case.
    dense = tf.sparse_to_dense(
        index_tensor,
        dense_shape,
        value,
        parameters["default_value"],
        validate_indices=False)
    return [value], [dense]
def disable_some_fgs():
    """Negate the labels of a random surplus of foreground entries.

    Reads ``fg_inds``, ``max_fg``, ``proposals_label``,
    ``proposals_label_shape`` and ``self._seed`` from the enclosing scope.

    Returns:
        A copy of ``proposals_label`` in which ``len(fg_inds) - max_fg``
        randomly chosen foreground positions carry ``-label`` instead of
        ``label``.
    """
    # Shuffle along dimension 0; the first `surplus` shuffled entries are the
    # ones that get disabled.
    permuted = tf.random_shuffle(fg_inds, seed=self._seed)
    surplus = tf.shape(fg_inds)[0] - max_fg

    # The caller is expected to invoke this only when there are more than
    # `max_fg` foregrounds; fail loudly otherwise.
    surplus_is_positive = tf.assert_positive(
        surplus,
        message="disable_place in disable_some_fgs is negative.")
    with tf.control_dependencies([surplus_is_positive]):
        dropped = permuted[:surplus]

    dense_shape = tf.cast(proposals_label_shape, tf.int64)
    # The indices come out of a shuffle, so they are unordered and validation
    # has to be switched off.
    drop_mask = tf.sparse_to_dense(
        sparse_indices=dropped,
        sparse_values=True,
        default_value=False,
        output_shape=dense_shape,
        validate_indices=False)

    # Disabled entries get -label (rather than a constant) to ease debugging.
    negated = tf.negative(proposals_label)
    return tf.where(condition=drop_mask, x=negated, y=proposals_label)
def loss(logits, labels):
    """Compute the mean softmax cross-entropy between logits and labels.

    The integer labels are expanded to dense 1-hot rows (1.0 at the label
    column, 0.0 elsewhere) with the same shape as ``logits`` before the
    cross-entropy is taken. A scalar summary named ``summary/loss`` is
    registered as a side effect.

    Args:
        logits: input tensor, float - [batch_size, NUM_CLASSES].
        labels: Labels tensor, int32 - [batch_size].

    Returns:
        Scalar float tensor holding the batch-averaged loss.
    """
    num_examples = tf.size(labels)
    label_column = tf.expand_dims(labels, 1)
    row_column = tf.expand_dims(tf.range(0, num_examples), 1)
    # One (row, label) coordinate per example.
    coords = tf.concat([row_column, label_column], 1)
    dense_targets = tf.sparse_to_dense(coords, tf.shape(logits), 1.0, 0.0)

    per_example = tf.nn.softmax_cross_entropy_with_logits(
        logits=logits, labels=dense_targets, name='xentropy')
    mean_loss = tf.reduce_mean(per_example, name='loss')
    tf.summary.scalar('summary/loss', mean_loss)
    return mean_loss
def threshold_vec(vec, target_sparsity):
    """Return a 0/1 float mask marking the largest entries of ``vec``.

    Args:
        vec: 1-D tensor whose first dimension is statically known.
        target_sparsity: Fraction of entries to drop; the
            ``round(len(vec) * (1 - target_sparsity))`` largest values are
            kept.

    Returns:
        A float32 tensor shaped like ``vec`` with 1.0 at the kept positions
        and 0.0 elsewhere.
    """
    total = vec.shape.as_list()[0]
    keep_count = int(round(total * (1. - target_sparsity)))
    _, keep_idx = tf.nn.top_k(vec, k=keep_count, sorted=True)
    ones = tf.ones_like(keep_idx, dtype=tf.float32)
    # top_k returns positions ordered by value, not by index, so index
    # validation must stay off.
    return tf.sparse_to_dense(
        keep_idx, tf.shape(vec), ones, validate_indices=False)
def _dense_intersect_indices(tensor, required_sp_tensor):
    """Gather ``tensor`` at the sparse tensor's positions and re-densify.

    Args:
        tensor: Dense tensor indexed by the first two index columns of
            ``required_sp_tensor``.
        required_sp_tensor: SparseTensor naming the positions to read.

    Returns:
        A dense tensor of shape ``[dense_shape[0], max_len, 1]`` holding the
        gathered values, where ``max_len`` comes from
        ``_example_index_to_sparse_index``.
    """
    sp_indices = required_sp_tensor.indices
    gathered = tf.gather_nd(tensor, sp_indices[:, 0:2])

    example_ids = tf.to_int32(sp_indices[:, 0])
    num_examples = tf.to_int32(required_sp_tensor.dense_shape[0])
    dense_coords, max_len = _example_index_to_sparse_index(
        example_ids, num_examples)

    out_shape = tf.stack([required_sp_tensor.dense_shape[0], max_len])
    dense = tf.sparse_to_dense(dense_coords, out_shape, gathered)
    # Append a trailing unit dimension.
    return tf.expand_dims(dense, axis=2)
def threshold_vec(vec, target_sparsity):
    """Build a pruning mask that keeps the top entries of ``vec``.

    Args:
        vec: 1-D tensor with a statically known length.
        target_sparsity: Fraction of parameters to prune away.

    Returns:
        A float32 tensor with the shape of ``vec``: 1.0 where a parameter
        survives pruning, 0.0 where it is removed.
    """
    param_count = vec.shape.as_list()[0]
    # Number of parameters that remain after pruning.
    survivors = int(round(param_count * (1. - target_sparsity)))

    # Positions of the `survivors` largest values; the values themselves are
    # not needed.
    _, survivor_idx = tf.nn.top_k(vec, k=survivors, sorted=True)

    mask_values = tf.ones_like(survivor_idx, dtype=tf.float32)
    mask = tf.sparse_to_dense(
        survivor_idx, tf.shape(vec), mask_values, validate_indices=False)
    return mask
def single_obj_scoremap(scoremap):
    """Grow a single-object map out of a segmentation scoremap.

    For every batch element a map with a single 1.0 at the location returned
    by ``find_max_location`` is repeatedly dilated and multiplied with the
    rounded foreground score, then rounded again.

    Args:
        scoremap: 4-D tensor [B, H, W, C] with fully static B, H and W.
            Channels 1 and up are treated as foreground.

    Returns:
        Tensor of shape [B, H, W, 1] with the grown object map per batch item.

    Raises:
        AssertionError: If ``scoremap`` is not 4-D.
    """
    with tf.variable_scope('single_obj_scoremap'):
        filter_size = 21
        s = scoremap.get_shape().as_list()
        assert len(s) == 4, "Scoremap must be 4D."
        batch, height, width = s[:3]

        # Softmax normalizes over the last (channel) dimension.
        softmaxed = tf.nn.softmax(scoremap)
        fg_score = tf.reduce_max(softmaxed[:, :, :, 1:], 3)  # B, H, W
        fg_binary = tf.round(fg_score)  # B, H, W

        # The strongest foreground location seeds the growing process.
        peak_loc = find_max_location(fg_score)

        dilation_kernel = tf.ones(
            (filter_size, filter_size, 1)) / float(filter_size * filter_size)

        grown_maps = []
        for i in range(batch):
            # Initial map: a single one at the peak (one point, two coords).
            seed_index = tf.reshape(peak_loc[i, :], [1, 2])
            objectmap = tf.sparse_to_dense(seed_index, [height, width], 1.0)

            # Enough passes for the map to spread over the whole image.
            num_passes = max(height, width) // (filter_size // 2)
            for _ in range(num_passes):
                objectmap = tf.reshape(objectmap, [1, height, width, 1])
                dilated = tf.nn.dilation2d(objectmap, dilation_kernel,
                                           [1, 1, 1, 1], [1, 1, 1, 1], 'SAME')
                dilated = tf.reshape(dilated, [height, width])
                # Pixel-wise AND with the detected foreground.
                objectmap = tf.round(
                    tf.multiply(fg_binary[i, :, :], dilated))

            grown_maps.append(tf.reshape(objectmap, [height, width, 1]))

        return tf.stack(grown_maps)
def disable_some_bgs():
    """Mark a random surplus of background entries as ignored (-1).

    Reads ``bg_inds``, ``max_bg``, ``proposals_label``,
    ``proposals_label_shape`` and ``self._seed`` from the enclosing scope.

    Returns:
        A copy of ``proposals_label`` in which ``len(bg_inds) - max_bg``
        randomly chosen background positions are set to -1.
    """
    permuted = tf.random_shuffle(bg_inds, seed=self._seed)
    surplus = tf.shape(bg_inds)[0] - max_bg

    # Guard against being called with fewer than `max_bg` backgrounds.
    surplus_not_negative = tf.assert_non_negative(
        surplus,
        message="disable_place in disable_some_bgs is negative.")
    with tf.control_dependencies([surplus_not_negative]):
        dropped = permuted[:surplus]

    dense_shape = tf.cast(proposals_label_shape, tf.int64)
    # Shuffled indices are unordered, hence no index validation.
    drop_mask = tf.sparse_to_dense(
        sparse_indices=dropped,
        sparse_values=True,
        default_value=False,
        output_shape=dense_shape,
        validate_indices=False)

    ignored = tf.fill(dims=proposals_label_shape, value=-1.)
    return tf.where(condition=drop_mask, x=ignored, y=proposals_label)
def hacked_tf_one_hot(indices, depth, on_value, off_value, name=None):
    """Stand-in for ``tf.one_hot`` on TensorFlow builds that lack it.

    Real signature:
        tf.one_hot(indices, depth, on_value, off_value, axis=None, name=None)
    Emulated signature (the one-hot axis is always the last one):
        tf.one_hot(indices, depth, on_value, off_value, axis=-1, name=None)

    Not needed if using newer versions of TensorFlow. The body relies on the
    legacy ``tf.concat(axis, values)`` argument order and on ``tf.pack``.

    Args:
        indices: 1-D tensor of column ids, one per output row. It is
            concatenated with int64 row ids, so presumably int64 itself.
        depth: Number of columns in the output.
        on_value: Value written at ``[row, indices[row]]``.
        off_value: Value written everywhere else.
        name: Optional name for the returned op.

    Returns:
        Tensor of shape ``[len(indices), depth]``.
    """
    num_rows = tf.shape(indices)[0]
    row_ids = tf.expand_dims(tf.to_int64(tf.range(num_rows)), 1)
    col_ids = tf.expand_dims(indices, 1)
    coords = tf.concat(1, [row_ids, col_ids])

    # Assumption: axis=-1, i.e. the output is laid out as [N, depth].
    dense_shape = tf.to_int64(tf.pack([num_rows, depth]))
    dense = tf.sparse_to_dense(coords, dense_shape, on_value, off_value)
    return tf.reshape(dense, (-1, depth), name=name)
def one_hot_encoding(labels, num_classes, scope=None):
    """Transform numeric labels into onehot_labels.

    Args:
        labels: [batch_size] target labels. The batch size is read from the
            static shape, so it has to be known at graph-construction time.
        num_classes: total number of classes.
        scope: Optional scope for name_scope.

    Returns:
        A float tensor of shape [batch_size, num_classes] with 1.0 at each
        row's label column and 0.0 everywhere else.
    """
    with tf.name_scope(scope, 'OneHotEncoding', [labels]):
        batch_size = labels.get_shape()[0]
        indices = tf.expand_dims(tf.range(0, batch_size), 1)
        # Match the dtype of the row ids so both columns can be concatenated.
        labels = tf.cast(tf.expand_dims(labels, 1), indices.dtype)
        concated = tf.concat([indices, labels], 1)
        # `tf.pack` was renamed to `tf.stack` and no longer exists in the
        # TensorFlow releases that accept `tf.concat(values, axis)`, so the
        # two calls above and below must both use the post-1.0 spelling.
        output_shape = tf.stack([batch_size, num_classes])
        onehot_labels = tf.sparse_to_dense(concated, output_shape, 1.0, 0.0)
        onehot_labels.set_shape([batch_size, num_classes])
        return onehot_labels
def subsample_positive():
    """Set a random surplus of foreground labels to -1 (ignored).

    Reads ``fg_inds``, ``num_fg``, ``labels`` and ``self._seed`` from the
    enclosing scope.

    Returns:
        ``labels`` with ``len(fg_inds) - num_fg`` randomly chosen foreground
        positions replaced by -1, so that only ``num_fg`` foregrounds remain.
    """
    shuffled_fg = tf.random_shuffle(fg_inds, seed=self._seed)

    # Everything beyond the `num_fg` entries we want to keep is dropped.
    surplus = tf.shape(fg_inds)[0] - num_fg
    dropped = shuffled_fg[:surplus]

    # sparse_to_dense validates index order here, so sort ascending:
    # top_k yields descending order, the reverse flips it.
    descending, _ = tf.nn.top_k(dropped, k=tf.shape(dropped)[-1])
    ascending = tf.reverse(descending, [0])

    drop_mask = tf.sparse_to_dense(
        ascending,
        tf.shape(labels, out_type=tf.int64),
        True,
        default_value=False)

    # Put -1 in the selected positions to ignore those anchors.
    ignored = tf.to_float(tf.fill(tf.shape(labels), -1))
    return tf.where(condition=tf.squeeze(drop_mask), x=ignored, y=labels)
def _single_token_mask(inputs, tgt_len, num_predict, exclude_mask=None):
    """Randomly pick individual token positions as prediction targets.

    Positions holding ``FLAGS.cls_id``, ``FLAGS.sep_id`` or ``FLAGS.pad_id``
    are never picked, nor are positions flagged in ``exclude_mask``.

    Args:
        inputs: 1-D tensor of token ids of length ``tgt_len``.
        tgt_len: Length of the sequence.
        num_predict: Maximum number of positions to select.
        exclude_mask: Optional boolean tensor of extra positions to skip.

    Returns:
        A pair ``(is_target, target_mask)``: a boolean and a float (1.0 / 0.0)
        vector of length ``tgt_len`` marking the selected positions.
    """
    special = tf.equal(inputs, FLAGS.cls_id)
    for token_id in (FLAGS.sep_id, FLAGS.pad_id):
        special = tf.logical_or(special, tf.equal(inputs, token_id))

    blocked = (special if exclude_mask is None
               else tf.logical_or(special, exclude_mask))
    allowed = tf.logical_not(blocked)

    positions = tf.range(tgt_len, dtype=tf.int64)
    candidates = tf.boolean_mask(positions, allowed)

    # Shuffle, keep the first `num_predict`, then restore ascending order as
    # required by the validated sparse_to_dense below.
    shuffled = tf.random.shuffle(candidates)
    chosen = tf.sort(shuffled[:num_predict])

    target_mask = tf.sparse_to_dense(
        sparse_indices=chosen,
        output_shape=[tgt_len],
        sparse_values=1.0,
        default_value=0.0)
    return tf.cast(target_mask, tf.bool), target_mask
def output(self):
    """Build logits, top-k predictions, losses, metrics and summaries.

    Sets the following attributes on ``self``: ``item_result``,
    ``indices1/5/10/30/50``, ``loss_origin``, ``loss_reconsume``,
    ``precision``, ``recall`` and ``loss``. Registers scalar summaries and
    finally calls ``self.cal_gradient`` on all trainable variables.

    Returns:
        None.
    """
    with tf.name_scope('CrossEntropyLoss'):
        # L2 penalty over the embeddings used by the model.
        l2_norm = tf.add_n([
            tf.nn.l2_loss(self.item_list_emb),
            tf.nn.l2_loss(self.category_list_emb),
            tf.nn.l2_loss(self.position_list_emb),
            tf.nn.l2_loss(self.user_embedding),
            tf.nn.l2_loss(self.reconsume_lst_embedding)
        ])
        regulation_rate = self.FLAGS.regulation_rate
        item_lookup_table_T = tf.transpose(self.embedding.item_emb_lookup_table)
        # The string below is disabled code kept by the original author (an
        # alternative projection through a learned `output_w` matrix).
        ''' self.output_w = variable_scope.get_variable("output_w", shape=[self.num_units, self.num_units], dtype=self.predict_behavior_emb.dtype) logits = tf.matmul(self.predict_behavior_emb, self.output_w) '''
        # Score every item by multiplying the predicted behavior embedding
        # with the transposed item embedding table.
        logits = tf.matmul(self.predict_behavior_emb, item_lookup_table_T)

        # Row ids: each batch row index repeated `max_len` times, flattened
        # to a column so it lines up with the flattened item list.
        row_idx = tf.reshape(tf.range(0, self.now_bacth_data_size, delta=1), [-1, 1])
        row_idx = tf.tile(row_idx, [1, self.max_len])
        row_idx = tf.reshape(row_idx, [-1, 1])

        # Column ids: the item id at valid positions; positions past
        # `seq_length` are redirected to column `item_count`.
        masks = tf.sequence_mask(self.seq_length, maxlen=self.max_len)
        mask_item_list = tf.where(masks, self.item_list,
                                  (1 - tf.to_int32(masks)) * self.embedding.item_count)
        col_idx = tf.reshape(mask_item_list, [-1, 1])

        # Scatter the per-position reconsume scores into a dense
        # [batch, item_count + 3] matrix. Validation is off: the (row, col)
        # pairs are unordered and can repeat (e.g. all padded positions of a
        # row share one column).
        reconsume_scores = tf.sparse_to_dense(sparse_indices=tf.concat([row_idx, col_idx], axis=1),
                                              sparse_values=tf.reshape(self.reconsume_scores, [-1, ]),
                                              output_shape=(self.now_bacth_data_size, self.embedding.item_count+3),
                                              validate_indices=False)
        # TODO: re-score
        # Add the reconsume scores to the logits, scaled per row by
        # `predict_is_reconsume`.
        predict_is_reconsume = tf.expand_dims(self.predict_is_reconsume, axis=-1)
        logits = logits + predict_is_reconsume * reconsume_scores
        self.item_result = logits

        # TODO for speed
        self.indices1 = tf.nn.top_k(self.item_result, 1).indices
        self.indices5 = tf.nn.top_k(self.item_result, 5).indices
        self.indices10 = tf.nn.top_k(self.item_result, 10).indices
        self.indices30 = tf.nn.top_k(self.item_result, 30).indices
        self.indices50 = tf.nn.top_k(self.item_result, 50).indices

        # Per-example cross-entropy against the one-hot target item.
        log_probs = tf.nn.log_softmax(logits)
        label_ids = tf.reshape(self.target[0], [-1])
        one_hot_labels = tf.one_hot(
            label_ids, depth=self.embedding.item_count + 3, dtype=tf.float32)
        self.loss_origin = -tf.reduce_sum(log_probs * one_hot_labels,
                                          axis=[-1])

        """ loss reconsume """
        # Two-column [1 - p, p] tensor fed to the softmax cross-entropy as
        # logits.
        # NOTE(review): if `predict_is_reconsume` is already a probability,
        # these are probabilities rather than raw logits - confirm intended.
        predict_is_reconsume = tf.reshape(self.predict_is_reconsume,[-1,1])
        predict_is_reconsume = tf.concat([1-predict_is_reconsume,predict_is_reconsume],axis=-1)
        reconsume_labels = tf.one_hot(
            tf.to_int32(self.is_reconsume), depth = 2 , dtype=tf.float32)
        self.loss_reconsume = tf.nn.softmax_cross_entropy_with_logits(labels = reconsume_labels,logits=predict_is_reconsume)

        # Streaming precision / recall of the binary reconsume prediction.
        predictions = tf.argmax(predict_is_reconsume,axis=-1,output_type=tf.int32)
        self.precision = tf.metrics.precision(labels=self.is_reconsume,predictions=predictions)
        self.recall = tf.metrics.recall(labels=self.is_reconsume,predictions=predictions)

        # Total loss: weighted L2 penalty + item loss + reconsume loss.
        self.loss = regulation_rate * l2_norm + tf.reduce_mean(self.loss_origin) +\
                    tf.reduce_mean(self.loss_reconsume)
        # self.loss = regulation_rate * l2_norm + tf.reduce_mean(self.loss_origin)
        # tf.summary.scalar('l2_norm', l2_norm)
        tf.summary.scalar('Training Cross Entropy Loss', tf.reduce_mean(self.loss_origin))
        tf.summary.scalar('Training Reconsume Loss', tf.reduce_mean(self.loss_reconsume))
        tf.summary.scalar('normalized Training Loss', self.loss)
        tf.summary.scalar('l2_norm', l2_norm)
        tf.summary.scalar('Learning_rate', self.learning_rate)
    self.cal_gradient(tf.trainable_variables())
def combine_observation_code_and_values(observation_code_ids,
                                        observation_values, vocab_size, mode,
                                        normalize, momentum, min_value,
                                        max_value):
    """Scatter observation values into a dense per-code tensor.

    Args:
        observation_code_ids: A SparseTensor of type int32 holding the ids of
            the observation codes, dense shape
            [batch_size, max_sequence_length, 1]. There may be no time gaps
            in between codes.
        observation_values: A SparseTensor of type float holding the
            observation values, dense shape
            [batch_size, max_sequence_length, 1]. There may be no time gaps
            in between codes.
        vocab_size: The ids in observation_code_ids range from 0 to
            vocab_size.
        mode: The execution mode, as defined in tf.estimator.ModeKeys.
        normalize: Whether to normalize each lab test.
        momentum: For the batch normalization, mean and variance are updated
            as momentum * old_value + (1 - momentum) * new_value.
        min_value: Lower cap applied to the dense observation values.
        max_value: Upper cap applied to the dense observation values.

    Returns:
        A pair ``(obs_values, indicator)``:

        - obs_values: float32 Tensor of shape
          [batch_size, max_sequence_length, vocab_size] where
          obs_values[b, t, id] = observation_values[b, t, 0] for
          id = observation_code_ids[b, t, 0]; every other position is filled
          with 0 before the min/max capping is applied to the whole tensor.
        - indicator: Tensor of the same shape that is 1 exactly where
          obs_values was filled from observation_values and 0 elsewhere.
    """
    code_ids = observation_code_ids.values

    if normalize:
        with tf.variable_scope('values'):
            observation_values = normalize_each_feature(
                observation_values, code_ids, vocab_size, mode, momentum)

    # Drop the trailing unit dimension: keep only the (batch, time) columns.
    batch_time_indices = observation_values.indices[:, 0:2]
    batch_time_shape = observation_values.dense_shape[0:2]
    values_rank2 = tf.SparseTensor(
        values=observation_values.values,
        indices=batch_time_indices,
        dense_shape=batch_time_shape)

    # Use the code id as the third coordinate of every value.
    code_column = tf.expand_dims(code_ids, axis=1)
    obs_indices = tf.concat(
        [values_rank2.indices, code_column], axis=1, name='obs_indices')
    obs_shape = tf.concat(
        [values_rank2.dense_shape, [vocab_size]], axis=0, name='obs_shape')
    static_shape = [None, None, vocab_size]

    dense_values = tf.sparse_to_dense(
        obs_indices, obs_shape, values_rank2.values)
    dense_values.set_shape(static_shape)

    indicator = tf.sparse_to_dense(
        obs_indices, obs_shape, tf.ones_like(values_rank2.values))
    indicator.set_shape(static_shape)

    # Cap from above first, then from below.
    capped = tf.minimum(dense_values, max_value)
    capped = tf.maximum(capped, min_value)
    return capped, indicator
def true_fn():
    """Return a K-hot float vector of the labeled ground-truth classes.

    Reads ``groundtruth_labeled_classes``, ``_LABEL_OFFSET`` and
    ``num_classes`` from the enclosing scope.

    Returns:
        A float32 vector of length ``num_classes`` with 1.0 at every
        offset-corrected class id.
    """
    zero_based_classes = groundtruth_labeled_classes - _LABEL_OFFSET
    on_value = tf.constant(1, dtype=tf.float32)
    return tf.sparse_to_dense(
        zero_based_classes, [num_classes], on_value, validate_indices=False)
def _build(self, probs, all_anchors, gt_boxes):
    """Compute per-anchor class labels and box regression targets.

    Args:
        probs: Per-anchor class scores. Only `probs[:, 1:]` is read (column
            0 is skipped), so presumably the shape is
            (num_anchors, num_classes + 1) with background first - TODO
            confirm against the caller.
        all_anchors: A Tensor with anchors for all of SSD's features.
            The shape of the Tensor is (num_anchors, 4).
        gt_boxes: A Tensor with the ground truth boxes for the image.
            The shape of the Tensor is (num_gt, 5), having the truth label
            as the last value for each box.

    Returns:
        class_targets: Either a truth value of the anchor (a value
            between 0 and num_classes, with 0 being background), or -1 when
            the anchor is to be ignored in the minibatch.
            The Tensor is 1-D with one entry per anchor.
        bbox_offsets_targets: A bounding box regression target for each of
            the anchors that have a greater than zero label. For every
            other anchors we return zeros.
            The shape of the Tensor is (num_anchors, 4).
    """
    all_anchors = tf.cast(all_anchors, tf.float32)
    gt_boxes = tf.cast(gt_boxes, tf.float32)

    # We are going to label each anchor based on the IoU with
    # `gt_boxes`. Start by filling the labels with -1, marking them as
    # unknown.
    anchors_label_shape = tf.gather(tf.shape(all_anchors), [0])
    anchors_label = tf.fill(dims=anchors_label_shape, value=-1.)

    # overlaps[A, G] compares anchor A with ground-truth box G.
    overlaps = bbox_overlap_tf(all_anchors, gt_boxes[:, :4])
    max_overlaps = tf.reduce_max(overlaps, axis=1)

    # Get the index of the best gt_box for each anchor.
    best_gtbox_for_anchors_idx = tf.argmax(overlaps, axis=1)

    # Having the index of the gt bbox with the best label we need to get
    # the label for each gt box and sum 1 to it because 0 is used for
    # background.
    best_fg_labels_for_anchors = tf.add(
        tf.gather(gt_boxes[:, 4], best_gtbox_for_anchors_idx), 1.)
    iou_is_fg = tf.greater_equal(max_overlaps, self._foreground_threshold)

    # We update anchors_label with the value in
    # best_fg_labels_for_anchors only when the box is foreground.
    # TODO: Replace with a sparse_to_dense with -1 default_value
    anchors_label = tf.where(condition=iou_is_fg,
                             x=best_fg_labels_for_anchors,
                             y=anchors_label)

    # For each gt_box, the anchor with the highest overlap. Several gt
    # boxes may share an anchor and the indices are unordered, so index
    # validation is disabled in the sparse_to_dense calls below.
    best_anchor_idxs = tf.argmax(overlaps, axis=0)
    is_best_box = tf.sparse_to_dense(sparse_indices=best_anchor_idxs,
                                     sparse_values=True,
                                     default_value=False,
                                     output_shape=tf.cast(
                                         anchors_label_shape, tf.int64),
                                     validate_indices=False)

    # Now we need to find the anchors that are the best for each of the
    # gt_boxes. We overwrite the previous anchors_label with this
    # because setting the best anchor for each gt_box has priority.
    best_anchors_gt_labels = tf.sparse_to_dense(
        sparse_indices=best_anchor_idxs,
        sparse_values=gt_boxes[:, 4] + 1,
        default_value=-1,
        output_shape=tf.cast(anchors_label_shape, tf.int64),
        validate_indices=False,
        name="get_right_labels_for_bestboxes")
    anchors_label = tf.where(condition=is_best_box,
                             x=best_anchors_gt_labels,
                             y=anchors_label,
                             name="update_labels_for_bestbox_anchors")

    # Use the worst backgrounds (the bgs whose probability of being fg is
    # the greatest).
    cls_probs = probs[:, 1:]
    max_cls_probs = tf.reduce_max(cls_probs, axis=1)

    # Exclude boxes with IOU > `background_threshold_high` with any GT.
    iou_less_than_bg_tresh_high_filter = tf.less_equal(
        max_overlaps, self._background_threshold_high)
    bg_anchors = tf.less_equal(anchors_label, 0)
    bg_overlaps_filter = tf.logical_and(iou_less_than_bg_tresh_high_filter,
                                        bg_anchors)

    # Anchors that are not background candidates get a score of -1 so
    # they rank below every real candidate in the top_k below.
    max_cls_probs = tf.where(
        condition=bg_overlaps_filter,
        x=max_cls_probs,
        y=tf.fill(dims=anchors_label_shape, value=-1.), )

    # We calculate up to how many backgrounds we desire based on the
    # final number of foregrounds and the hard mining ratio.
    num_fg_mask = tf.greater(anchors_label, 0.0)
    num_fg = tf.cast(tf.count_nonzero(num_fg_mask), tf.float32)
    num_bg = tf.cast(num_fg * self._hard_negative_ratio, tf.int32)

    # NOTE(review): top_k always returns `num_bg` entries. If fewer than
    # `num_bg` anchors passed the filter above, entries scored -1
    # (including foreground anchors) are selected too and get relabeled as
    # background below - confirm this cannot happen in practice.
    top_k_bg = tf.nn.top_k(max_cls_probs, k=num_bg)
    set_bg = tf.sparse_to_dense(sparse_indices=top_k_bg.indices,
                                sparse_values=True,
                                default_value=False,
                                output_shape=anchors_label_shape,
                                validate_indices=False)
    anchors_label = tf.where(condition=set_bg,
                             x=tf.fill(dims=anchors_label_shape, value=0.),
                             y=anchors_label)

    # Next step is to calculate the proper bbox targets for the labeled
    # anchors based on the values of the ground-truth boxes.
    # We have to use only the anchors labeled >= 1, each matching with
    # the proper gt_boxes

    # Get the ids of the anchors that matter for bbox_target comparison.
    is_anchor_with_target = tf.greater(anchors_label, 0)
    anchors_with_target_idx = tf.where(condition=is_anchor_with_target)

    # Get the corresponding ground truth box only for the anchors with
    # target.
    gt_boxes_idxs = tf.gather(best_gtbox_for_anchors_idx,
                              anchors_with_target_idx)

    # Get the values of the ground truth boxes.
    anchors_gt_boxes = tf.gather_nd(gt_boxes[:, :4], gt_boxes_idxs)

    # We create the same array but with the anchors
    anchors_with_target = tf.gather_nd(all_anchors,
                                       anchors_with_target_idx)

    # We create our targets with bbox_transform
    bbox_targets = encode(anchors_with_target,
                          anchors_gt_boxes,
                          variances=self._variances)

    # We unmap targets to anchor_labels (containing the length of
    # anchors); anchors without a target keep all-zero rows.
    bbox_targets = tf.scatter_nd(indices=anchors_with_target_idx,
                                 updates=bbox_targets,
                                 shape=tf.cast(tf.shape(all_anchors),
                                               tf.int64))
    return anchors_label, bbox_targets
def _build(self, proposals, gt_boxes):
    """Label proposals and compute their box regression targets.

    Args:
        proposals: A Tensor with the RPN bounding boxes proposals.
            The shape of the Tensor is (num_proposals, 4).
        gt_boxes: A Tensor with the ground truth boxes for the image.
            The shape of the Tensor is (num_gt, 5), having the truth label
            as the last value for each box.

    Returns:
        proposals_label: Either a truth value of the proposals (a value
            between 0 and num_classes, with 0 being background), or a
            negative value when the proposal is to be ignored in the
            minibatch: -1 for proposals that were never labeled or are
            surplus backgrounds, -label for surplus foregrounds.
            The Tensor is 1-D with one entry per proposal.
        bbox_targets: A bounding box regression target for each of the
            proposals that have and greater than zero label. For every
            other proposal we return zeros.
            The shape of the Tensor is (num_proposals, 4).
    """
    overlaps = bbox_overlap_tf(proposals, gt_boxes[:, :4])
    # overlaps now contains (num_proposals, num_gt_boxes) with the IoU of
    # proposal P and ground truth box G in overlaps[P, G]

    # We are going to label each proposal based on the IoU with
    # `gt_boxes`. Start by filling the labels with -1, marking them as
    # ignored.
    proposals_label_shape = tf.gather(tf.shape(proposals), [0])
    proposals_label = tf.fill(dims=proposals_label_shape, value=-1.)

    # For each proposal there are three possible outcomes for labelling,
    # based on its highest IoU with any gt_box:
    #   max(iou) < background_threshold_low: ignore (label stays -1).
    #   background_threshold_low <= max(iou) < background_threshold_high:
    #       label as background.
    #   max(iou) >= foreground_threshold: label with the class of the
    #       gt_box with the highest IoU.
    #
    # max_overlaps holds, for each proposal, the highest IoU it has with
    # any gt_box (the value, not the index).
    max_overlaps = tf.reduce_max(overlaps, axis=1)

    iou_is_high_enough_for_bg = tf.greater_equal(
        max_overlaps, self._background_threshold_low)
    iou_is_not_too_high_for_bg = tf.less(max_overlaps,
                                         self._background_threshold_high)
    bg_condition = tf.logical_and(iou_is_high_enough_for_bg,
                                  iou_is_not_too_high_for_bg)
    proposals_label = tf.where(condition=bg_condition,
                               x=tf.zeros_like(proposals_label,
                                               dtype=tf.float32),
                               y=proposals_label)

    # Get the index of the best gt_box for each proposal.
    overlaps_best_gt_idxs = tf.argmax(overlaps, axis=1)

    # Having the index of the gt bbox with the best label we need to get
    # the label for each gt box and sum it one because 0 is used for
    # background.
    best_fg_labels_for_proposals = tf.add(
        tf.gather(gt_boxes[:, 4], overlaps_best_gt_idxs), 1.)
    iou_is_fg = tf.greater_equal(max_overlaps, self._foreground_threshold)

    # For each gt_box, the index of the proposal overlapping it the most.
    best_proposals_idxs = tf.argmax(overlaps, axis=0)

    # Set the indices in best_proposals_idxs to True, and the rest to
    # false.
    # tf.sparse_to_dense is used because we know the set of indices which
    # we want to set to True, and we know the rest of the indices
    # should be set to False. That's exactly the use case of
    # tf.sparse_to_dense.
    is_best_box = tf.sparse_to_dense(
        sparse_indices=tf.reshape(best_proposals_idxs, [-1]),
        sparse_values=True,
        default_value=False,
        output_shape=tf.cast(proposals_label_shape, tf.int64),
        validate_indices=False)

    # We update proposals_label with the value in
    # best_fg_labels_for_proposals only when the box is foreground.
    proposals_label = tf.where(condition=iou_is_fg,
                               x=best_fg_labels_for_proposals,
                               y=proposals_label)

    # Now we need to find the proposals that are the best for each of the
    # gt_boxes. We overwrite the previous proposals_label with this
    # because setting the best proposal for each gt_box has priority.
    best_proposals_gt_labels = tf.sparse_to_dense(
        sparse_indices=tf.reshape(best_proposals_idxs, [-1]),
        sparse_values=gt_boxes[:, 4] + 1,
        default_value=0.,
        output_shape=tf.cast(proposals_label_shape, tf.int64),
        validate_indices=False,
        name="get_right_labels_for_bestboxes")
    proposals_label = tf.where(condition=is_best_box,
                               x=best_proposals_gt_labels,
                               y=proposals_label,
                               name="update_labels_for_bestbox_proposals")

    # proposals_label now has a value in [0, num_classes + 1] for
    # proposals we are going to use and -1 for the ones we should ignore.
    # But we still need to make sure we don't have a number of proposals
    # higher than minibatch_size * foreground_fraction.
    max_fg = int(self._foreground_fraction * self._minibatch_size)
    fg_condition = tf.logical_or(iou_is_fg, is_best_box)
    fg_inds = tf.where(condition=fg_condition)

    def disable_some_fgs():
        # We want to delete a randomly-selected subset of fg_inds of
        # size `fg_inds.shape[0] - max_fg`.
        # We shuffle along the dimension 0 and then we get the first
        # num_fg_inds - max_fg indices and we disable them.
        shuffled_inds = tf.random_shuffle(fg_inds, seed=self._seed)
        disable_place = (tf.shape(fg_inds)[0] - max_fg)
        # This function should never run if num_fg_inds <= max_fg, so we
        # add an assertion to catch the wrong behaviour if it happens.
        integrity_assertion = tf.assert_positive(
            disable_place,
            message="disable_place in disable_some_fgs is negative.")
        with tf.control_dependencies([integrity_assertion]):
            disable_inds = shuffled_inds[:disable_place]
        is_disabled = tf.sparse_to_dense(
            sparse_indices=disable_inds,
            sparse_values=True,
            default_value=False,
            output_shape=tf.cast(proposals_label_shape, tf.int64),
            # We are shuffling the indices, so they may not be ordered.
            validate_indices=False)
        return tf.where(
            condition=is_disabled,
            # We set it to -label for debugging purposes.
            x=tf.negative(proposals_label),
            y=proposals_label)

    # Disable some fgs if we have too many foregrounds.
    proposals_label = tf.cond(tf.greater(tf.shape(fg_inds)[0], max_fg),
                              true_fn=disable_some_fgs,
                              false_fn=lambda: proposals_label)

    # Disabled foregrounds now carry a negative label, so counting the
    # strictly positive labels gives the foregrounds left in the batch.
    total_fg_in_batch = tf.shape(
        tf.where(condition=tf.greater(proposals_label, 0)))[0]

    # Now we want to do the same for backgrounds.
    # We calculate up to how many backgrounds we desire based on the
    # final number of foregrounds and the total desired batch size.
    max_bg = self._minibatch_size - total_fg_in_batch

    # We can't use bg_condition because some of the proposals that satisfy
    # the IoU conditions to be background may have been labeled as
    # foreground due to them being the best proposal for a certain gt_box.
    bg_mask = tf.equal(proposals_label, 0)
    bg_inds = tf.where(condition=bg_mask, )

    def disable_some_bgs():
        # Mutatis mutandis, all comments from disable_some_fgs apply.
        # The differences: a surplus of zero is accepted, and disabled
        # backgrounds are set to -1 instead of -label.
        shuffled_inds = tf.random_shuffle(bg_inds, seed=self._seed)
        disable_place = (tf.shape(bg_inds)[0] - max_bg)
        integrity_assertion = tf.assert_non_negative(
            disable_place,
            message="disable_place in disable_some_bgs is negative.")
        with tf.control_dependencies([integrity_assertion]):
            disable_inds = shuffled_inds[:disable_place]
        is_disabled = tf.sparse_to_dense(sparse_indices=disable_inds,
                                         sparse_values=True,
                                         default_value=False,
                                         output_shape=tf.cast(
                                             proposals_label_shape,
                                             tf.int64),
                                         validate_indices=False)
        return tf.where(condition=is_disabled,
                        x=tf.fill(dims=proposals_label_shape, value=-1.),
                        y=proposals_label)

    proposals_label = tf.cond(tf.greater_equal(
        tf.shape(bg_inds)[0], max_bg),
                              true_fn=disable_some_bgs,
                              false_fn=lambda: proposals_label)

    """ Next step is to calculate the proper targets for the proposals labeled based on the values of the ground-truth boxes. We have to use only the proposals labeled >= 1, each matching with the proper gt_boxes """

    # Get the ids of the proposals that matter for bbox_target comparison.
    is_proposal_with_target = tf.greater(proposals_label, 0)
    proposals_with_target_idx = tf.where(condition=is_proposal_with_target)

    # Get the corresponding ground truth box only for the proposals with
    # target.
    gt_boxes_idxs = tf.gather(overlaps_best_gt_idxs,
                              proposals_with_target_idx)

    # Get the values of the ground truth boxes.
    proposals_gt_boxes = tf.gather_nd(gt_boxes[:, :4], gt_boxes_idxs)

    # We create the same array but with the proposals
    proposals_with_target = tf.gather_nd(proposals,
                                         proposals_with_target_idx)

    # We create our targets with bbox_transform.
    bbox_targets_nonzero = encode(
        proposals_with_target,
        proposals_gt_boxes,
        variances=self._variances, )

    # We unmap targets to proposal_labels (containing the length of
    # proposals); proposals without a target keep all-zero rows.
    bbox_targets = tf.scatter_nd(indices=proposals_with_target_idx,
                                 updates=bbox_targets_nonzero,
                                 shape=tf.cast(tf.shape(proposals),
                                               tf.int64))

    # The two self-assignments below have no effect.
    proposals_label = proposals_label
    bbox_targets = bbox_targets
    return proposals_label, bbox_targets
def _build(self, all_anchors, gt_boxes, im_shape):
    """Compute per-anchor training targets against the ground truth.

    We compare anchors to GT and, using the minibatch size and the
    different config settings (clobber, foreground fraction, etc), we end
    up with training targets *only* for the elements we want to use in the
    batch, while everything else is ignored.

    Basically what it does is, first generate the targets for all (valid)
    anchors, and then start subsampling the positive (foreground) and the
    negative ones (background) based on the number of samples of each type
    that we want.

    Args:
        all_anchors: A Tensor with all the bounding boxes coords of the
            anchors. Only its first four columns are used, so its shape
            should be (num_anchors, 4).
        gt_boxes: A Tensor with the ground truth bounding boxes of the
            image of the batch being processed. Its shape should be
            (num_gt, 5). The last dimension is used for the label and is
            dropped here.
        im_shape: Shape of original image (height, width) in order to
            define anchor targets in respect with gt_boxes.

    Returns:
        Tuple of the tensors of:
            labels: (1, 0, -1) for each anchor, as floats: 1 is
                foreground, 0 is background and -1 is ignored (which
                includes every anchor outside the allowed image border).
                Shape (num_anchors,).
            bbox_targets: 4d bbox targets as specified by paper; zeros for
                every anchor that is not foreground.
                Shape (num_anchors, 4).
            max_overlaps: Max IoU overlap with ground truth boxes; zeros
                for anchors outside the allowed image border.
                Shape (num_anchors,).
    """
    # Keep only the coordinates of gt_boxes
    gt_boxes = gt_boxes[:, :4]
    all_anchors = all_anchors[:, :4]

    # Only keep anchors inside the image
    (x_min_anchor, y_min_anchor,
     x_max_anchor, y_max_anchor) = tf.unstack(all_anchors, axis=1)

    anchor_filter = tf.logical_and(
        tf.logical_and(
            tf.greater_equal(x_min_anchor, -self._allowed_border),
            tf.greater_equal(y_min_anchor, -self._allowed_border)
        ),
        tf.logical_and(
            tf.less(x_max_anchor, im_shape[1] + self._allowed_border),
            tf.less(y_max_anchor, im_shape[0] + self._allowed_border)
        )
    )

    # We (force) reshape the filter so that we can use it as a boolean mask
    anchor_filter = tf.reshape(anchor_filter, [-1])

    # Filter anchors.
    anchors = tf.boolean_mask(
        all_anchors, anchor_filter, name='filter_anchors')

    # Generate array with the labels for all_anchors (everything starts as
    # "ignore") and keep only the entries of the anchors inside the image.
    labels = tf.fill((tf.gather(tf.shape(all_anchors), [0])), -1)
    labels = tf.boolean_mask(labels, anchor_filter, name='filter_labels')

    # Intersection over union (IoU) overlap between the anchors and the
    # ground truth boxes.
    # NOTE(review): assumed to be (num_inside_anchors, num_gt) -- confirm
    # against bbox_overlap_tf.
    overlaps = bbox_overlap_tf(tf.to_float(anchors), tf.to_float(gt_boxes))

    # Generate array with the IoU value of the closest GT box for each
    # anchor.
    max_overlaps = tf.reduce_max(overlaps, axis=1)
    if not self._clobber_positives:
        # Assign bg labels first so that positive labels can clobber them.
        # First we get an array with True where IoU is less than
        # self._negative_overlap
        negative_overlap_nonzero = tf.less(
            max_overlaps, self._negative_overlap)

        # Finally we set 0 at True indices
        labels = tf.where(
            condition=negative_overlap_nonzero,
            x=tf.zeros(tf.shape(labels)), y=tf.to_float(labels)
        )

    # Get the value of the max IoU for the closest anchor for each gt.
    gt_max_overlaps = tf.reduce_max(overlaps, axis=0)

    # Find all the indices that match (at least one, but could be more).
    # The comparison mask is deliberately NOT squeezed: with a single
    # remaining anchor an axis-less squeeze would drop the anchor axis and
    # column 0 of `tf.where` would no longer be the anchor index.
    gt_argmax_overlaps = tf.where(
        tf.equal(overlaps, gt_max_overlaps))[:, 0]
    # Eliminate duplicates indices.
    gt_argmax_overlaps, _ = tf.unique(gt_argmax_overlaps)
    # Order the indices for sparse_to_dense compatibility
    gt_argmax_overlaps, _ = tf.nn.top_k(
        gt_argmax_overlaps, k=tf.shape(gt_argmax_overlaps)[-1])
    gt_argmax_overlaps = tf.reverse(gt_argmax_overlaps, [0])

    # Foreground label: for each ground-truth, anchor with highest overlap.
    # When the argmax is many items we use all of them (for consistency).
    # We set 1 at gt_argmax_overlaps_cond indices
    gt_argmax_overlaps_cond = tf.sparse_to_dense(
        gt_argmax_overlaps, tf.shape(labels, out_type=tf.int64),
        True, default_value=False
    )

    labels = tf.where(
        condition=gt_argmax_overlaps_cond,
        x=tf.ones(tf.shape(labels)), y=tf.to_float(labels)
    )

    # Foreground label: above threshold Intersection over Union (IoU)
    # First we get an array with True where IoU is greater or equal than
    # self._positive_overlap
    positive_overlap_inds = tf.greater_equal(
        max_overlaps, self._positive_overlap)
    # Finally we set 1 at True indices
    labels = tf.where(
        condition=positive_overlap_inds,
        x=tf.ones(tf.shape(labels)), y=labels
    )

    if self._clobber_positives:
        # Assign background labels last so that negative labels can clobber
        # positives. First we get an array with True where IoU is less than
        # self._negative_overlap
        negative_overlap_nonzero = tf.less(
            max_overlaps, self._negative_overlap)
        # Finally we set 0 at True indices
        labels = tf.where(
            condition=negative_overlap_nonzero,
            x=tf.zeros(tf.shape(labels)), y=labels
        )

    # Subsample positive labels if we have too many.
    # The closure reads `fg_inds`, `num_fg` and `labels` when it is called
    # by `tf.cond` below, i.e. after they have been computed.
    def subsample_positive():
        # Shuffle the foreground indices
        disable_fg_inds = tf.random_shuffle(fg_inds, seed=self._seed)
        # Select the indices that we have to ignore, this is
        # `tf.shape(fg_inds)[0] - num_fg` because we want to get only
        # `num_fg` foreground labels.
        disable_place = (tf.shape(fg_inds)[0] - num_fg)
        disable_fg_inds = disable_fg_inds[:disable_place]
        # Order the indices for sparse_to_dense compatibility
        disable_fg_inds, _ = tf.nn.top_k(
            disable_fg_inds, k=tf.shape(disable_fg_inds)[-1])
        disable_fg_inds = tf.reverse(disable_fg_inds, [0])
        disable_fg_inds = tf.sparse_to_dense(
            disable_fg_inds, tf.shape(labels, out_type=tf.int64),
            True, default_value=False
        )
        # Put -1 to ignore the anchors in the selected indices
        return tf.where(
            condition=tf.squeeze(disable_fg_inds),
            x=tf.to_float(tf.fill(tf.shape(labels), -1)), y=labels
        )

    num_fg = tf.to_int32(self._foreground_fraction * self._minibatch_size)
    # Get foreground indices, get True in the indices where we have a one.
    fg_inds = tf.equal(labels, 1)
    # We get only the indices where we have True.
    fg_inds = tf.squeeze(tf.where(fg_inds), axis=1)
    fg_inds_size = tf.size(fg_inds)
    # Condition for check if we have too many positive labels.
    subsample_positive_cond = fg_inds_size > num_fg
    # Check the condition and subsample positive labels.
    labels = tf.cond(
        subsample_positive_cond,
        true_fn=subsample_positive, false_fn=lambda: labels
    )

    # Subsample negative labels if we have too many.
    # As above, `bg_inds`, `num_bg` and `labels` are read at call time.
    def subsample_negative():
        # Shuffle the background indices
        disable_bg_inds = tf.random_shuffle(bg_inds, seed=self._seed)

        # Select the indices that we have to ignore, this is
        # `tf.shape(bg_inds)[0] - num_bg` because we want to get only
        # `num_bg` background labels.
        disable_place = (tf.shape(bg_inds)[0] - num_bg)
        disable_bg_inds = disable_bg_inds[:disable_place]
        # Order the indices for sparse_to_dense compatibility
        disable_bg_inds, _ = tf.nn.top_k(
            disable_bg_inds, k=tf.shape(disable_bg_inds)[-1])
        disable_bg_inds = tf.reverse(disable_bg_inds, [0])
        disable_bg_inds = tf.sparse_to_dense(
            disable_bg_inds, tf.shape(labels, out_type=tf.int64),
            True, default_value=False
        )
        # Put -1 to ignore the anchors in the selected indices
        return tf.where(
            condition=tf.squeeze(disable_bg_inds),
            x=tf.to_float(tf.fill(tf.shape(labels), -1)), y=labels
        )

    # Recalculate the foreground indices after (maybe) disable some of them

    # Get foreground indices, get True in the indices where we have a one.
    fg_inds = tf.equal(labels, 1)
    # We get only the indices where we have True.
    fg_inds = tf.squeeze(tf.where(fg_inds), axis=1)
    fg_inds_size = tf.size(fg_inds)

    # The backgrounds fill whatever room the foregrounds left in the batch.
    num_bg = tf.to_int32(self._minibatch_size - fg_inds_size)
    # Get background indices, get True in the indices where we have a zero.
    bg_inds = tf.equal(labels, 0)
    # We get only the indices where we have True.
    bg_inds = tf.squeeze(tf.where(bg_inds), axis=1)
    bg_inds_size = tf.size(bg_inds)
    # Condition for check if we have too many negative labels.
    subsample_negative_cond = bg_inds_size > num_bg
    # Check the condition and subsample negative labels.
    labels = tf.cond(
        subsample_negative_cond,
        true_fn=subsample_negative, false_fn=lambda: labels
    )

    # Return bbox targets with shape (anchors.shape[0], 4).

    # Find the closest gt box for each anchor.
    argmax_overlaps = tf.argmax(overlaps, axis=1)

    # Filter the gt_boxes.
    # We get only the indices where we have "inside anchors".
    anchor_filter_inds = tf.where(anchor_filter)
    # From here on `gt_boxes` holds one (closest) gt box per inside anchor.
    gt_boxes = tf.gather(gt_boxes, argmax_overlaps)

    bbox_targets = encode_tf(anchors, gt_boxes)

    # For the anchors that arent foreground, we ignore the bbox_targets.
    anchor_foreground_filter = tf.equal(labels, 1)
    bbox_targets = tf.where(
        condition=anchor_foreground_filter,
        x=bbox_targets, y=tf.zeros_like(bbox_targets)
    )

    # We unroll "inside anchors" value for all anchors (for shape
    # compatibility).

    # We complete the missed indices with zeros
    # (because scatter_nd has zeros as default).
    bbox_targets = tf.scatter_nd(
        indices=tf.to_int32(anchor_filter_inds),
        updates=bbox_targets,
        shape=tf.shape(all_anchors)
    )

    labels_scatter = tf.scatter_nd(
        indices=tf.to_int32(anchor_filter_inds),
        updates=labels,
        shape=[tf.shape(all_anchors)[0]]
    )
    # We have to put -1 to ignore the indices with 0 generated by
    # scatter_nd, otherwise it will be considered as background.
    labels = tf.where(
        condition=anchor_filter, x=labels_scatter,
        y=tf.to_float(tf.fill(tf.shape(labels_scatter), -1))
    )

    max_overlaps = tf.scatter_nd(
        indices=tf.to_int32(anchor_filter_inds),
        updates=max_overlaps,
        shape=[tf.shape(all_anchors)[0]]
    )

    return labels, bbox_targets, max_overlaps