def test(self):
    """Run the ROI encoder and decoder on random ROIs and print the results.

    Reads ``self.gt_boxes``, ``self.N`` and ``self.num_classes``. Stores on
    ``self`` the generated ``rois``, the evaluated encoder outputs
    (``labels``, ``bbox_targets``, ``bbox_inside_weights``) and the evaluated
    decoder outputs (``final_boxes``, ``scores``, ``classes``).
    """
    print(self.gt_boxes)
    # The session is only needed as the default session for the .eval() calls.
    with tf.Session():
        # First N candidates: the first four columns of gt_boxes, each value
        # shifted by a random integer in [-3, 3).
        jittered = self.gt_boxes[:, :4] + np.random.randint(-3, 3, (self.N, 4))

        # N + 2 extra candidates: two random values in [0, 60) followed by
        # the same two values plus a random offset in [20, 30).
        corners = np.random.randint(0, 60, (self.N + 2, 2))
        bgs = np.hstack(
            (corners, corners + np.random.randint(20, 30, (self.N + 2, 2))))
        bgs = bgs.astype(np.float32)

        rois = np.vstack((jittered, bgs))
        self.rois = rois
        print(rois)
        print(self.gt_boxes)

        labels, bbox_targets, bbox_inside_weights = \
            roi_encoder(self.gt_boxes, self.rois, self.num_classes)
        self.labels = labels.eval()
        self.bbox_targets = bbox_targets.eval()
        self.bbox_inside_weights = bbox_inside_weights.eval()
        print(self.labels.shape)
        print(self.labels)
        print(self.bbox_targets.shape)
        print(self.bbox_inside_weights.shape)

        print('learning targets:')
        for i in range(self.labels.size):
            # Each label owns a 4-wide slice of the per-class target row.
            s = int(4 * self.labels[i])
            e = s + 4
            print(self.labels[i],
                  self.bbox_targets[i, s:e],
                  self.bbox_inside_weights[i, s:e])

        # Random per-class scores fed to the decoder; kept under a separate
        # name so the decoder's score output does not shadow its own input.
        random_scores = np.random.rand(self.rois.shape[0], self.num_classes)
        random_scores = random_scores.astype(np.float32)
        final_boxes, classes, scores = \
            roi_decoder(self.bbox_targets, random_scores, self.rois, 100, 100)
        self.final_boxes = final_boxes.eval()
        self.scores = scores.eval()
        self.classes = classes.eval()

        print('rois:')
        print(self.rois)
        print('final_boxes:')
        print(self.final_boxes)
def test(self):
    """Exercise ``roi_encoder`` / ``roi_decoder`` on random ROIs and print everything.

    Reads ``self.gt_boxes``, ``self.N`` and ``self.num_classes``. The
    generated ROIs and the evaluated encoder and decoder outputs are stored
    as attributes on ``self`` (``rois``, ``labels``, ``bbox_targets``,
    ``bbox_inside_weights``, ``final_boxes``, ``scores``, ``classes``).
    """
    print(self.gt_boxes)
    # No handle to the session is kept: it only has to be the default
    # session while the tensors below are evaluated with .eval().
    with tf.Session():
        # Candidates derived from the ground truth: first four columns of
        # gt_boxes plus integer noise drawn from [-3, 3).
        rois = self.gt_boxes[:, :4]
        rois = rois + np.random.randint(-3, 3, (self.N, 4))

        # Additional random candidates (N + 2 of them): two values drawn from
        # [0, 60), then those values plus an offset drawn from [20, 30).
        bgs = np.random.randint(0, 60, (self.N + 2, 2))
        bgs = np.hstack(
            (bgs, bgs + np.random.randint(20, 30, (self.N + 2, 2))))
        bgs = bgs.astype(np.float32)

        rois = np.vstack((rois, bgs))
        self.rois = rois
        print(rois)
        print(self.gt_boxes)

        labels, bbox_targets, bbox_inside_weights = \
            roi_encoder(self.gt_boxes, self.rois, self.num_classes)
        self.labels = labels.eval()
        self.bbox_targets = bbox_targets.eval()
        self.bbox_inside_weights = bbox_inside_weights.eval()
        print(self.labels.shape)
        print(self.labels)
        print(self.bbox_targets.shape)
        print(self.bbox_inside_weights.shape)

        print('learning targets:')
        for i in range(self.labels.size):
            # Columns [4 * label, 4 * label + 4) hold this row's targets.
            s = int(4 * self.labels[i])
            e = s + 4
            print(self.labels[i],
                  self.bbox_targets[i, s:e],
                  self.bbox_inside_weights[i, s:e])

        # Decoder input scores are random; the decoder's own score output is
        # bound to a different name to avoid shadowing this input.
        input_scores = np.random.rand(self.rois.shape[0], self.num_classes)
        input_scores = input_scores.astype(np.float32)
        final_boxes, classes, scores = \
            roi_decoder(self.bbox_targets, input_scores, self.rois, 100, 100)
        self.final_boxes = final_boxes.eval()
        self.scores = scores.eval()
        self.classes = classes.eval()

        print('rois:')
        print(self.rois)
        print('final_boxes:')
        print(self.final_boxes)
def build_losses(pyramid, outputs, gt_boxes, gt_masks,
                 num_classes, base_anchors,
                 rpn_box_lw=1.0, rpn_cls_lw=1.0,
                 refined_box_lw=1.0, refined_cls_lw=1.0,
                 mask_lw=1.0):
    """Building 3-way output losses, totally 5 losses, for levels P5..P2.

    Params:
    ------
    pyramid: mapping from level name ('P2'..'P5') to a feature map; only the
      dynamic shape (dims 1 and 2, used as height and width) is read here
    outputs: output of build_heads, read as outputs[level][head][key]
    gt_boxes: A tensor of shape (G, 5), [x1, y1, x2, y2, class]
    gt_masks: A tensor of shape (G, ih, iw), {0, 1}
    num_classes: number of classes used by the refined and mask heads
    base_anchors: anchors per location, used to reshape the rpn cls output
    *_lw: loss weight of rpn, refined and mask losses

    Returns:
    -------
    l: a loss tensor, the sum of the five weighted losses over all four
      pyramid levels. The box losses are also added to
      tf.GraphKeys.LOSSES explicitly.
    """
    level_losses = []
    for i in range(5, 1, -1):
        p = 'P%d' % i
        stride = 2 ** i
        shape = tf.shape(pyramid[p])
        height, width = shape[1], shape[2]

        ### rpn losses
        # 1. encode ground truth
        # 2. compute distances
        all_anchors = gen_all_anchors(height, width, stride)
        labels, bbox_targets, bbox_inside_weights = \
            anchor_encoder(gt_boxes, all_anchors, height, width, stride,
                           scope='AnchorEncoder')
        boxes = outputs[p]['rpn']['box']
        classes = tf.reshape(outputs[p]['rpn']['cls'],
                             (1, height, width, base_anchors, 2))

        # Flatten everything to one row per anchor before filtering.
        labels, classes, boxes, bbox_targets, bbox_inside_weights = \
            _filter_negative_samples(tf.reshape(labels, [-1]), [
                tf.reshape(labels, [-1]),
                tf.reshape(classes, [-1, 2]),
                tf.reshape(boxes, [-1, 4]),
                tf.reshape(bbox_targets, [-1, 4]),
                tf.reshape(bbox_inside_weights, [-1, 4]),
            ])

        rpn_box_loss = bbox_inside_weights * _smooth_l1_dist(
            boxes, bbox_targets)
        rpn_box_loss = tf.reshape(rpn_box_loss, [-1, 4])
        rpn_box_loss = tf.reduce_sum(rpn_box_loss, axis=1)
        rpn_box_loss = rpn_box_lw * tf.reduce_mean(rpn_box_loss)
        tf.add_to_collection(tf.GraphKeys.LOSSES, rpn_box_loss)

        # NOTE: examples with negative labels are ignored when computing the
        # one-hot encoding and the entropy loss (a -1 label becomes an
        # all-zero row), BUT they would still count in the average of the
        # cross entropy, shrinking the loss by a factor of
        # (non_negative_labels / all_labels). Gathering only the
        # non-negative examples first is therefore the better practice.
        labels = slim.one_hot_encoding(labels, 2,
                                       on_value=1.0, off_value=0.0)
        rpn_cls_loss = rpn_cls_lw * tf.losses.softmax_cross_entropy(
            onehot_labels=labels, logits=classes)

        ### refined loss
        # 1. encode ground truth
        # 2. compute distances
        rois = outputs[p]['roi']['box']
        boxes = outputs[p]['refined']['box']
        classes = outputs[p]['refined']['cls']

        labels, bbox_targets, bbox_inside_weights = \
            roi_encoder(gt_boxes, rois, num_classes, scope='ROIEncoder')

        labels, classes, boxes, bbox_targets, bbox_inside_weights = \
            _filter_negative_samples(tf.reshape(labels, [-1]), [
                tf.reshape(labels, [-1]),
                tf.reshape(classes, [-1, num_classes]),
                tf.reshape(boxes, [-1, num_classes * 4]),
                tf.reshape(bbox_targets, [-1, num_classes * 4]),
                tf.reshape(bbox_inside_weights, [-1, num_classes * 4]),
            ])

        refined_box_loss = bbox_inside_weights * _smooth_l1_dist(
            boxes, bbox_targets)
        refined_box_loss = tf.reshape(refined_box_loss, [-1, 4])
        refined_box_loss = tf.reduce_sum(refined_box_loss, axis=1)
        refined_box_loss = refined_box_lw * tf.reduce_mean(refined_box_loss)
        tf.add_to_collection(tf.GraphKeys.LOSSES, refined_box_loss)

        labels = slim.one_hot_encoding(labels, num_classes,
                                       on_value=1.0, off_value=0.0)
        # tf.losses.softmax_cross_entropy takes (onehot_labels, logits);
        # keywords keep the two from being swapped.
        refined_cls_loss = refined_cls_lw * tf.losses.softmax_cross_entropy(
            onehot_labels=labels, logits=classes)

        ### mask loss
        # mask of shape (N, h, w, num_classes*2)
        masks = outputs[p]['mask']['mask']
        mask_shape = tf.shape(masks)
        # Split the last dimension into (classes, 2) so that each class gets
        # a two-way softmax.
        masks = tf.reshape(masks, (mask_shape[0], mask_shape[1],
                                   mask_shape[2],
                                   tf.cast(mask_shape[3] / 2, tf.int32), 2))
        labels, mask_targets, mask_inside_weights = \
            mask_encoder(gt_masks, gt_boxes, rois, num_classes, 28, 28,
                         scope='MaskEncoder')
        labels, masks, mask_targets, mask_inside_weights = \
            _filter_negative_samples(tf.reshape(labels, [-1]), [
                tf.reshape(labels, [-1]),
                masks,
                mask_targets,
                mask_inside_weights,
            ])
        mask_targets = slim.one_hot_encoding(mask_targets, 2,
                                             on_value=1.0, off_value=0.0)
        mask_binary_loss = mask_lw * tf.losses.softmax_cross_entropy(
            onehot_labels=mask_targets, logits=masks)

        level_losses.append(rpn_box_loss + rpn_cls_loss +
                            refined_box_loss + refined_cls_loss +
                            mask_binary_loss)

    # Sum over every pyramid level instead of returning only the losses of
    # the last level visited.
    return tf.add_n(level_losses)
def build_losses(pyramid, outputs, gt_boxes, gt_masks,
                 num_classes,
                 rpn_box_lw=1.0, rpn_cls_lw=1.0,
                 refined_box_lw=1.0, refined_cls_lw=1.0,
                 mask_lw=1.0):
    """Building 3-way output losses for pyramid levels P5..P2.

    Params:
    ------
    pyramid: mapping from level name ('P2'..'P5') to a feature map; only the
      dynamic shape (dims 1 and 2, used as height and width) is read here
    outputs: output of build_heads, read as outputs[level][head][key]
    gt_boxes: A tensor of shape (G, 5), [x1, y1, x2, y2, class]
    gt_masks: A tensor of shape (G, ih, iw), {0, 1}
    num_classes: number of classes used by the refined and mask heads
    *_lw: loss weight of rpn, refined and mask losses (mask_lw is currently
      unused because no mask loss is built)

    Returns:
    -------
    None. The classification losses are created with
    tf.losses.softmax_cross_entropy, which by default registers its result in
    tf.GraphKeys.LOSSES.

    NOTE(review): the weighted box and class loss tensors built here are
    neither returned nor added to a collection, and the mask targets are
    encoded but no mask loss is derived from them -- confirm whether callers
    expect a loss tensor from this function.
    """
    for i in range(5, 1, -1):
        p = 'P%d' % i
        stride = 2 ** i
        shape = tf.shape(pyramid[p])
        height, width = shape[1], shape[2]

        ### rpn losses
        # 1. encode ground truth
        # 2. compute distances
        all_anchors = gen_all_anchors(height, width, stride)
        labels, bbox_targets, bbox_inside_weights = \
            anchor_encoder(gt_boxes, all_anchors, height, width, stride,
                           scope='AnchorEncoder')
        boxes = outputs[p]['rpn']['box']
        classes = outputs[p]['rpn']['cls']

        rpn_box_loss = bbox_inside_weights * _smooth_l1_dist(
            boxes, bbox_targets)
        rpn_box_loss = tf.reshape(rpn_box_loss, [-1, 4])
        rpn_box_loss = tf.reduce_sum(rpn_box_loss, axis=1)
        rpn_box_loss = rpn_box_lw * tf.reduce_mean(rpn_box_loss)

        labels = slim.one_hot_encoding(labels, 2,
                                       on_value=1.0, off_value=0.0)
        # tf.losses.softmax_cross_entropy takes (onehot_labels, logits);
        # keywords keep the two from being swapped.
        rpn_cls_loss = rpn_cls_lw * tf.losses.softmax_cross_entropy(
            onehot_labels=labels, logits=classes)

        ### refined loss
        # 1. encode ground truth
        # 2. compute distances
        rois = outputs[p]['roi']['box']
        boxes = outputs[p]['refined']['box']
        classes = outputs[p]['refined']['cls']

        labels, bbox_targets, bbox_inside_weights = \
            roi_encoder(gt_boxes, rois, num_classes, scope='ROIEncoder')

        refined_box_loss = bbox_inside_weights * _smooth_l1_dist(
            boxes, bbox_targets)
        refined_box_loss = tf.reshape(refined_box_loss, [-1, 4])
        refined_box_loss = tf.reduce_sum(refined_box_loss, axis=1)
        refined_box_loss = refined_box_lw * tf.reduce_mean(refined_box_loss)

        labels = slim.one_hot_encoding(labels, num_classes,
                                       on_value=1.0, off_value=0.0)
        refined_cls_loss = refined_cls_lw * tf.losses.softmax_cross_entropy(
            onehot_labels=labels, logits=classes)

        ### mask loss
        # {'mask': m, 'classes': classes, 'scores': scores}
        # Only the targets are encoded so far; the lookups are kept so that
        # a missing head output still fails here.
        masks = outputs[p]['mask']['mask']
        labels, mask_targets, mask_inside_weights = \
            mask_encoder(gt_masks, gt_boxes, rois, num_classes, 28, 28,
                         scope='MaskEncoder')

    return
def build_losses(pyramid, outputs, gt_boxes, gt_masks,
                 num_classes, base_anchors,
                 rpn_box_lw=1.0, rpn_cls_lw=1.0,
                 refined_box_lw=1.0, refined_cls_lw=1.0,
                 mask_lw=1.0):
    """Building 3-way output losses, totally 5 losses, summed over P5..P2.

    Params:
    ------
    pyramid: mapping from level name ('P2'..'P5') to a feature map; only the
      dynamic shape (dims 1 and 2, used as height and width) is read here
    outputs: output of build_heads, read as outputs[level][head][key]
    gt_boxes: A tensor of shape (G, 5), [x1, y1, x2, y2, class]
    gt_masks: A tensor of shape (G, ih, iw), {0, 1}
    num_classes: number of classes used by the refined and mask heads
    base_anchors: anchors per location, used to reshape the rpn cls output
    *_lw: loss weight of rpn, refined and mask losses

    Returns:
    -------
    total_loss: sum of the five per-type losses
    losses: [rpn_box, rpn_cls, refined_box, refined_cls, mask], each summed
      over the four pyramid levels
    batch_info: [rpn_batch_pos, rpn_batch, refine_batch_pos, refine_batch,
      mask_batch_pos, mask_batch], float32 counts of samples whose label is
      >= 1 ("pos") or >= 0, summed over the levels

    Every per-level loss is also added to tf.GraphKeys.LOSSES.
    """

    def _count_labels_at_least(label_tensor, threshold):
        """Number of entries >= threshold, as a float32 scalar tensor."""
        return tf.reduce_sum(
            tf.cast(tf.greater_equal(label_tensor, threshold), tf.float32))

    # losses for pyramid
    rpn_box_losses, rpn_cls_losses = [], []
    refined_box_losses, refined_cls_losses = [], []
    mask_losses = []

    # watch some info during training
    rpn_batch = []
    refine_batch = []
    mask_batch = []
    rpn_batch_pos = []
    refine_batch_pos = []
    mask_batch_pos = []

    arg_scope = _extra_conv_arg_scope(activation_fn=None)
    with slim.arg_scope(arg_scope):
        with tf.variable_scope('pyramid'):

            ## assigning gt_boxes
            # Indexed below with (level - 2), i.e. one entry per level in
            # [2, 3, 4, 5]; the trailing extra entry is not needed here.
            assigned_gt_boxes = assign_boxes(gt_boxes, [2, 3, 4, 5])

            ## build losses for PFN
            for i in range(5, 1, -1):
                p = 'P%d' % i
                stride = 2 ** i
                shape = tf.shape(pyramid[p])
                height, width = shape[1], shape[2]

                # Ground truth assigned to this level; used by all three
                # encoders below. Kept under its own name so the gt_boxes
                # argument is not overwritten.
                level_gt_boxes = assigned_gt_boxes[i - 2]

                ### rpn losses
                # 1. encode ground truth
                # 2. compute distances
                anchor_scales = [2 ** (i - 2), 2 ** (i - 1), 2 ** (i)]
                all_anchors = gen_all_anchors(height, width, stride,
                                              anchor_scales)
                labels, bbox_targets, bbox_inside_weights = \
                    anchor_encoder(level_gt_boxes, all_anchors,
                                   height, width, stride,
                                   scope='AnchorEncoder')
                boxes = outputs[p]['rpn']['box']
                classes = tf.reshape(outputs[p]['rpn']['cls'],
                                     (1, height, width, base_anchors, 2))

                labels, classes, boxes, bbox_targets, bbox_inside_weights = \
                    _filter_negative_samples(tf.reshape(labels, [-1]), [
                        tf.reshape(labels, [-1]),
                        tf.reshape(classes, [-1, 2]),
                        tf.reshape(boxes, [-1, 4]),
                        tf.reshape(bbox_targets, [-1, 4]),
                        tf.reshape(bbox_inside_weights, [-1, 4]),
                    ])
                rpn_batch.append(_count_labels_at_least(labels, 0))
                rpn_batch_pos.append(_count_labels_at_least(labels, 1))

                rpn_box_loss = bbox_inside_weights * _smooth_l1_dist(
                    boxes, bbox_targets)
                rpn_box_loss = tf.reshape(rpn_box_loss, [-1, 4])
                rpn_box_loss = tf.reduce_sum(rpn_box_loss, axis=1)
                rpn_box_loss = rpn_box_lw * tf.reduce_mean(rpn_box_loss)
                tf.add_to_collection(tf.GraphKeys.LOSSES, rpn_box_loss)
                rpn_box_losses.append(rpn_box_loss)

                # NOTE: examples with negative labels are ignored when
                # computing the one-hot encoding and the entropy loss (a -1
                # label becomes an all-zero row), BUT they would still count
                # in the average of the cross entropy, shrinking the loss by
                # a factor of (non_negative_labels / all_labels). Gathering
                # only the non-negative examples first is the better practice.
                labels = slim.one_hot_encoding(labels, 2,
                                               on_value=1.0, off_value=0.0)
                rpn_cls_loss = rpn_cls_lw * \
                    tf.nn.softmax_cross_entropy_with_logits(
                        labels=labels, logits=classes)
                rpn_cls_loss = tf.reduce_mean(rpn_cls_loss)
                tf.add_to_collection(tf.GraphKeys.LOSSES, rpn_cls_loss)
                rpn_cls_losses.append(rpn_cls_loss)

                ### refined loss
                # 1. encode ground truth
                # 2. compute distances
                rois = outputs[p]['roi']['box']
                boxes = outputs[p]['refined']['box']
                classes = outputs[p]['refined']['cls']

                labels, bbox_targets, bbox_inside_weights = \
                    roi_encoder(level_gt_boxes, rois, num_classes,
                                scope='ROIEncoder')

                labels, classes, boxes, bbox_targets, bbox_inside_weights = \
                    _filter_negative_samples(tf.reshape(labels, [-1]), [
                        tf.reshape(labels, [-1]),
                        tf.reshape(classes, [-1, num_classes]),
                        tf.reshape(boxes, [-1, num_classes * 4]),
                        tf.reshape(bbox_targets, [-1, num_classes * 4]),
                        tf.reshape(bbox_inside_weights,
                                   [-1, num_classes * 4]),
                    ])
                # Only the second returned value scales the refined losses.
                _, frac_ = _get_valid_sample_fraction(labels)
                refine_batch.append(_count_labels_at_least(labels, 0))
                refine_batch_pos.append(_count_labels_at_least(labels, 1))

                refined_box_loss = bbox_inside_weights * _smooth_l1_dist(
                    boxes, bbox_targets)
                refined_box_loss = tf.reshape(refined_box_loss, [-1, 4])
                refined_box_loss = tf.reduce_sum(refined_box_loss, axis=1)
                refined_box_loss = refined_box_lw * tf.reduce_mean(
                    refined_box_loss) * frac_
                tf.add_to_collection(tf.GraphKeys.LOSSES, refined_box_loss)
                refined_box_losses.append(refined_box_loss)

                labels = slim.one_hot_encoding(labels, num_classes,
                                               on_value=1.0, off_value=0.0)
                refined_cls_loss = refined_cls_lw * \
                    tf.nn.softmax_cross_entropy_with_logits(
                        labels=labels, logits=classes)
                refined_cls_loss = tf.reduce_mean(refined_cls_loss) * frac_
                tf.add_to_collection(tf.GraphKeys.LOSSES, refined_cls_loss)
                refined_cls_losses.append(refined_cls_loss)

                ### mask loss
                # mask of shape (N, h, w, num_classes*2)
                masks = outputs[p]['mask']['mask']
                labels, mask_targets, mask_inside_weights = \
                    mask_encoder(gt_masks, level_gt_boxes, rois,
                                 num_classes, 28, 28, scope='MaskEncoder')
                labels, masks, mask_targets, mask_inside_weights = \
                    _filter_negative_samples(tf.reshape(labels, [-1]), [
                        tf.reshape(labels, [-1]),
                        masks,
                        mask_targets,
                        mask_inside_weights,
                    ])
                mask_batch.append(_count_labels_at_least(labels, 0))
                mask_batch_pos.append(_count_labels_at_least(labels, 1))

                # NOTE: w/o competition between classes (per-class sigmoid
                # instead of a softmax across classes).
                mask_targets = tf.cast(mask_targets, tf.float32)
                mask_loss = mask_lw * \
                    tf.nn.sigmoid_cross_entropy_with_logits(
                        labels=mask_targets, logits=masks)
                mask_loss = tf.reduce_mean(mask_loss)
                # Use a constant 0 when no sample survived the filtering.
                mask_loss = tf.cond(tf.greater(tf.size(labels), 0),
                                    lambda: mask_loss,
                                    lambda: tf.constant(0.0))
                tf.add_to_collection(tf.GraphKeys.LOSSES, mask_loss)
                mask_losses.append(mask_loss)

    rpn_box_losses = tf.add_n(rpn_box_losses)
    rpn_cls_losses = tf.add_n(rpn_cls_losses)
    refined_box_losses = tf.add_n(refined_box_losses)
    refined_cls_losses = tf.add_n(refined_cls_losses)
    mask_losses = tf.add_n(mask_losses)
    losses = [rpn_box_losses, rpn_cls_losses,
              refined_box_losses, refined_cls_losses, mask_losses]
    total_loss = tf.add_n(losses)

    rpn_batch = tf.cast(tf.add_n(rpn_batch), tf.float32)
    refine_batch = tf.cast(tf.add_n(refine_batch), tf.float32)
    mask_batch = tf.cast(tf.add_n(mask_batch), tf.float32)
    rpn_batch_pos = tf.cast(tf.add_n(rpn_batch_pos), tf.float32)
    refine_batch_pos = tf.cast(tf.add_n(refine_batch_pos), tf.float32)
    mask_batch_pos = tf.cast(tf.add_n(mask_batch_pos), tf.float32)

    return total_loss, losses, [rpn_batch_pos, rpn_batch,
                                refine_batch_pos, refine_batch,
                                mask_batch_pos, mask_batch]
def build_losses(pyramid, outputs, gt_boxes, gt_masks,
                 num_classes, base_anchors,
                 rpn_box_lw=1.0, rpn_cls_lw=1.0,
                 refined_box_lw=1.0, refined_cls_lw=1.0,
                 mask_lw=1.0):
    """Building 3-way output losses, totally 5 losses.

    The rpn losses are built once per pyramid level (P5..P2). The refined
    and mask losses read level-independent entries of `outputs` and are
    built once.

    Params:
    ------
    pyramid: mapping from level name ('P2'..'P5') to a feature map; only the
      dynamic shape (dims 1 and 2, used as height and width) is read here
    outputs: output of build_heads; read as outputs['rpn'][level][key],
      outputs['roi']['box'], outputs['refined'][key], outputs['mask']['mask']
    gt_boxes: A tensor of shape (G, 5), [x1, y1, x2, y2, class]
    gt_masks: A tensor of shape (G, ih, iw), {0, 1}
    num_classes: number of classes used by the refined and mask heads
    base_anchors: anchors per location, used to reshape the rpn cls output
    *_lw: loss weight of rpn, refined and mask losses

    Returns:
    -------
    total_loss: sum of the five per-type losses
    losses: [rpn_box, rpn_cls, refined_box, refined_cls, mask]
    batch_info: [rpn_batch_pos, rpn_batch, refine_batch_pos, refine_batch,
      mask_batch_pos, mask_batch], float32 counts of samples whose label is
      >= 1 ("pos") or >= 0

    Every individual loss is also added to tf.GraphKeys.LOSSES.
    """

    def _count_labels_at_least(label_tensor, threshold):
        """Number of entries >= threshold, as a float32 scalar tensor."""
        return tf.reduce_sum(
            tf.cast(tf.greater_equal(label_tensor, threshold), tf.float32))

    # losses for pyramid
    rpn_box_losses, rpn_cls_losses = [], []
    refined_box_losses, refined_cls_losses = [], []
    mask_losses = []

    # watch some info during training
    rpn_batch = []
    refine_batch = []
    mask_batch = []
    rpn_batch_pos = []
    refine_batch_pos = []
    mask_batch_pos = []

    arg_scope = _extra_conv_arg_scope(activation_fn=None)
    with slim.arg_scope(arg_scope):
        with tf.variable_scope('pyramid'):

            ## assigning gt_boxes
            # Only the per-level boxes are used; the layer indices are not.
            [assigned_gt_boxes, _assigned_layer_inds] = \
                assign_boxes(gt_boxes, [gt_boxes], [2, 3, 4, 5])

            ## build losses for PFN
            for i in range(5, 1, -1):
                p = 'P%d' % i
                stride = 2 ** i
                shape = tf.shape(pyramid[p])
                height, width = shape[1], shape[2]

                splitted_gt_boxes = assigned_gt_boxes[i - 2]

                ### rpn losses
                # 1. encode ground truth
                # 2. compute distances
                all_anchors = outputs['rpn'][p]['anchor']
                labels, bbox_targets, bbox_inside_weights = \
                    anchor_encoder(splitted_gt_boxes, all_anchors,
                                   height, width, stride,
                                   scope='AnchorEncoder')
                boxes = outputs['rpn'][p]['box']
                classes = tf.reshape(outputs['rpn'][p]['cls'],
                                     (1, height, width, base_anchors, 2))

                labels, classes, boxes, bbox_targets, bbox_inside_weights = \
                    _filter_negative_samples(tf.reshape(labels, [-1]), [
                        tf.reshape(labels, [-1]),
                        tf.reshape(classes, [-1, 2]),
                        tf.reshape(boxes, [-1, 4]),
                        tf.reshape(bbox_targets, [-1, 4]),
                        tf.reshape(bbox_inside_weights, [-1, 4]),
                    ])
                rpn_batch.append(_count_labels_at_least(labels, 0))
                rpn_batch_pos.append(_count_labels_at_least(labels, 1))

                rpn_box_loss = bbox_inside_weights * _smooth_l1_dist(
                    boxes, bbox_targets)
                rpn_box_loss = tf.reshape(rpn_box_loss, [-1, 4])
                rpn_box_loss = tf.reduce_sum(rpn_box_loss, axis=1)
                rpn_box_loss = rpn_box_lw * tf.reduce_mean(rpn_box_loss)
                tf.add_to_collection(tf.GraphKeys.LOSSES, rpn_box_loss)
                rpn_box_losses.append(rpn_box_loss)

                # NOTE: examples with negative labels are ignored when
                # computing the one-hot encoding and the entropy loss (a -1
                # label becomes an all-zero row), BUT they would still count
                # in the average of the cross entropy, shrinking the loss by
                # a factor of (non_negative_labels / all_labels). Gathering
                # only the non-negative examples first is the better practice.
                labels = slim.one_hot_encoding(labels, 2,
                                               on_value=1.0, off_value=0.0)
                rpn_cls_loss = rpn_cls_lw * \
                    tf.nn.softmax_cross_entropy_with_logits(
                        labels=labels, logits=classes)
                rpn_cls_loss = tf.reduce_mean(rpn_cls_loss)
                tf.add_to_collection(tf.GraphKeys.LOSSES, rpn_cls_loss)
                rpn_cls_losses.append(rpn_cls_loss)

            ### refined loss
            # 1. encode ground truth
            # 2. compute distances
            # None of the inputs below depends on the pyramid level, so this
            # section and the mask section are built once, after the loop.
            rois = outputs['roi']['box']
            boxes = outputs['refined']['box']
            classes = outputs['refined']['cls']

            labels, bbox_targets, bbox_inside_weights = \
                roi_encoder(gt_boxes, rois, num_classes, scope='ROIEncoder')

            labels, classes, boxes, bbox_targets, bbox_inside_weights = \
                _filter_negative_samples(tf.reshape(labels, [-1]), [
                    tf.reshape(labels, [-1]),
                    tf.reshape(classes, [-1, num_classes]),
                    tf.reshape(boxes, [-1, num_classes * 4]),
                    tf.reshape(bbox_targets, [-1, num_classes * 4]),
                    tf.reshape(bbox_inside_weights, [-1, num_classes * 4]),
                ])
            refine_batch.append(_count_labels_at_least(labels, 0))
            refine_batch_pos.append(_count_labels_at_least(labels, 1))

            refined_box_loss = bbox_inside_weights * _smooth_l1_dist(
                boxes, bbox_targets)
            refined_box_loss = tf.reshape(refined_box_loss, [-1, 4])
            refined_box_loss = tf.reduce_sum(refined_box_loss, axis=1)
            refined_box_loss = refined_box_lw * tf.reduce_mean(
                refined_box_loss)
            tf.add_to_collection(tf.GraphKeys.LOSSES, refined_box_loss)
            refined_box_losses.append(refined_box_loss)

            labels = slim.one_hot_encoding(labels, num_classes,
                                           on_value=1.0, off_value=0.0)
            refined_cls_loss = refined_cls_lw * \
                tf.nn.softmax_cross_entropy_with_logits(
                    labels=labels, logits=classes)
            refined_cls_loss = tf.reduce_mean(refined_cls_loss)
            tf.add_to_collection(tf.GraphKeys.LOSSES, refined_cls_loss)
            refined_cls_losses.append(refined_cls_loss)

            ### mask loss
            # mask of shape (N, h, w, num_classes)
            masks = outputs['mask']['mask']
            labels, mask_targets, mask_inside_weights = \
                mask_encoder(gt_masks, gt_boxes, rois, num_classes, 28, 28,
                             scope='MaskEncoder')
            labels, masks, mask_targets, mask_inside_weights = \
                _filter_negative_samples(tf.reshape(labels, [-1]), [
                    tf.reshape(labels, [-1]),
                    masks,
                    mask_targets,
                    mask_inside_weights,
                ])
            mask_batch.append(_count_labels_at_least(labels, 0))
            mask_batch_pos.append(_count_labels_at_least(labels, 1))

            # NOTE: w/o competition between classes (per-class sigmoid
            # instead of a softmax across classes).
            mask_targets = tf.cast(mask_targets, tf.float32)
            mask_loss = mask_lw * tf.nn.sigmoid_cross_entropy_with_logits(
                labels=mask_targets, logits=masks)
            mask_loss = tf.reduce_mean(mask_loss)
            # Use a constant 0 when no sample survived the filtering.
            mask_loss = tf.cond(tf.greater(tf.size(labels), 0),
                                lambda: mask_loss,
                                lambda: tf.constant(0.0))
            tf.add_to_collection(tf.GraphKeys.LOSSES, mask_loss)
            mask_losses.append(mask_loss)

    rpn_box_losses = tf.add_n(rpn_box_losses)
    rpn_cls_losses = tf.add_n(rpn_cls_losses)
    refined_box_losses = tf.add_n(refined_box_losses)
    refined_cls_losses = tf.add_n(refined_cls_losses)
    mask_losses = tf.add_n(mask_losses)
    losses = [rpn_box_losses, rpn_cls_losses,
              refined_box_losses, refined_cls_losses, mask_losses]
    total_loss = tf.add_n(losses)

    rpn_batch = tf.cast(tf.add_n(rpn_batch), tf.float32)
    refine_batch = tf.cast(tf.add_n(refine_batch), tf.float32)
    mask_batch = tf.cast(tf.add_n(mask_batch), tf.float32)
    rpn_batch_pos = tf.cast(tf.add_n(rpn_batch_pos), tf.float32)
    refine_batch_pos = tf.cast(tf.add_n(refine_batch_pos), tf.float32)
    mask_batch_pos = tf.cast(tf.add_n(mask_batch_pos), tf.float32)

    return total_loss, losses, [rpn_batch_pos, rpn_batch,
                                refine_batch_pos, refine_batch,
                                mask_batch_pos, mask_batch]
def build_losses(pyramid, py_scope, slim_scope, image_height, image_width,
                 outputs, gt_boxes, gt_masks,
                 num_classes, base_anchors,
                 rpn_box_lw=0.1, rpn_cls_lw=0.1,
                 rcnn_box_lw=1.0, rcnn_cls_lw=0.1,
                 mask_lw=1.0):
    """Building 3-way output losses, totally 5 losses.

    The rpn losses are built once per pyramid level (P5..P2). The rcnn and
    mask losses read level-independent entries of `outputs` and are built
    once.

    Params:
    ------
    pyramid: mapping from level name ('P2'..'P5') to a feature map; only the
      dynamic shape (dims 1 and 2, used as height and width) is read here
    py_scope: name scope entered with tf.name_scope while building the losses
    slim_scope: arg scope entered with slim.arg_scope while building the
      losses
    image_height, image_width: forwarded to anchor_encoder
    outputs: output of build_heads; read as outputs['rpn'][level][key] and
      the flat keys 'rcnn_ordered_rois', 'rcnn_boxes', 'rcnn_clses',
      'rcnn_scores', 'mask_ordered_rois' and 'mask_mask'
    gt_boxes: A tensor of shape (G, 5), [x1, y1, x2, y2, class]
    gt_masks: A tensor of shape (G, ih, iw), {0, 1}
    num_classes: number of classes used by the rcnn and mask heads
    base_anchors: anchors per location, used to reshape the rpn cls output
    *_lw: loss weight of rpn, rcnn and mask losses

    Returns:
    -------
    total_loss: sum of the five per-type losses
    losses: [rpn_box, rpn_cls, rcnn_box, rcnn_cls, mask]
    batch_info: [rpn_batch_pos, rpn_batch, rcnn_batch_pos, rcnn_batch,
      mask_batch_pos, mask_batch], float32 counts of samples whose label is
      >= 1 ("pos") or >= 0

    Side effects:
    ------------
    Every individual loss is added to tf.GraphKeys.LOSSES, and the filtered
    training tensors are stored in `outputs` under the 'training_rcnn_*' and
    'training_mask_*' keys.
    """

    def _count_labels_at_least(label_tensor, threshold):
        """Number of entries >= threshold, as a float32 scalar tensor."""
        return tf.reduce_sum(
            tf.cast(tf.greater_equal(label_tensor, threshold), tf.float32))

    # losses for pyramid
    rpn_box_losses, rpn_cls_losses = [], []
    rcnn_box_losses, rcnn_cls_losses = [], []
    mask_losses = []

    # watch some info during training
    rpn_batch = []
    rcnn_batch = []
    mask_batch = []
    rpn_batch_pos = []
    rcnn_batch_pos = []
    mask_batch_pos = []

    with tf.name_scope(py_scope):
        with slim.arg_scope(slim_scope):

            ## assigning gt_boxes
            # Only the per-level boxes are used; the layer indices are not.
            [assigned_gt_boxes, _assigned_layer_inds] = \
                assign_boxes(gt_boxes, [gt_boxes], [2, 3, 4, 5])

            ## build losses for PFN
            for i in range(5, 1, -1):
                p = 'P%d' % i
                stride = 2 ** i
                shape = tf.shape(pyramid[p])
                height, width = shape[1], shape[2]

                splitted_gt_boxes = assigned_gt_boxes[i - 2]

                ### rpn losses
                # 1. encode ground truth
                # 2. compute distances
                all_anchors = outputs['rpn'][p]['anchor']
                rpn_boxes = outputs['rpn'][p]['box']
                rpn_clses = tf.reshape(outputs['rpn'][p]['cls'],
                                       (1, height, width, base_anchors, 2))

                rpn_clses_target, rpn_boxes_target, rpn_boxes_inside_weight = \
                    anchor_encoder(splitted_gt_boxes, all_anchors,
                                   height, width, stride,
                                   image_height, image_width,
                                   scope='AnchorEncoder')

                (rpn_clses_target, rpn_clses, rpn_boxes,
                 rpn_boxes_target, rpn_boxes_inside_weight) = \
                    _filter_negative_samples(
                        tf.reshape(rpn_clses_target, [-1]), [
                            tf.reshape(rpn_clses_target, [-1]),
                            tf.reshape(rpn_clses, [-1, 2]),
                            tf.reshape(rpn_boxes, [-1, 4]),
                            tf.reshape(rpn_boxes_target, [-1, 4]),
                            tf.reshape(rpn_boxes_inside_weight, [-1, 4]),
                        ])

                rpn_batch.append(_count_labels_at_least(rpn_clses_target, 0))
                rpn_batch_pos.append(
                    _count_labels_at_least(rpn_clses_target, 1))

                rpn_box_loss = rpn_boxes_inside_weight * _smooth_l1_dist(
                    rpn_boxes, rpn_boxes_target)
                rpn_box_loss = tf.reshape(rpn_box_loss, [-1, 4])
                rpn_box_loss = tf.reduce_sum(rpn_box_loss, axis=1)
                rpn_box_loss = rpn_box_lw * tf.reduce_mean(rpn_box_loss)
                tf.add_to_collection(tf.GraphKeys.LOSSES, rpn_box_loss)
                rpn_box_losses.append(rpn_box_loss)

                ### NOTE: examples with negative labels are ignored when
                # computing the one-hot encoding and the entropy loss (a -1
                # label becomes an all-zero row), BUT they would still count
                # in the average of the cross entropy, shrinking the loss by
                # a factor of (non_negative_labels / all_labels). Gathering
                # only the non-negative examples first is the better practice.
                rpn_clses_target = slim.one_hot_encoding(
                    rpn_clses_target, 2, on_value=1.0, off_value=0.0)
                rpn_cls_loss = rpn_cls_lw * \
                    tf.nn.softmax_cross_entropy_with_logits(
                        labels=rpn_clses_target, logits=rpn_clses)
                rpn_cls_loss = tf.reduce_mean(rpn_cls_loss)
                tf.add_to_collection(tf.GraphKeys.LOSSES, rpn_cls_loss)
                rpn_cls_losses.append(rpn_cls_loss)

            ### rcnn losses
            # 1. encode ground truth
            # 2. compute distances
            # None of the inputs below depends on the pyramid level, so this
            # section and the mask section are built once, after the loop.
            rcnn_ordered_rois = outputs['rcnn_ordered_rois']
            rcnn_boxes = outputs['rcnn_boxes']
            rcnn_clses = outputs['rcnn_clses']
            rcnn_scores = outputs['rcnn_scores']

            rcnn_clses_target, rcnn_boxes_target, rcnn_boxes_inside_weight = \
                roi_encoder(gt_boxes, rcnn_ordered_rois, num_classes,
                            scope='ROIEncoder')

            (rcnn_clses_target, rcnn_ordered_rois, rcnn_clses, rcnn_scores,
             rcnn_boxes, rcnn_boxes_target, rcnn_boxes_inside_weight) = \
                _filter_negative_samples(
                    tf.reshape(rcnn_clses_target, [-1]), [
                        tf.reshape(rcnn_clses_target, [-1]),
                        tf.reshape(rcnn_ordered_rois, [-1, 4]),
                        tf.reshape(rcnn_clses, [-1, num_classes]),
                        tf.reshape(rcnn_scores, [-1, num_classes]),
                        tf.reshape(rcnn_boxes, [-1, num_classes * 4]),
                        tf.reshape(rcnn_boxes_target, [-1, num_classes * 4]),
                        tf.reshape(rcnn_boxes_inside_weight,
                                   [-1, num_classes * 4]),
                    ])

            rcnn_batch.append(_count_labels_at_least(rcnn_clses_target, 0))
            rcnn_batch_pos.append(
                _count_labels_at_least(rcnn_clses_target, 1))

            rcnn_box_loss = rcnn_boxes_inside_weight * _smooth_l1_dist(
                rcnn_boxes, rcnn_boxes_target)
            rcnn_box_loss = tf.reshape(rcnn_box_loss, [-1, 4])
            rcnn_box_loss = tf.reduce_sum(rcnn_box_loss, axis=1)
            rcnn_box_loss = rcnn_box_lw * tf.reduce_mean(rcnn_box_loss)
            tf.add_to_collection(tf.GraphKeys.LOSSES, rcnn_box_loss)
            rcnn_box_losses.append(rcnn_box_loss)

            # From here on rcnn_clses_target is the one-hot encoding; that is
            # also the tensor exported through `outputs` below.
            rcnn_clses_target = slim.one_hot_encoding(
                rcnn_clses_target, num_classes, on_value=1.0, off_value=0.0)
            rcnn_cls_loss = rcnn_cls_lw * \
                tf.nn.softmax_cross_entropy_with_logits(
                    labels=rcnn_clses_target, logits=rcnn_clses)
            rcnn_cls_loss = tf.reduce_mean(rcnn_cls_loss)
            tf.add_to_collection(tf.GraphKeys.LOSSES, rcnn_cls_loss)
            rcnn_cls_losses.append(rcnn_cls_loss)

            outputs['training_rcnn_rois'] = rcnn_ordered_rois
            outputs['training_rcnn_clses_target'] = rcnn_clses_target
            outputs['training_rcnn_clses'] = rcnn_clses
            outputs['training_rcnn_scores'] = rcnn_scores

            ### mask loss
            # mask of shape (N, h, w, num_classes)
            mask_ordered_rois = outputs['mask_ordered_rois']
            masks = outputs['mask_mask']

            mask_clses_target, mask_targets, mask_inside_weights, mask_rois = \
                mask_encoder(gt_masks, gt_boxes, mask_ordered_rois,
                             num_classes, 28, 28, scope='MaskEncoder')

            (mask_clses_target, mask_targets, mask_inside_weights,
             mask_rois, masks) = \
                _filter_negative_samples(
                    tf.reshape(mask_clses_target, [-1]), [
                        tf.reshape(mask_clses_target, [-1]),
                        tf.reshape(mask_targets, [-1, 28, 28, num_classes]),
                        tf.reshape(mask_inside_weights,
                                   [-1, 28, 28, num_classes]),
                        tf.reshape(mask_rois, [-1, 4]),
                        tf.reshape(masks, [-1, 28, 28, num_classes]),
                    ])

            mask_batch.append(_count_labels_at_least(mask_clses_target, 0))
            mask_batch_pos.append(
                _count_labels_at_least(mask_clses_target, 1))

            ### NOTE: w/o competition between classes (per-class sigmoid
            # instead of a softmax across classes).
            mask_loss = mask_inside_weights * \
                tf.nn.sigmoid_cross_entropy_with_logits(
                    labels=mask_targets, logits=masks)
            mask_loss = mask_lw * mask_loss
            mask_loss = tf.reduce_mean(mask_loss)
            # Use a constant 0 when no sample survived the filtering.
            mask_loss = tf.cond(tf.greater(tf.size(mask_clses_target), 0),
                                lambda: mask_loss,
                                lambda: tf.constant(0.0))
            tf.add_to_collection(tf.GraphKeys.LOSSES, mask_loss)
            mask_losses.append(mask_loss)

            outputs['training_mask_rois'] = mask_rois
            outputs['training_mask_clses_target'] = mask_clses_target
            outputs['training_mask_final_mask'] = tf.nn.sigmoid(masks)
            outputs['training_mask_final_mask_target'] = mask_targets

    rpn_box_losses = tf.add_n(rpn_box_losses)
    rpn_cls_losses = tf.add_n(rpn_cls_losses)
    rcnn_box_losses = tf.add_n(rcnn_box_losses)
    rcnn_cls_losses = tf.add_n(rcnn_cls_losses)
    mask_losses = tf.add_n(mask_losses)
    losses = [rpn_box_losses, rpn_cls_losses,
              rcnn_box_losses, rcnn_cls_losses, mask_losses]
    total_loss = tf.add_n(losses)

    rpn_batch = tf.cast(tf.add_n(rpn_batch), tf.float32)
    rcnn_batch = tf.cast(tf.add_n(rcnn_batch), tf.float32)
    mask_batch = tf.cast(tf.add_n(mask_batch), tf.float32)
    rpn_batch_pos = tf.cast(tf.add_n(rpn_batch_pos), tf.float32)
    rcnn_batch_pos = tf.cast(tf.add_n(rcnn_batch_pos), tf.float32)
    mask_batch_pos = tf.cast(tf.add_n(mask_batch_pos), tf.float32)

    return total_loss, losses, [rpn_batch_pos, rpn_batch,
                                rcnn_batch_pos, rcnn_batch,
                                mask_batch_pos, mask_batch]