def _sample_output(self, all_rois, gt_boxes, im_scale, gt_masks, mask_info, init_state0):
    """Label every RoI as foreground/background and build box and mask targets.

    No sub-sampling is performed: every row of ``all_rois`` is kept, reordered
    so that foreground RoIs (max overlap >= ``cfg.TRAIN.BBOX_THRESH``) come
    first and background RoIs follow.

    Args:
        all_rois: 2-D array; columns 1:5 are used as box coordinates.
        gt_boxes: 2-D array; columns 0:4 are used as box coordinates and
            column 4 as the class label.
        im_scale: scalar the boxes are divided by before the mask
            intersection is computed.
        gt_masks: per-ground-truth masks, indexed by ground-truth index.
        mask_info: per-ground-truth sequence whose first two entries bound
            the valid region ``gt_mask[0:info[0], 0:info[1]]``.
        init_state0: passed through untouched as the last returned element.

    Returns:
        Tuple ``(labels, rois, fg_inds, keep_inds, pos_masks, top_mask_info,
        bbox_targets, bbox_inside_weights, init_states)``.  ``pos_masks`` has
        shape ``(len(keep_inds), 1, cfg.MASK_SIZE, cfg.MASK_SIZE)`` and
        ``top_mask_info`` has shape ``(len(keep_inds), 12)``; its background
        rows are filled with -1.
    """
    # np.float was only an alias of the builtin float and was removed in
    # NumPy 1.24; np.float64 yields the identical dtype on every version.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]

    # Partition (not sample) the RoIs by their best overlap.
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.BBOX_THRESH)[0]
    bg_inds = np.where(max_overlaps < cfg.TRAIN.BBOX_THRESH)[0]
    keep_inds = np.append(fg_inds, bg_inds).astype(int)
    num_fg = len(fg_inds)

    # Select values in the new (foreground first) order.
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0.
    labels[num_fg:] = 0
    rois = all_rois[keep_inds]
    # The initial state is intentionally NOT reordered by keep_inds.
    init_states = init_state0

    bbox_target_data = bbox_compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], normalize=True)
    bbox_target_data = np.hstack(
        (labels[:, np.newaxis], bbox_target_data)).astype(np.float32, copy=False)
    # NOTE(review): the class count is hard-coded to 21 here -- confirm it
    # matches the dataset in use.
    bbox_targets, bbox_inside_weights = get_bbox_regression_label(
        bbox_target_data, 21)

    scaled_rois = rois[:, 1:5] / float(im_scale)
    scaled_gt_boxes = gt_boxes[:, :4] / float(im_scale)

    pos_masks = np.zeros((len(keep_inds), 1, cfg.MASK_SIZE, cfg.MASK_SIZE))
    top_mask_info = np.zeros((len(keep_inds), 12))
    top_mask_info[num_fg:, :] = -1

    # Row i of the reordered arrays corresponds to original RoI fg_inds[i].
    for i, val in enumerate(fg_inds):
        gt_idx = gt_assignment[val]
        gt_box = np.around(scaled_gt_boxes[gt_idx]).astype(int)
        ex_box = np.around(scaled_rois[i]).astype(int)
        gt_mask_info = mask_info[gt_idx]
        # Crop the stored mask to the extent recorded in mask_info.
        gt_mask = gt_masks[gt_idx][0:gt_mask_info[0], 0:gt_mask_info[1]]
        # Regression target is the intersection of bounding box and gt mask.
        pos_masks[i, ...] = intersect_mask(ex_box, gt_box, gt_mask)
        top_mask_info[i, 0] = gt_idx
        top_mask_info[i, 1] = gt_mask_info[0]
        top_mask_info[i, 2] = gt_mask_info[1]
        top_mask_info[i, 3] = labels[i]
        top_mask_info[i, 4:8] = ex_box
        top_mask_info[i, 8:12] = gt_box

    return (labels, rois, fg_inds, keep_inds, pos_masks, top_mask_info,
            bbox_targets, bbox_inside_weights, init_states)
def _sample_output(self, all_rois, gt_boxes, im_scale, gt_masks, mask_info):
    """Label every RoI as foreground/background and build box and mask targets.

    No sub-sampling is performed: every row of ``all_rois`` is kept, reordered
    so that foreground RoIs (max overlap >= ``cfg.TRAIN.BBOX_THRESH``) come
    first and background RoIs follow.

    Args:
        all_rois: 2-D array; columns 1:5 are used as box coordinates.
        gt_boxes: 2-D array; columns 0:4 are used as box coordinates and
            column 4 as the class label.
        im_scale: scalar the boxes are divided by before the mask
            intersection is computed.
        gt_masks: per-ground-truth masks, indexed by ground-truth index.
        mask_info: per-ground-truth sequence whose first two entries bound
            the valid region ``gt_mask[0:info[0], 0:info[1]]``.

    Returns:
        Tuple ``(labels, rois, fg_inds, keep_inds, pos_masks, top_mask_info,
        bbox_targets, bbox_inside_weights)``.  ``pos_masks`` has shape
        ``(len(keep_inds), 1, cfg.MASK_SIZE, cfg.MASK_SIZE)`` and
        ``top_mask_info`` has shape ``(len(keep_inds), 12)``; its background
        rows are filled with -1.
    """
    # np.float was only an alias of the builtin float and was removed in
    # NumPy 1.24; np.float64 yields the identical dtype on every version.
    roi_boxes = np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64)
    gt_coords = np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64)
    overlaps = bbox_overlaps(roi_boxes, gt_coords)
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]

    # Partition (not sample) the RoIs by their best overlap.
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.BBOX_THRESH)[0]
    bg_inds = np.where(max_overlaps < cfg.TRAIN.BBOX_THRESH)[0]
    keep_inds = np.append(fg_inds, bg_inds).astype(int)
    num_fg = len(fg_inds)

    # Select values in the new (foreground first) order.
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0.
    labels[num_fg:] = 0
    rois = all_rois[keep_inds]

    bbox_target_data = bbox_compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], normalize=True)
    bbox_target_data = np.hstack(
        (labels[:, np.newaxis], bbox_target_data)).astype(np.float32, copy=False)
    # NOTE(review): the class count is hard-coded to 21 here -- confirm it
    # matches the dataset in use.
    bbox_targets, bbox_inside_weights = get_bbox_regression_label(
        bbox_target_data, 21)

    scaled_rois = rois[:, 1:5] / float(im_scale)
    scaled_gt_boxes = gt_boxes[:, :4] / float(im_scale)

    pos_masks = np.zeros((len(keep_inds), 1, cfg.MASK_SIZE, cfg.MASK_SIZE))
    top_mask_info = np.zeros((len(keep_inds), 12))
    top_mask_info[num_fg:, :] = -1

    # Row i of the reordered arrays corresponds to original RoI fg_inds[i].
    for i, val in enumerate(fg_inds):
        gt_idx = gt_assignment[val]
        gt_box = np.around(scaled_gt_boxes[gt_idx]).astype(int)
        ex_box = np.around(scaled_rois[i]).astype(int)
        gt_mask_info = mask_info[gt_idx]
        # Crop the stored mask to the extent recorded in mask_info.
        gt_mask = gt_masks[gt_idx][0:gt_mask_info[0], 0:gt_mask_info[1]]
        # Regression target is the intersection of bounding box and gt mask.
        pos_masks[i, ...] = intersect_mask(ex_box, gt_box, gt_mask)
        top_mask_info[i, 0] = gt_idx
        top_mask_info[i, 1] = gt_mask_info[0]
        top_mask_info[i, 2] = gt_mask_info[1]
        top_mask_info[i, 3] = labels[i]
        top_mask_info[i, 4:8] = ex_box
        top_mask_info[i, 8:12] = gt_box

    return (labels, rois, fg_inds, keep_inds, pos_masks, top_mask_info,
            bbox_targets, bbox_inside_weights)
def _sample_rois(all_rois, gt_boxes, rois_per_image, num_classes, gt_masks, im_scale, mask_info):
    """Generate a random sample of RoIs comprising foreground and background examples.

    Foreground RoIs are drawn from the overlap bins described by
    ``cfg.TRAIN.FG_THRESH_LO/HI/FG_FRACTION``; the remaining budget
    (``rois_per_image`` minus the number of foreground RoIs) is drawn from the
    bins described by ``cfg.TRAIN.BG_THRESH_LO/HI/BG_FRACTION``.

    Args:
        all_rois: 2-D array; columns 1:5 are used as box coordinates.
        gt_boxes: 2-D array; columns 0:4 are used as box coordinates and
            column 4 as the class label.
        rois_per_image: total number of RoIs to aim for.
        num_classes: forwarded to ``get_bbox_regression_label``.
        gt_masks: per-ground-truth masks (only read when ``cfg.MNC_MODE``).
        im_scale: scalar the boxes are divided by to map them to original
            image space (only read when ``cfg.MNC_MODE``).
        mask_info: per-ground-truth sequence whose first two entries bound
            the valid region of the matching mask (only read when
            ``cfg.MNC_MODE``).

    Returns:
        Tuple ``(blobs, fg_inds, bg_inds, keep_inds)``.  ``blobs`` maps
        ``'rois'``, ``'labels'``, ``'bbox_targets'``, ``'bbox_inside_weights'``
        and ``'bbox_outside_weights'`` to arrays, plus ``'mask_targets'``,
        ``'mask_weight'`` and ``'gt_masks_info'`` when ``cfg.MNC_MODE`` is
        set.  The index arrays are integer typed.
    """
    # overlaps: (rois x gt_boxes).  np.float was removed in NumPy 1.24;
    # np.float64 is the identical dtype on every version.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]

    def _sample_bins(lows, highs, fractions, budget):
        # Draw up to round(budget * fraction) RoIs from every overlap bin and
        # return the sorted, de-duplicated union of the chosen indices.
        # Seeding with an integer array (instead of a list) keeps the result
        # usable as an index; a list seed made np.hstack promote to float64.
        picked = np.empty((0,), dtype=np.intp)
        for i, fraction in enumerate(fractions):
            candidates = np.where((max_overlaps >= lows[i]) &
                                  (max_overlaps <= highs[i]))[0]
            # np.round returns a float, but npr.choice needs an integer size.
            quota = int(min(candidates.size, np.round(budget * fraction)))
            if candidates.size > 0:
                candidates = npr.choice(candidates, size=quota, replace=False)
            picked = np.unique(np.hstack((picked, candidates)))
        return picked

    # Sample foreground indexes.
    fg_inds = _sample_bins(cfg.TRAIN.FG_THRESH_LO, cfg.TRAIN.FG_THRESH_HI,
                           cfg.TRAIN.FG_FRACTION, rois_per_image)
    fg_rois_per_image = fg_inds.size

    # Sample background indexes according to number of foreground.
    bg_rois_per_this_image = rois_per_image - fg_rois_per_image
    bg_inds = _sample_bins(cfg.TRAIN.BG_THRESH_LO, cfg.TRAIN.BG_THRESH_HI,
                           cfg.TRAIN.BG_FRACTION, bg_rois_per_this_image)

    # The indices that we're selecting (both fg and bg), foreground first.
    keep_inds = np.append(fg_inds, bg_inds).astype(int)
    # Select sampled values from various arrays.
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0.
    labels[fg_rois_per_image:] = 0
    rois = all_rois[keep_inds]

    bbox_target_data = bbox_compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], normalize=True)
    bbox_target_data = np.hstack(
        (labels[:, np.newaxis], bbox_target_data)).astype(np.float32, copy=False)
    bbox_targets, bbox_inside_weights = get_bbox_regression_label(
        bbox_target_data, num_classes)
    bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32)

    blobs = {
        'rois': rois,
        'labels': labels,
        'bbox_targets': bbox_targets,
        'bbox_inside_weights': bbox_inside_weights,
        'bbox_outside_weights': bbox_outside_weights
    }

    if cfg.MNC_MODE:
        # Map to original image space.
        scaled_rois = rois[:, 1:5] / float(im_scale)
        scaled_gt_boxes = gt_boxes[:, :4] / float(im_scale)

        pos_masks = np.zeros((len(keep_inds), 1, cfg.MASK_SIZE, cfg.MASK_SIZE))
        top_mask_info = np.zeros((len(keep_inds), 12))
        top_mask_info[len(fg_inds):, :] = -1

        # Row i of the sampled arrays corresponds to original RoI fg_inds[i].
        for i, val in enumerate(fg_inds):
            gt_idx = gt_assignment[val]
            gt_box = np.around(scaled_gt_boxes[gt_idx]).astype(int)
            ex_box = np.around(scaled_rois[i]).astype(int)
            gt_mask_info = mask_info[gt_idx]
            # Crop the stored mask to the extent recorded in mask_info.
            gt_mask = gt_masks[gt_idx][0:gt_mask_info[0], 0:gt_mask_info[1]]
            # Calculate mask regression targets
            # (intersection of bounding box and gt mask).
            pos_masks[i, ...] = intersect_mask(ex_box, gt_box, gt_mask)
            top_mask_info[i, 0] = gt_idx
            top_mask_info[i, 1] = gt_mask_info[0]
            top_mask_info[i, 2] = gt_mask_info[1]
            top_mask_info[i, 3] = labels[i]
            top_mask_info[i, 4:8] = ex_box
            top_mask_info[i, 8:12] = gt_box

        mask_weight = np.zeros((rois.shape[0], 1, cfg.MASK_SIZE, cfg.MASK_SIZE))
        # Only assign box-level foreground as positive mask regression.
        mask_weight[0:len(fg_inds), :, :, :] = 1
        blobs['mask_targets'] = pos_masks
        blobs['mask_weight'] = mask_weight
        blobs['gt_masks_info'] = top_mask_info

    return blobs, fg_inds, bg_inds, keep_inds
def _sample_blobs(self, roidbs, img_names):
    """Assemble one training minibatch of RoIs, masks and regression targets.

    For every entry of ``roidbs`` a set of RoIs is sampled from the overlap
    bins described by ``cfg.TRAIN.THRESH_LO_SAMPLE/THRESH_HI_SAMPLE/
    FRACTION_SAMPLE``, reordered so that box-level foreground RoIs
    (``det_overlap >= cfg.TRAIN.FG_DET_THRESH``) come first, and the
    per-image arrays are stacked along axis 0.

    Args:
        roidbs: iterable of dicts; the keys read here are ``'det_overlap'``,
            ``'gt_classes'``, ``'output_label'``, ``'seg_overlap'``,
            ``'masks'``, ``'boxes'``, ``'mask_targets'`` and
            ``'gt_assignment'``.
        img_names: forwarded to ``self._get_image_blob``.

    Returns:
        Dict with keys ``'data'``, ``'rois'``, ``'masks'``, ``'box_label'``,
        ``'mask_label'``, ``'bbox_targets'``, ``'mask_targets'``,
        ``'bbox_inside_weights'``, ``'bbox_outside_weights'`` and
        ``'mask_weight'``.
    """
    random_scale_inds = np.random.randint(
        0, high=len(cfg.TRAIN.SCALES), size=cfg.TRAIN.IMS_PER_BATCH)
    im_blob, im_scales = self._get_image_blob(roidbs, random_scale_inds, img_names)
    rois_per_img = cfg.TRAIN.BATCH_SIZE / cfg.TRAIN.IMS_PER_BATCH

    # Empty accumulators; per-image results are appended along axis 0.
    rois_blob = np.zeros((0, 5), dtype=np.float32)
    masks_blob = np.zeros((0, 1, self.input_mz, self.input_mz))
    box_labels_blob = np.zeros((0, 1))
    mask_labels_blob = np.zeros((0, 1))
    bbox_targets_blob = np.zeros((0, self._num_classes * 4))
    mask_targets_blob = np.zeros((0, 1, cfg.MASK_SIZE, cfg.MASK_SIZE))
    bbox_inside_weights_blob = np.zeros((0, self._num_classes * 4))
    bbox_outside_weights_blob = np.zeros((0, self._num_classes * 4))
    mask_weights_blob = np.zeros((0, 1, cfg.MASK_SIZE, cfg.MASK_SIZE))

    for im_i, roidb in enumerate(roidbs):
        # Sample positive/negative using box-level overlap.
        det_overlap = roidb['det_overlap']
        num_gt = len(roidb['gt_classes'])
        # np.where returns a tuple of index arrays; keep only the row indices,
        # exactly as the sampling loop below does.  Passing the whole tuple to
        # np.in1d would also feed it the column indices when det_overlap is
        # 2-D.
        fg_det_inds = np.where(det_overlap >= cfg.TRAIN.FG_DET_THRESH)[0]
        keep_inds = []
        for i in range(len(cfg.TRAIN.FRACTION_SAMPLE)):
            cur_keep_inds = np.where(
                (det_overlap >= cfg.TRAIN.THRESH_LO_SAMPLE[i]) &
                (det_overlap <= cfg.TRAIN.THRESH_HI_SAMPLE[i]))[0]
            # np.round returns a float, but npr.choice needs an integer size.
            cur_rois_this_image = int(min(
                np.round(rois_per_img * cfg.TRAIN.FRACTION_SAMPLE[i]),
                len(cur_keep_inds)))
            if cur_keep_inds.size > 0:
                cur_keep_inds = npr.choice(
                    cur_keep_inds, size=cur_rois_this_image, replace=False)
            if i == 0:
                keep_inds = cur_keep_inds
            else:
                keep_inds = np.unique(np.hstack((keep_inds, cur_keep_inds)))

        # Reorder: box-level foreground first, background afterwards.
        is_fg = np.in1d(keep_inds, fg_det_inds)
        fg_inds_det = keep_inds[is_fg]
        bg_inds_det = keep_inds[~is_fg]
        keep_inds = np.append(fg_inds_det, bg_inds_det).astype(int)
        num_fg = len(fg_inds_det)

        # Assign box-level label and mask-level label.
        input_box_labels = roidb['output_label'][keep_inds]
        input_box_labels[num_fg:] = 0
        seg_overlap = roidb['seg_overlap'][keep_inds]
        bg_inds_seg = np.where(seg_overlap < cfg.TRAIN.FG_SEG_THRESH)[0]
        input_mask_labels = input_box_labels.copy()
        input_mask_labels[bg_inds_seg] = 0

        gt_classes = roidb['gt_classes']
        input_masks = roidb['masks'][keep_inds, :, :]
        input_boxes = roidb['boxes'][keep_inds, :] * im_scales[im_i]

        mask_target = roidb['mask_targets'][keep_inds, :, :]
        mask_resize = np.zeros((input_masks.shape[0], self.input_mz, self.input_mz))
        for i in range(mask_target.shape[0]):
            # np.float was removed in NumPy 1.24; np.float64 is the same dtype.
            mask_resize[i, :, :] = cv2.resize(
                input_masks[i, :, :].astype(np.float64),
                (self.input_mz, self.input_mz))
        mask_resize = mask_resize >= cfg.BINARIZE_THRESH

        # Only box-level foreground RoIs contribute to the mask loss.
        mask_target_weights = np.zeros(mask_target.shape)
        mask_target_weights[0:num_fg, :, :] = 1

        gt_boxes = roidb['boxes'][0:num_gt, :] * im_scales[im_i]
        gt_assignment = roidb['gt_assignment'][:, 0]
        # Targets are requested un-normalized (third argument False) and are
        # normalized per class below with self._mean / self._std.
        bbox_target_data = bbox_compute_targets(
            input_boxes, gt_boxes[gt_assignment[keep_inds], :4], False)
        bbox_target_data = np.hstack(
            (input_box_labels, bbox_target_data)).astype(np.float32, copy=False)
        bbox_targets, bbox_inside_weights = get_bbox_regression_label(
            bbox_target_data, self._num_classes)

        # Row i of the sampled arrays corresponds to RoI fg_inds_det[i].
        for i, det_idx in enumerate(fg_inds_det):
            cls = gt_classes[gt_assignment[det_idx]][0]
            if cls == 0:
                continue
            mean = self._mean
            std = self._std
            bbox_targets[i, cls * 4:cls * 4 + 4] -= mean[cls, :]
            bbox_targets[i, cls * 4:cls * 4 + 4] /= std[cls, :]

        bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32)
        # Prepend the image index as column 0 of every RoI.
        input_boxes = np.hstack(
            (im_i * np.ones((input_boxes.shape[0], 1)), input_boxes))
        bz = input_boxes.shape[0]

        rois_blob = np.vstack((rois_blob, input_boxes))
        masks_blob = np.concatenate(
            (masks_blob,
             mask_resize.reshape(bz, 1, self.input_mz, self.input_mz)), axis=0)
        box_labels_blob = np.concatenate(
            (box_labels_blob, input_box_labels), axis=0)
        mask_labels_blob = np.concatenate(
            (mask_labels_blob, input_mask_labels), axis=0)
        bbox_targets_blob = np.concatenate(
            (bbox_targets_blob, bbox_targets), axis=0)
        mask_targets_blob = np.concatenate(
            (mask_targets_blob,
             mask_target.reshape(bz, 1, cfg.MASK_SIZE, cfg.MASK_SIZE)), axis=0)
        bbox_inside_weights_blob = np.concatenate(
            (bbox_inside_weights_blob, bbox_inside_weights), axis=0)
        bbox_outside_weights_blob = np.concatenate(
            (bbox_outside_weights_blob, bbox_outside_weights), axis=0)
        mask_weights_blob = np.concatenate(
            (mask_weights_blob,
             mask_target_weights.reshape(bz, 1, cfg.MASK_SIZE, cfg.MASK_SIZE)),
            axis=0)

    return {
        'data': im_blob,
        'rois': rois_blob,
        'masks': masks_blob,
        'box_label': box_labels_blob,
        'mask_label': mask_labels_blob,
        'bbox_targets': bbox_targets_blob,
        'mask_targets': mask_targets_blob,
        'bbox_inside_weights': bbox_inside_weights_blob,
        'bbox_outside_weights': bbox_outside_weights_blob,
        'mask_weight': mask_weights_blob
    }
def _sample_rois(all_rois, gt_boxes, rois_per_image, num_classes, gt_masks, im_scale, mask_info):
    """Generate a random sample of RoIs comprising foreground and background examples.

    Foreground RoIs are drawn from the overlap bins described by
    ``cfg.TRAIN.FG_THRESH_LO/HI/FG_FRACTION``; the remaining budget
    (``rois_per_image`` minus the number of foreground RoIs) is drawn from the
    bins described by ``cfg.TRAIN.BG_THRESH_LO/HI/BG_FRACTION``.

    Args:
        all_rois: 2-D array; columns 1:5 are used as box coordinates.
        gt_boxes: 2-D array; columns 0:4 are used as box coordinates and
            column 4 as the class label.
        rois_per_image: total number of RoIs to aim for.
        num_classes: forwarded to ``get_bbox_regression_label``.
        gt_masks: per-ground-truth masks (only read when ``cfg.MNC_MODE``).
        im_scale: scalar the boxes are divided by to map them to original
            image space (only read when ``cfg.MNC_MODE``).
        mask_info: per-ground-truth sequence whose first two entries bound
            the valid region of the matching mask (only read when
            ``cfg.MNC_MODE``).

    Returns:
        Tuple ``(blobs, fg_inds, bg_inds, keep_inds)``.  ``blobs`` maps
        ``'rois'``, ``'labels'``, ``'bbox_targets'``, ``'bbox_inside_weights'``
        and ``'bbox_outside_weights'`` to arrays, plus ``'mask_targets'``,
        ``'mask_weight'`` and ``'gt_masks_info'`` when ``cfg.MNC_MODE`` is
        set.  The index arrays are integer typed.
    """
    # overlaps: (rois x gt_boxes).  np.float was removed in NumPy 1.24;
    # np.float64 is the identical dtype on every version.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]

    def _sample_bins(lows, highs, fractions, budget):
        # Draw up to round(budget * fraction) RoIs from every overlap bin and
        # return the sorted, de-duplicated union of the chosen indices.
        # Seeding with an integer array (instead of a list) keeps the result
        # usable as an index; a list seed made np.hstack promote to float64.
        picked = np.empty((0,), dtype=np.intp)
        for i, fraction in enumerate(fractions):
            candidates = np.where((max_overlaps >= lows[i]) &
                                  (max_overlaps <= highs[i]))[0]
            # np.round returns a float, but npr.choice needs an integer size.
            quota = int(min(candidates.size, np.round(budget * fraction)))
            if candidates.size > 0:
                candidates = npr.choice(candidates, size=quota, replace=False)
            picked = np.unique(np.hstack((picked, candidates)))
        return picked

    # Sample foreground indexes.
    fg_inds = _sample_bins(cfg.TRAIN.FG_THRESH_LO, cfg.TRAIN.FG_THRESH_HI,
                           cfg.TRAIN.FG_FRACTION, rois_per_image)
    fg_rois_per_image = fg_inds.size

    # Sample background indexes according to number of foreground.
    bg_rois_per_this_image = rois_per_image - fg_rois_per_image
    bg_inds = _sample_bins(cfg.TRAIN.BG_THRESH_LO, cfg.TRAIN.BG_THRESH_HI,
                           cfg.TRAIN.BG_FRACTION, bg_rois_per_this_image)

    # The indices that we're selecting (both fg and bg), foreground first.
    keep_inds = np.append(fg_inds, bg_inds).astype(int)
    # Select sampled values from various arrays.
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0.
    labels[fg_rois_per_image:] = 0
    rois = all_rois[keep_inds]

    bbox_target_data = bbox_compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], normalize=True)
    bbox_target_data = np.hstack(
        (labels[:, np.newaxis], bbox_target_data)).astype(np.float32, copy=False)
    bbox_targets, bbox_inside_weights = get_bbox_regression_label(
        bbox_target_data, num_classes)
    bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32)

    blobs = {
        'rois': rois,
        'labels': labels,
        'bbox_targets': bbox_targets,
        'bbox_inside_weights': bbox_inside_weights,
        'bbox_outside_weights': bbox_outside_weights
    }

    if cfg.MNC_MODE:
        # Map to original image space.
        scaled_rois = rois[:, 1:5] / float(im_scale)
        scaled_gt_boxes = gt_boxes[:, :4] / float(im_scale)

        pos_masks = np.zeros((len(keep_inds), 1, cfg.MASK_SIZE, cfg.MASK_SIZE))
        top_mask_info = np.zeros((len(keep_inds), 12))
        top_mask_info[len(fg_inds):, :] = -1

        # Row i of the sampled arrays corresponds to original RoI fg_inds[i].
        for i, val in enumerate(fg_inds):
            gt_idx = gt_assignment[val]
            gt_box = np.around(scaled_gt_boxes[gt_idx]).astype(int)
            ex_box = np.around(scaled_rois[i]).astype(int)
            gt_mask_info = mask_info[gt_idx]
            # Crop the stored mask to the extent recorded in mask_info.
            gt_mask = gt_masks[gt_idx][0:gt_mask_info[0], 0:gt_mask_info[1]]
            # Calculate mask regression targets
            # (intersection of bounding box and gt mask).
            pos_masks[i, ...] = intersect_mask(ex_box, gt_box, gt_mask)
            top_mask_info[i, 0] = gt_idx
            top_mask_info[i, 1] = gt_mask_info[0]
            top_mask_info[i, 2] = gt_mask_info[1]
            top_mask_info[i, 3] = labels[i]
            top_mask_info[i, 4:8] = ex_box
            top_mask_info[i, 8:12] = gt_box

        mask_weight = np.zeros((rois.shape[0], 1, cfg.MASK_SIZE, cfg.MASK_SIZE))
        # Only assign box-level foreground as positive mask regression.
        mask_weight[0:len(fg_inds), :, :, :] = 1
        blobs['mask_targets'] = pos_masks
        blobs['mask_weight'] = mask_weight
        blobs['gt_masks_info'] = top_mask_info

    return blobs, fg_inds, bg_inds, keep_inds
def _sample_blobs(self, roidbs, img_names):
    """Assemble one training minibatch of RoIs, masks and regression targets.

    For every entry of ``roidbs`` a set of RoIs is sampled from the overlap
    bins described by ``cfg.TRAIN.THRESH_LO_SAMPLE/THRESH_HI_SAMPLE/
    FRACTION_SAMPLE``, reordered so that box-level foreground RoIs
    (``det_overlap >= cfg.TRAIN.FG_DET_THRESH``) come first, and the
    per-image arrays are stacked along axis 0.

    Args:
        roidbs: iterable of dicts; the keys read here are ``'det_overlap'``,
            ``'gt_classes'``, ``'output_label'``, ``'seg_overlap'``,
            ``'masks'``, ``'boxes'``, ``'mask_targets'`` and
            ``'gt_assignment'``.
        img_names: forwarded to ``self._get_image_blob``.

    Returns:
        Dict with keys ``'data'``, ``'rois'``, ``'masks'``, ``'box_label'``,
        ``'mask_label'``, ``'bbox_targets'``, ``'mask_targets'``,
        ``'bbox_inside_weights'``, ``'bbox_outside_weights'`` and
        ``'mask_weight'``.
    """
    random_scale_inds = np.random.randint(
        0, high=len(cfg.TRAIN.SCALES), size=cfg.TRAIN.IMS_PER_BATCH)
    im_blob, im_scales = self._get_image_blob(roidbs, random_scale_inds, img_names)
    rois_per_img = cfg.TRAIN.BATCH_SIZE / cfg.TRAIN.IMS_PER_BATCH

    # Empty accumulators; per-image results are appended along axis 0.
    rois_blob = np.zeros((0, 5), dtype=np.float32)
    masks_blob = np.zeros((0, 1, self.input_mz, self.input_mz))
    box_labels_blob = np.zeros((0, 1))
    mask_labels_blob = np.zeros((0, 1))
    bbox_targets_blob = np.zeros((0, self._num_classes * 4))
    mask_targets_blob = np.zeros((0, 1, cfg.MASK_SIZE, cfg.MASK_SIZE))
    bbox_inside_weights_blob = np.zeros((0, self._num_classes * 4))
    bbox_outside_weights_blob = np.zeros((0, self._num_classes * 4))
    mask_weights_blob = np.zeros((0, 1, cfg.MASK_SIZE, cfg.MASK_SIZE))

    for im_i, roidb in enumerate(roidbs):
        # Sample positive/negative using box-level overlap.
        det_overlap = roidb['det_overlap']
        num_gt = len(roidb['gt_classes'])
        # np.where returns a tuple of index arrays; keep only the row indices,
        # exactly as the sampling loop below does.  Passing the whole tuple to
        # np.in1d would also feed it the column indices when det_overlap is
        # 2-D.
        fg_det_inds = np.where(det_overlap >= cfg.TRAIN.FG_DET_THRESH)[0]
        keep_inds = []
        for i in range(len(cfg.TRAIN.FRACTION_SAMPLE)):
            cur_keep_inds = np.where(
                (det_overlap >= cfg.TRAIN.THRESH_LO_SAMPLE[i]) &
                (det_overlap <= cfg.TRAIN.THRESH_HI_SAMPLE[i]))[0]
            # np.round returns a float, but npr.choice needs an integer size.
            cur_rois_this_image = int(min(
                np.round(rois_per_img * cfg.TRAIN.FRACTION_SAMPLE[i]),
                len(cur_keep_inds)))
            if cur_keep_inds.size > 0:
                cur_keep_inds = npr.choice(
                    cur_keep_inds, size=cur_rois_this_image, replace=False)
            if i == 0:
                keep_inds = cur_keep_inds
            else:
                keep_inds = np.unique(np.hstack((keep_inds, cur_keep_inds)))

        # Reorder: box-level foreground first, background afterwards.
        is_fg = np.in1d(keep_inds, fg_det_inds)
        fg_inds_det = keep_inds[is_fg]
        bg_inds_det = keep_inds[~is_fg]
        keep_inds = np.append(fg_inds_det, bg_inds_det).astype(int)
        num_fg = len(fg_inds_det)

        # Assign box-level label and mask-level label.
        input_box_labels = roidb['output_label'][keep_inds]
        input_box_labels[num_fg:] = 0
        seg_overlap = roidb['seg_overlap'][keep_inds]
        bg_inds_seg = np.where(seg_overlap < cfg.TRAIN.FG_SEG_THRESH)[0]
        input_mask_labels = input_box_labels.copy()
        input_mask_labels[bg_inds_seg] = 0

        gt_classes = roidb['gt_classes']
        input_masks = roidb['masks'][keep_inds, :, :]
        input_boxes = roidb['boxes'][keep_inds, :] * im_scales[im_i]

        mask_target = roidb['mask_targets'][keep_inds, :, :]
        mask_resize = np.zeros((input_masks.shape[0], self.input_mz, self.input_mz))
        for i in range(mask_target.shape[0]):
            # np.float was removed in NumPy 1.24; np.float64 is the same dtype.
            mask_resize[i, :, :] = cv2.resize(
                input_masks[i, :, :].astype(np.float64),
                (self.input_mz, self.input_mz))
        mask_resize = mask_resize >= cfg.BINARIZE_THRESH

        # Only box-level foreground RoIs contribute to the mask loss.
        mask_target_weights = np.zeros(mask_target.shape)
        mask_target_weights[0:num_fg, :, :] = 1

        gt_boxes = roidb['boxes'][0:num_gt, :] * im_scales[im_i]
        gt_assignment = roidb['gt_assignment'][:, 0]
        # Targets are requested un-normalized (third argument False) and are
        # normalized per class below with self._mean / self._std.
        bbox_target_data = bbox_compute_targets(
            input_boxes, gt_boxes[gt_assignment[keep_inds], :4], False)
        bbox_target_data = np.hstack(
            (input_box_labels, bbox_target_data)).astype(np.float32, copy=False)
        bbox_targets, bbox_inside_weights = get_bbox_regression_label(
            bbox_target_data, self._num_classes)

        # Row i of the sampled arrays corresponds to RoI fg_inds_det[i].
        for i, det_idx in enumerate(fg_inds_det):
            cls = gt_classes[gt_assignment[det_idx]][0]
            if cls == 0:
                continue
            mean = self._mean
            std = self._std
            bbox_targets[i, cls * 4:cls * 4 + 4] -= mean[cls, :]
            bbox_targets[i, cls * 4:cls * 4 + 4] /= std[cls, :]

        bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32)
        # Prepend the image index as column 0 of every RoI.
        input_boxes = np.hstack(
            (im_i * np.ones((input_boxes.shape[0], 1)), input_boxes))
        bz = input_boxes.shape[0]

        rois_blob = np.vstack((rois_blob, input_boxes))
        masks_blob = np.concatenate(
            (masks_blob,
             mask_resize.reshape(bz, 1, self.input_mz, self.input_mz)), axis=0)
        box_labels_blob = np.concatenate(
            (box_labels_blob, input_box_labels), axis=0)
        mask_labels_blob = np.concatenate(
            (mask_labels_blob, input_mask_labels), axis=0)
        bbox_targets_blob = np.concatenate(
            (bbox_targets_blob, bbox_targets), axis=0)
        mask_targets_blob = np.concatenate(
            (mask_targets_blob,
             mask_target.reshape(bz, 1, cfg.MASK_SIZE, cfg.MASK_SIZE)), axis=0)
        bbox_inside_weights_blob = np.concatenate(
            (bbox_inside_weights_blob, bbox_inside_weights), axis=0)
        bbox_outside_weights_blob = np.concatenate(
            (bbox_outside_weights_blob, bbox_outside_weights), axis=0)
        mask_weights_blob = np.concatenate(
            (mask_weights_blob,
             mask_target_weights.reshape(bz, 1, cfg.MASK_SIZE, cfg.MASK_SIZE)),
            axis=0)

    return {
        'data': im_blob,
        'rois': rois_blob,
        'masks': masks_blob,
        'box_label': box_labels_blob,
        'mask_label': mask_labels_blob,
        'bbox_targets': bbox_targets_blob,
        'mask_targets': mask_targets_blob,
        'bbox_inside_weights': bbox_inside_weights_blob,
        'bbox_outside_weights': bbox_outside_weights_blob,
        'mask_weight': mask_weights_blob
    }