def _expand_bbox_targets(bbox_target_data):
    """Bounding-box regression targets are stored in a compact form in the
    roidb. This function expands those targets into the 4-of-4*K
    representation used by the network (i.e. only one class has non-zero
    targets). The loss weights are similarly expanded.

    Returns:
        bbox_targets (ndarray): N x 4K blob of regression targets
        bbox_inside_weights (ndarray): N x 4K blob of loss weights
    """
    num_bbox_reg_classes = cfg.MODEL.NUM_CLASSES
    if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
        num_bbox_reg_classes = 2  # bg and fg
    clss = bbox_target_data[:, 0]
    bbox_targets = blob_utils.zeros((clss.size, 4 * num_bbox_reg_classes))
    bbox_inside_weights = blob_utils.zeros(bbox_targets.shape)
    inds = np.where(clss > 0)[0]
    for ind in inds:
        cls = int(clss[ind])
        start = 4 * cls
        end = start + 4
        bbox_targets[ind, start:end] = bbox_target_data[ind, 1:]
        bbox_inside_weights[ind, start:end] = (1.0, 1.0, 1.0, 1.0)
    return bbox_targets, bbox_inside_weights
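
# Illustrative sketch (not part of the training pipeline): a toy walk-through
# of the 4-of-4*K expansion above, using plain NumPy in place of
# blob_utils.zeros and hypothetical values for a 3-class model.
def _demo_expand_bbox_targets():
    import numpy as np
    # One compact row: (cls, tx, ty, tw, th) for class 2
    compact = np.array([[2, 0.1, -0.2, 0.3, 0.05]], dtype=np.float32)
    num_classes = 3
    targets = np.zeros((1, 4 * num_classes), dtype=np.float32)
    weights = np.zeros_like(targets)
    cls = int(compact[0, 0])
    # Class 2 occupies columns 8:12; every other column stays zero
    targets[0, 4 * cls:4 * cls + 4] = compact[0, 1:]
    weights[0, 4 * cls:4 * cls + 4] = 1.0
    assert np.allclose(targets[0, 8:12], compact[0, 1:])
    assert weights.sum() == 4.0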
def keypoints_to_heatmap_labels(keypoints, rois):
    """Encode keypoint location in the target heatmap for use in
    SoftmaxWithLoss.
    """
    # Maps keypoints from the half-open interval [x1, x2) on continuous image
    # coordinates to the closed interval [0, HEATMAP_SIZE - 1] on discrete
    # image coordinates. We use the continuous <-> discrete conversion from
    # Heckbert 1990 ("What is the coordinate of a pixel?"): d = floor(c) and
    # c = d + 0.5, where d is a discrete coordinate and c is a continuous
    # coordinate.
    assert keypoints.shape[2] == cfg.KRCNN.NUM_KEYPOINTS
    shape = (len(rois), cfg.KRCNN.NUM_KEYPOINTS)
    heatmaps = blob_utils.zeros(shape)
    weights = blob_utils.zeros(shape)

    offset_x = rois[:, 0]
    offset_y = rois[:, 1]
    scale_x = cfg.KRCNN.HEATMAP_SIZE / (rois[:, 2] - rois[:, 0])
    scale_y = cfg.KRCNN.HEATMAP_SIZE / (rois[:, 3] - rois[:, 1])

    for kp in range(keypoints.shape[2]):
        vis = keypoints[:, 2, kp] > 0
        x = keypoints[:, 0, kp].astype(np.float32)
        y = keypoints[:, 1, kp].astype(np.float32)
        # Since we use floor below, if a keypoint is exactly on the roi's
        # right or bottom boundary, we shift it in by eps (conceptually) to
        # keep it in the ground truth heatmap.
        x_boundary_inds = np.where(x == rois[:, 2])[0]
        y_boundary_inds = np.where(y == rois[:, 3])[0]
        x = (x - offset_x) * scale_x
        x = np.floor(x)
        if len(x_boundary_inds) > 0:
            x[x_boundary_inds] = cfg.KRCNN.HEATMAP_SIZE - 1
        y = (y - offset_y) * scale_y
        y = np.floor(y)
        if len(y_boundary_inds) > 0:
            y[y_boundary_inds] = cfg.KRCNN.HEATMAP_SIZE - 1

        valid_loc = np.logical_and(
            np.logical_and(x >= 0, y >= 0),
            np.logical_and(x < cfg.KRCNN.HEATMAP_SIZE,
                           y < cfg.KRCNN.HEATMAP_SIZE))
        valid = np.logical_and(valid_loc, vis)
        valid = valid.astype(np.int32)

        lin_ind = y * cfg.KRCNN.HEATMAP_SIZE + x
        heatmaps[:, kp] = lin_ind * valid
        weights[:, kp] = valid
    return heatmaps, weights
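
# Illustrative sketch (not part of the training pipeline): the continuous ->
# discrete mapping used above, with hypothetical roi and heatmap sizes. A
# keypoint exactly on the roi's right edge would floor to bin HEATMAP_SIZE,
# hence the boundary clamp in the function above.
def _demo_heckbert_mapping():
    import numpy as np
    heatmap_size = 56          # hypothetical cfg.KRCNN.HEATMAP_SIZE
    x1, x2 = 10.0, 20.0        # hypothetical roi extent in x
    scale_x = heatmap_size / (x2 - x1)
    for x in (15.0, 19.99, 20.0):
        col = np.floor((x - x1) * scale_x)
        if x == x2:            # the boundary shift, as in the function above
            col = heatmap_size - 1
        print('x=%.2f -> heatmap column %d' % (x, int(col)))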
def add_uv_rcnn_blobs(blobs, sampled_boxes, roidb, im_scale, batch_idx):
    """Add DensePose (UV) R-CNN specific blobs to the input blob dictionary."""
    M = cfg.UVRCNN.HEATMAP_SIZE
    IsFlipped = roidb['flipped']

    polys_gt_inds = np.where(roidb['ignore_UV_body'] == 0)[0]
    boxes_from_polys = [roidb['boxes'][i, :] for i in polys_gt_inds]
    if boxes_from_polys:
        boxes_from_polys = np.vstack(boxes_from_polys)
    boxes_from_polys = np.array(boxes_from_polys)

    fg_inds = np.where(blobs['labels_int32'] > 0)[0]
    roi_has_mask = np.zeros(blobs['labels_int32'].shape)

    if boxes_from_polys.any() and fg_inds.shape[0] > 0:
        # Keep only fg rois that overlap a densepose-annotated gt box by
        # more than 0.7 IoU.
        rois_fg = sampled_boxes[fg_inds]
        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_polys.astype(np.float32, copy=False))
        fg_polys_value = np.max(overlaps_bbfg_bbpolys, axis=1)
        fg_inds = fg_inds[fg_polys_value > 0.7]

    if boxes_from_polys.any() and fg_inds.shape[0] > 0:
        for jj in fg_inds:
            roi_has_mask[jj] = 1

        # Create blobs for densepose supervision.
        # The part-segmentation mask targets
        All_labels = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True)
        All_Weights = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True)
        # The point annotations (at most 196 annotated points per roi)
        X_points = blob_utils.zeros((fg_inds.shape[0], 196), int32=False)
        Y_points = blob_utils.zeros((fg_inds.shape[0], 196), int32=False)
        Ind_points = blob_utils.zeros((fg_inds.shape[0], 196), int32=True)
        I_points = blob_utils.zeros((fg_inds.shape[0], 196), int32=True)
        U_points = blob_utils.zeros((fg_inds.shape[0], 196), int32=False)
        V_points = blob_utils.zeros((fg_inds.shape[0], 196), int32=False)
        Uv_point_weights = blob_utils.zeros((fg_inds.shape[0], 196),
                                            int32=False)

        # Map each fg roi to the gt box with which it overlaps most.
        rois_fg = sampled_boxes[fg_inds]
        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_polys.astype(np.float32, copy=False))
        fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1)

        for i in range(rois_fg.shape[0]):
            fg_polys_ind = polys_gt_inds[fg_polys_inds[i]]
            Ilabel = segm_utils.GetDensePoseMask(
                roidb['dp_masks'][fg_polys_ind])
            GT_I = np.array(roidb['dp_I'][fg_polys_ind])
            GT_U = np.array(roidb['dp_U'][fg_polys_ind])
            GT_V = np.array(roidb['dp_V'][fg_polys_ind])
            GT_x = np.array(roidb['dp_x'][fg_polys_ind])
            GT_y = np.array(roidb['dp_y'][fg_polys_ind])
            GT_weights = np.ones(GT_I.shape).astype(np.float32)

            # Flip the densepose annotation if the image is flipped.
            if IsFlipped:
                GT_I, GT_U, GT_V, GT_x, GT_y, Ilabel = \
                    DP.get_symmetric_densepose(GT_I, GT_U, GT_V, GT_x, GT_y,
                                               Ilabel)

            roi_fg = rois_fg[i]
            roi_gt = boxes_from_polys[fg_polys_inds[i], :]

            x1, y1, x2, y2 = roi_fg[0], roi_fg[1], roi_fg[2], roi_fg[3]
            x1_source, y1_source = roi_gt[0], roi_gt[1]
            x2_source, y2_source = roi_gt[2], roi_gt[3]

            # Sample an M x M grid over the fg roi, expressed in the
            # 256 x 256 coordinate frame of the gt densepose annotation.
            x_targets = (np.arange(x1, x2, (x2 - x1) / M) - x1_source) \
                * (256. / (x2_source - x1_source))
            y_targets = (np.arange(y1, y2, (y2 - y1) / M) - y1_source) \
                * (256. / (y2_source - y1_source))
            # np.arange can occasionally yield M + 1 samples, so make sure
            # the size is exactly M.
            x_targets = x_targets[0:M]
            y_targets = y_targets[0:M]

            X_targets, Y_targets = np.meshgrid(x_targets, y_targets)
            New_Index = cv2.remap(Ilabel,
                                  X_targets.astype(np.float32),
                                  Y_targets.astype(np.float32),
                                  interpolation=cv2.INTER_NEAREST,
                                  borderMode=cv2.BORDER_CONSTANT,
                                  borderValue=(0))

            All_L = New_Index
            All_W = np.ones(New_Index.shape)

            gt_length_x = x2_source - x1_source
            gt_length_y = y2_source - y1_source

            # Map annotated points from the 256 x 256 gt frame into the
            # M x M heatmap frame of the fg roi.
            GT_y = ((GT_y / 256. * gt_length_y) + y1_source - y1) \
                * (M / (y2 - y1))
            GT_x = ((GT_x / 256. * gt_length_x) + x1_source - x1) \
                * (M / (x2 - x1))

            # Drop points that fall outside the heatmap (part index 0 means
            # unannotated).
            GT_I[GT_y < 0] = 0
            GT_I[GT_y > (M - 1)] = 0
            GT_I[GT_x < 0] = 0
            GT_I[GT_x > (M - 1)] = 0

            points_inside = GT_I > 0
            GT_U = GT_U[points_inside]
            GT_V = GT_V[points_inside]
            GT_x = GT_x[points_inside]
            GT_y = GT_y[points_inside]
            GT_weights = GT_weights[points_inside]
            GT_I = GT_I[points_inside]

            X_points[i, 0:len(GT_x)] = GT_x
            Y_points[i, 0:len(GT_y)] = GT_y
            Ind_points[i, 0:len(GT_I)] = i
            I_points[i, 0:len(GT_I)] = GT_I
            U_points[i, 0:len(GT_U)] = GT_U
            V_points[i, 0:len(GT_V)] = GT_V
            Uv_point_weights[i, 0:len(GT_weights)] = GT_weights

            All_labels[i, :] = np.reshape(All_L.astype(np.int32), M**2)
            All_Weights[i, :] = np.reshape(All_W.astype(np.int32), M**2)
    else:
        # If there are no fg rois with densepose annotations (it does
        # happen), the network cannot handle empty blobs, so we provide one
        # bg roi with all-zero targets and weights.
        bg_inds = np.where(blobs['labels_int32'] == 0)[0]
        if len(bg_inds) == 0:
            rois_fg = sampled_boxes[0].reshape((1, -1))
        else:
            rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1))
        roi_has_mask[0] = 1

        X_points = blob_utils.zeros((1, 196), int32=False)
        Y_points = blob_utils.zeros((1, 196), int32=False)
        Ind_points = blob_utils.zeros((1, 196), int32=True)
        I_points = blob_utils.zeros((1, 196), int32=True)
        U_points = blob_utils.zeros((1, 196), int32=False)
        V_points = blob_utils.zeros((1, 196), int32=False)
        Uv_point_weights = blob_utils.zeros((1, 196), int32=False)

        All_labels = blob_utils.zeros((1, M**2), int32=True)
        All_Weights = blob_utils.zeros((1, M**2), int32=True)

    rois_fg *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1))
    rois_fg = np.hstack((repeated_batch_idx, rois_fg))

    # Tile the U/V targets K + 1 times (one block per patch); the weight
    # block for patch j is active only where a point's part index equals j.
    K = cfg.UVRCNN.NUM_PATCHES
    U_points = np.tile(U_points, [1, K + 1])
    V_points = np.tile(V_points, [1, K + 1])
    Uv_Weight_Points = np.zeros(U_points.shape)
    for jjj in range(1, K + 1):
        Uv_Weight_Points[:, jjj * I_points.shape[1]:(jjj + 1) *
                         I_points.shape[1]] = (I_points == jjj).astype(
                             np.float32)

    # Update blobs dict with DensePose R-CNN blobs
    blobs['uv_rois'] = np.array(rois_fg)
    blobs['roi_has_uv_int32'] = np.array(roi_has_mask).astype(np.int32)
    blobs['uv_ann_labels'] = np.array(All_labels).astype(np.int32)
    blobs['uv_ann_weights'] = np.array(All_Weights).astype(np.float32)
    blobs['uv_X_points'] = X_points.astype(np.float32)
    blobs['uv_Y_points'] = Y_points.astype(np.float32)
    blobs['uv_Ind_points'] = Ind_points.astype(np.float32)
    blobs['uv_I_points'] = I_points.astype(np.float32)
    # VERY IMPORTANT: note the ordering of the U/V blobs here.
    blobs['uv_U_points'] = U_points.astype(np.float32)
    blobs['uv_V_points'] = V_points.astype(np.float32)
    blobs['uv_point_weights'] = Uv_Weight_Points.astype(np.float32)
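
# Illustrative sketch (not part of the training pipeline) of the U/V tiling
# at the end of add_uv_rcnn_blobs: U/V targets are repeated K + 1 times and
# the weight block for patch j is nonzero only where a point's part index I
# equals j. Assumes K = 2 patches and 4 point slots per roi for brevity.
def _demo_uv_patch_weights():
    import numpy as np
    K = 2
    I_points = np.array([[1, 2, 2, 0]])        # part index per point (0 = unused)
    U_points = np.array([[0.1, 0.5, 0.9, 0.0]])
    U_tiled = np.tile(U_points, [1, K + 1])    # shape (1, 12)
    W = np.zeros(U_tiled.shape)
    n = I_points.shape[1]
    for j in range(1, K + 1):
        W[:, j * n:(j + 1) * n] = (I_points == j).astype(np.float32)
    # Patch-1 block supervises only point 0; patch-2 block points 1 and 2.
    return U_tiled, W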
def add_parsing_rcnn_blobs(blobs, sampled_boxes, roidb, im_scale, batch_idx):
    """Add Parsing R-CNN specific blobs to the input blob dictionary."""
    # Prepare the parsing targets by associating one gt parsing to each
    # training roi that has a fg (non-bg) class label.
    M = cfg.PRCNN.RESOLUTION
    polys_gt_inds = np.where((roidb['gt_classes'] > 0) &
                             (roidb['is_crowd'] == 0))[0]
    parsing_gt = [roidb['parsing'][i] for i in polys_gt_inds]
    boxes_from_png = parsing_utils.parsing_to_boxes(parsing_gt,
                                                    roidb['flipped'])

    fg_inds = np.where(blobs['labels_int32'] > 0)[0]
    if fg_inds.shape[0] > 0:
        if cfg.PRCNN.ROI_BATCH_SIZE > 0:
            fg_rois_per_this_image = np.minimum(cfg.PRCNN.ROI_BATCH_SIZE,
                                                fg_inds.shape[0])
            fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image,
                                 replace=False)
        parsings = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True)

        # Find overlap between all foreground rois and the bounding boxes
        # enclosing each segmentation
        rois_fg = sampled_boxes[fg_inds]
        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_png.astype(np.float32, copy=False))
        # Map from each fg roi to the index of the parsing with highest
        # overlap (measured by bbox overlap)
        fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1)

        # Add fg targets
        for i in range(rois_fg.shape[0]):
            fg_polys_ind = fg_polys_inds[i]
            parsing_gt_fg = parsing_gt[fg_polys_ind]
            roi_fg = rois_fg[i]
            # Rasterize the portion of the gt parsing within the given fg
            # roi to an M x M label image
            parsing = parsing_utils.parsing_wrt_box(parsing_gt_fg, roi_fg, M,
                                                    roidb['flipped'])
            parsings[i, :] = parsing
        weights = blob_utils.ones((rois_fg.shape[0], M**2))
    else:
        # If there are no fg parsing targets (it does happen), the network
        # cannot handle empty blobs, so we must provide one. We simply take
        # the first bg roi and give it an all-zero target with all-zero
        # weights, so the loss ignores it.
        bg_inds = np.where(blobs['labels_int32'] == 0)[0]
        # rois_fg is actually one background roi, but that's ok because the
        # zero weights below remove it from the loss
        if len(bg_inds) == 0:
            rois_fg = sampled_boxes[0].reshape((1, -1))
        else:
            rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1))
        parsings = blob_utils.zeros((1, M**2), int32=True)
        weights = blob_utils.zeros((1, M**2))

    parsings = np.reshape(parsings, (-1, 1))
    weights = np.reshape(weights, (-1, 1))

    # Scale rois_fg and format as (batch_idx, x1, y1, x2, y2)
    rois_fg *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1))
    rois_fg = np.hstack((repeated_batch_idx, rois_fg))

    # Update blobs dict with Parsing R-CNN blobs
    blobs['parsing_rois'] = rois_fg
    blobs['parsing_weights'] = weights
    blobs['parsing_int32'] = parsings
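
# Hypothetical sketch of what parsing_utils.parsing_wrt_box plausibly does
# (the real helper lives in parsing_utils, also handles flipping, and may
# differ): crop the per-pixel part-label map to the roi and resize it to
# M x M with nearest-neighbor sampling so labels are never interpolated.
def _sketch_parsing_wrt_box(parsing, box, M):
    import cv2
    import numpy as np
    h, w = parsing.shape[:2]
    x1, y1, x2, y2 = (int(round(v)) for v in box[:4])
    # Clip the roi to the image and guard against degenerate boxes
    x1, y1 = max(x1, 0), max(y1, 0)
    x2 = min(max(x2, x1 + 1), w)
    y2 = min(max(y2, y1 + 1), h)
    crop = parsing[y1:y2, x1:x2]
    resized = cv2.resize(crop, (M, M), interpolation=cv2.INTER_NEAREST)
    return resized.astype(np.int32).reshape(M**2)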
def add_mask_rcnn_blobs(blobs, sampled_boxes, roidb, im_scale, batch_idx):
    """Add Mask R-CNN specific blobs to the input blob dictionary."""
    # Prepare the mask targets by associating one gt mask to each training
    # roi that has a fg (non-bg) class label.
    M = cfg.MRCNN.RESOLUTION
    polys_gt_inds = np.where((roidb['gt_classes'] > 0) &
                             (roidb['is_crowd'] == 0))[0]
    polys_gt = [roidb['segms'][i] for i in polys_gt_inds]
    boxes_from_polys = segm_utils.polys_to_boxes(polys_gt)
    # boxes_from_polys = [roidb['boxes'][i] for i in polys_gt_inds]

    fg_inds = np.where(blobs['labels_int32'] > 0)[0]
    roi_has_mask = blobs['labels_int32'].copy()
    roi_has_mask[roi_has_mask > 0] = 1

    if fg_inds.shape[0] > 0:
        if cfg.MRCNN.ROI_BATCH_SIZE > 0:
            fg_rois_per_this_image = np.minimum(cfg.MRCNN.ROI_BATCH_SIZE,
                                                fg_inds.shape[0])
            fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image,
                                 replace=False)
        # Class labels for the foreground rois
        mask_class_labels = blobs['labels_int32'][fg_inds]
        masks = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True)

        # Find overlap between all foreground rois and the bounding boxes
        # enclosing each segmentation
        rois_fg = sampled_boxes[fg_inds]
        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_polys.astype(np.float32, copy=False))
        # Map from each fg roi to the index of the mask with highest overlap
        # (measured by bbox overlap)
        fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1)

        # Add fg targets
        for i in range(rois_fg.shape[0]):
            fg_polys_ind = fg_polys_inds[i]
            poly_gt = polys_gt[fg_polys_ind]
            roi_fg = rois_fg[i]
            # Rasterize the portion of the polygon mask within the given fg
            # roi to an M x M binary image
            mask = segm_utils.polys_to_mask_wrt_box(poly_gt, roi_fg, M)
            mask = np.array(mask > 0, dtype=np.int32)  # Ensure it's binary
            masks[i, :] = np.reshape(mask, M**2)
    else:
        # If there are no fg masks (it does happen), the network cannot
        # handle empty blobs, so we must provide a mask. We simply take the
        # first bg roi, give it an all -1's mask (ignore label), and label
        # it with class zero (bg).
        bg_inds = np.where(blobs['labels_int32'] == 0)[0]
        # rois_fg is actually one background roi, but that's ok because its
        # mask is entirely the ignore label
        if len(bg_inds) == 0:
            rois_fg = sampled_boxes[0].reshape((1, -1))
        else:
            rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1))
        # We give it an all -1's blob (ignore label)
        masks = -blob_utils.ones((1, M**2), int32=True)
        # We label it with class = 0 (background)
        mask_class_labels = blob_utils.zeros((1, ))
        # Mark that the first roi has a mask
        roi_has_mask[0] = 1

    if cfg.MRCNN.CLS_SPECIFIC_MASK:
        masks = _expand_to_class_specific_mask_targets(masks,
                                                       mask_class_labels)

    # Scale rois_fg and format as (batch_idx, x1, y1, x2, y2)
    rois_fg *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1))
    rois_fg = np.hstack((repeated_batch_idx, rois_fg))

    # Update blobs dict with Mask R-CNN blobs
    blobs['mask_rois'] = rois_fg
    blobs['roi_has_mask_int32'] = roi_has_mask
    blobs['masks_int32'] = masks
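
# Sketch of the class-specific expansion applied when
# cfg.MRCNN.CLS_SPECIFIC_MASK is on. The real
# _expand_to_class_specific_mask_targets is defined elsewhere in this module
# and may differ; this mirrors _expand_bbox_targets above: each roi's M**2
# mask is copied into the slot of its class and every other entry is -1
# (the ignore label).
def _sketch_expand_to_class_specific_mask_targets(masks, mask_class_labels,
                                                  M, num_classes):
    import numpy as np
    mask_targets = -np.ones((masks.shape[0], num_classes * M**2),
                            dtype=np.int32)
    for i in range(masks.shape[0]):
        cls = int(mask_class_labels[i])
        if cls > 0:  # background rois keep the all -1 (ignore) target
            start = M**2 * cls
            mask_targets[i, start:start + M**2] = masks[i, :]
    return mask_targets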
def add_keypoint_rcnn_blobs(blobs, roidb, fg_rois_per_image, fg_inds,
                            im_scale, batch_idx):
    """Add Mask R-CNN keypoint specific blobs to the given blobs dictionary."""
    # Note: gt_inds must match how they're computed in
    # datasets.json_dataset._merge_proposal_boxes_into_roidb
    gt_inds = np.where(roidb['gt_classes'] > 0)[0]
    max_overlaps = roidb['max_overlaps']
    gt_keypoints = roidb['gt_keypoints']

    ind_kp = gt_inds[roidb['box_to_gt_ind_map']]
    within_box = _within_box(gt_keypoints[ind_kp, :, :], roidb['boxes'])
    vis_kp = gt_keypoints[ind_kp, 2, :] > 0
    is_visible = np.sum(np.logical_and(vis_kp, within_box), axis=1) > 0
    kp_fg_inds = np.where(
        np.logical_and(max_overlaps >= cfg.TRAIN.FG_THRESH, is_visible))[0]

    if kp_fg_inds.size > 0:
        kp_fg_rois_per_this_image = np.minimum(fg_rois_per_image,
                                               kp_fg_inds.size)
        if cfg.KRCNN.ROI_BATCH_SIZE > 0:
            kp_fg_rois_per_this_image = np.minimum(
                kp_fg_rois_per_this_image, cfg.KRCNN.ROI_BATCH_SIZE)
        if kp_fg_inds.size > kp_fg_rois_per_this_image:
            kp_fg_inds = np.random.choice(kp_fg_inds,
                                          size=kp_fg_rois_per_this_image,
                                          replace=False)

        sampled_fg_rois = roidb['boxes'][kp_fg_inds]
        box_to_gt_ind_map = roidb['box_to_gt_ind_map'][kp_fg_inds]

        num_keypoints = gt_keypoints.shape[2]
        sampled_keypoints = -np.ones(
            (len(sampled_fg_rois), gt_keypoints.shape[1], num_keypoints),
            dtype=gt_keypoints.dtype)
        for ii in range(len(sampled_fg_rois)):
            ind = box_to_gt_ind_map[ii]
            if ind >= 0:
                sampled_keypoints[ii, :, :] = gt_keypoints[gt_inds[ind], :, :]
                assert np.sum(sampled_keypoints[ii, 2, :]) > 0

        if cfg.KRCNN.GAUSS_HEATMAP:
            heats, weights = keypoint_utils.keypoints_to_gauss_heatmap_labels(
                sampled_keypoints, sampled_fg_rois)
            shape = (sampled_fg_rois.shape[0] * cfg.KRCNN.NUM_KEYPOINTS, )
            shape_heats = (sampled_fg_rois.shape[0] * cfg.KRCNN.NUM_KEYPOINTS,
                           cfg.KRCNN.HEATMAP_SIZE, cfg.KRCNN.HEATMAP_SIZE)
            heats = heats.reshape(shape_heats)
            weights = weights.reshape(shape)
        else:
            heats, weights = keypoint_utils.keypoints_to_heatmap_labels(
                sampled_keypoints, sampled_fg_rois)
            shape = (sampled_fg_rois.shape[0] * cfg.KRCNN.NUM_KEYPOINTS, 1)
            heats = heats.reshape(shape)
            weights = weights.reshape(shape)
    else:
        # If there are no fg keypoint rois (it does happen), the network
        # cannot handle empty blobs, so we must provide a keypoint blob. We
        # simply take the first bg roi.
        kp_bg_inds = np.where(blobs['labels_int32'] == 0)[0]
        # sampled_fg_rois is actually one background roi, but that's ok
        # because its weights are all zero
        if len(kp_bg_inds) == 0:
            sampled_fg_rois = roidb['boxes'][0].reshape((1, -1))
        else:
            sampled_fg_rois = roidb['boxes'][kp_bg_inds[0]].reshape((1, -1))
        if cfg.KRCNN.GAUSS_HEATMAP:
            shape = (sampled_fg_rois.shape[0] * cfg.KRCNN.NUM_KEYPOINTS, )
            shape_heats = (sampled_fg_rois.shape[0] * cfg.KRCNN.NUM_KEYPOINTS,
                           cfg.KRCNN.HEATMAP_SIZE, cfg.KRCNN.HEATMAP_SIZE)
            heats = blob_utils.zeros(shape_heats)
            weights = blob_utils.zeros(shape)
        else:
            shape = (sampled_fg_rois.shape[0] * cfg.KRCNN.NUM_KEYPOINTS, 1)
            heats = blob_utils.zeros(shape)
            weights = blob_utils.zeros(shape)

    sampled_fg_rois *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones(
        (sampled_fg_rois.shape[0], 1))
    sampled_fg_rois = np.hstack((repeated_batch_idx, sampled_fg_rois))

    blobs['keypoint_rois'] = sampled_fg_rois
    if cfg.KRCNN.GAUSS_HEATMAP:
        # Gaussian targets are soft labels, so they stay float despite the
        # blob's _int32 suffix.
        blobs['keypoint_locations_int32'] = heats.astype(np.float32,
                                                         copy=False)
    else:
        blobs['keypoint_locations_int32'] = heats.astype(np.int32,
                                                         copy=False)
    blobs['keypoint_weights'] = weights
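
# Sketch of the _within_box helper referenced above (assumed, not the
# verbatim implementation): points is N x 3 x K holding (x, y, vis) and
# boxes is N x 4; returns an N x K boolean array that is True where keypoint
# k of roi n lies inside roi n's box.
def _sketch_within_box(points, boxes):
    import numpy as np
    x_within = np.logical_and(
        points[:, 0, :] >= boxes[:, 0, np.newaxis],
        points[:, 0, :] <= boxes[:, 2, np.newaxis])
    y_within = np.logical_and(
        points[:, 1, :] >= boxes[:, 1, np.newaxis],
        points[:, 1, :] <= boxes[:, 3, np.newaxis])
    return np.logical_and(x_within, y_within)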
def add_rpn_blobs(blobs, im_scales, roidb):
    """Add blobs needed for training RPN-only and end-to-end Faster R-CNN
    models.
    """
    if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
        # RPN applied to many feature levels, as in the FPN paper
        k_max = cfg.FPN.RPN_MAX_LEVEL
        k_min = cfg.FPN.RPN_MIN_LEVEL
        foas = []
        for lvl in range(k_min, k_max + 1):
            field_stride = min(2.**lvl, cfg.FPN.BACKBONE_STRIDE)
            anchor_sizes = (cfg.FPN.RPN_ANCHOR_START_SIZE *
                            2.**(lvl - k_min), )
            anchor_aspect_ratios = cfg.FPN.RPN_ASPECT_RATIOS
            foa = data_utils.get_field_of_anchors(field_stride, anchor_sizes,
                                                  anchor_aspect_ratios)
            foas.append(foa)
        all_anchors = np.concatenate([f.field_of_anchors for f in foas])
    else:
        foa = data_utils.get_field_of_anchors(cfg.RPN.STRIDE, cfg.RPN.SIZES,
                                              cfg.RPN.ASPECT_RATIOS)
        all_anchors = foa.field_of_anchors

    for im_i, entry in enumerate(roidb):
        scale = im_scales[im_i]
        im_height = np.round(entry['height'] * scale)
        im_width = np.round(entry['width'] * scale)
        gt_inds = np.where((entry['gt_classes'] > 0) &
                           (entry['is_crowd'] == 0))[0]
        gt_rois = entry['boxes'][gt_inds, :] * scale
        # TODO(rbg): gt_boxes is poorly named;
        # should be something like 'gt_rois_info'
        gt_boxes = blob_utils.zeros((len(gt_inds), 6))
        gt_boxes[:, 0] = im_i  # batch inds
        gt_boxes[:, 1:5] = gt_rois
        gt_boxes[:, 5] = entry['gt_classes'][gt_inds]
        im_info = np.array([[im_height, im_width, scale]], dtype=np.float32)
        blobs['im_info'].append(im_info)

        # Add RPN targets
        if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
            # RPN applied to many feature levels, as in the FPN paper
            rpn_blobs = _get_rpn_blobs(im_height, im_width, foas, all_anchors,
                                       gt_rois)
            for i, lvl in enumerate(range(k_min, k_max + 1)):
                for k, v in rpn_blobs[i].items():
                    blobs[k + '_fpn' + str(lvl)].append(v)
        else:
            # Classical RPN, applied to a single feature level
            rpn_blobs = _get_rpn_blobs(im_height, im_width, [foa],
                                       all_anchors, gt_rois)
            for k, v in rpn_blobs.items():
                blobs[k].append(v)

    for k, v in blobs.items():
        if isinstance(v, list) and len(v) > 0:
            blobs[k] = np.concatenate(v)

    valid_keys = [
        'has_visible_keypoints', 'boxes', 'segms', 'seg_areas', 'gt_classes',
        'gt_overlaps', 'is_crowd', 'box_to_gt_ind_map', 'gt_keypoints'
    ]
    if cfg.MODEL.UV_ON:
        valid_keys.extend([
            'flipped', 'ignore_UV_body', 'dp_x', 'dp_y', 'dp_I', 'dp_U',
            'dp_V', 'dp_masks', 'has_uv'
        ])
    if cfg.MODEL.PARSING_ON:
        valid_keys.extend(['parsing', 'has_parsing'])
    if 'flipped' not in valid_keys:
        valid_keys.append('flipped')
    minimal_roidb = [{} for _ in range(len(roidb))]
    for i, e in enumerate(roidb):
        for k in valid_keys:
            if k in e:
                minimal_roidb[i][k] = e[k]
    # blobs['roidb'] = blob_utils.serialize(minimal_roidb)
    blobs['roidb'] = minimal_roidb

    # Always return valid=True, since RPN minibatches are valid by design
    return True
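
# Illustrative walk-through (hypothetical config values only) of the
# per-level FPN anchor geometry above: the field stride doubles per level,
# capped at cfg.FPN.BACKBONE_STRIDE, and the single anchor size per level is
# RPN_ANCHOR_START_SIZE * 2**(lvl - k_min).
def _demo_fpn_anchor_geometry():
    k_min, k_max = 2, 6        # e.g. levels P2..P6
    start_size = 32            # hypothetical cfg.FPN.RPN_ANCHOR_START_SIZE
    backbone_stride = 32       # hypothetical cfg.FPN.BACKBONE_STRIDE cap
    for lvl in range(k_min, k_max + 1):
        stride = min(2.0**lvl, backbone_stride)
        size = start_size * 2.0**(lvl - k_min)
        print('P%d: field stride=%g, anchor size=%g' % (lvl, stride, size))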