def _sample_rois(roidb, im_scale, batch_idx): """Generate a random sample of RoIs comprising foreground and background examples. """ rois_per_image = int(cfg.TRAIN.BATCH_SIZE_PER_IM) fg_rois_per_image = int(np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)) max_overlaps = roidb['max_overlaps'] # Select foreground RoIs as those with >= FG_THRESH overlap fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0] # Guard against the case when an image has fewer than fg_rois_per_image # foreground RoIs fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_inds.size) # Sample foreground regions without replacement if fg_inds.size > 0: fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False) # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) & (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0] # Compute number of background RoIs to take from this image (guarding # against there being fewer than desired) bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image bg_rois_per_this_image = np.minimum(bg_rois_per_this_image, bg_inds.size) # Sample foreground regions without replacement if bg_inds.size > 0: bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False) # The indices that we're selecting (both fg and bg) keep_inds = np.append(fg_inds, bg_inds) # Label is the class each RoI has max overlap with sampled_labels = roidb['max_classes'][keep_inds] sampled_labels[fg_rois_per_this_image:] = 0 # Label bg RoIs with class 0 sampled_boxes = roidb['boxes'][keep_inds] bbox_targets, bbox_inside_weights = _expand_bbox_targets( roidb['bbox_targets'][keep_inds, :]) bbox_outside_weights = np.array(bbox_inside_weights > 0, dtype=bbox_inside_weights.dtype) # Scale rois and format as (batch_idx, x1, y1, x2, y2) sampled_rois = sampled_boxes * im_scale repeated_batch_idx = batch_idx * blob_utils.ones( (sampled_rois.shape[0], 1)) sampled_rois = np.hstack((repeated_batch_idx, sampled_rois)) # Base Fast R-CNN blobs blob_dict = dict(labels_int32=sampled_labels.astype(np.int32, copy=False), rois=sampled_rois, bbox_targets=bbox_targets, bbox_inside_weights=bbox_inside_weights, bbox_outside_weights=bbox_outside_weights) return blob_dict
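# For context: these per-image samplers are normally driven by a loop over the
# roidb, one call per image, with the per-image blob dicts appended and then
# concatenated across the minibatch. A minimal sketch of that caller pattern
# (assumed, Detectron-style; not necessarily this repo's exact entry point):
def _add_fast_rcnn_blobs_sketch(blobs, im_scales, roidb):
    """Illustrative only: sample RoIs per image and merge them into minibatch blobs."""
    for im_i, entry in enumerate(roidb):
        # _sample_rois (above) returns one dict of per-image blobs
        frcn_blobs = _sample_rois(entry, im_scales[im_i], im_i)
        for k, v in frcn_blobs.items():
            blobs[k].append(v)
    # Concatenate the per-image lists into single minibatch arrays
    for k, v in blobs.items():
        if isinstance(v, list) and len(v) > 0:
            blobs[k] = np.concatenate(v)
    return True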
def _sample_rois_stage_3(roidb, im_scale, batch_idx):
    rois_per_image = int(cfg.TRAIN.BATCH_SIZE_PER_IM)
    fg_rois_per_image = int(np.round(cfg.TRAIN.FG_FRACTION * rois_per_image))
    max_overlaps_stage_3 = roidb['max_overlaps_stage_3']
    fg_inds_stage_3 = np.where(
        max_overlaps_stage_3 >= cfg.TRAIN.CASCADE_THRESHOLDS[2])[0]
    fg_rois_stage_3_per_this_image = np.minimum(fg_rois_per_image,
                                                fg_inds_stage_3.size)
    if fg_inds_stage_3.size > 0:
        fg_inds_stage_3 = npr.choice(fg_inds_stage_3,
                                     size=fg_rois_stage_3_per_this_image,
                                     replace=False)
    bg_inds_stage_3 = np.where(
        (max_overlaps_stage_3 < cfg.TRAIN.BG_THRESH_HI)
        & (max_overlaps_stage_3 >= cfg.TRAIN.BG_THRESH_LO))[0]
    bg_rois_stage_3_per_this_image = rois_per_image - fg_rois_stage_3_per_this_image
    bg_rois_stage_3_per_this_image = np.minimum(bg_rois_stage_3_per_this_image,
                                                bg_inds_stage_3.size)
    if bg_inds_stage_3.size > 0:
        bg_inds_stage_3 = npr.choice(bg_inds_stage_3,
                                     size=bg_rois_stage_3_per_this_image,
                                     replace=False)
    keep_inds_stage_3 = np.append(fg_inds_stage_3, bg_inds_stage_3)
    sampled_labels_stage_3 = roidb['max_classes_stage_3'][keep_inds_stage_3]
    sampled_labels_stage_3[
        fg_rois_stage_3_per_this_image:] = 0  # Label bg RoIs with class 0
    sampled_boxes_stage_3 = roidb['boxes_stage_3'][keep_inds_stage_3]
    bbox_targets_stage_3, bbox_inside_weights_stage_3 = _expand_bbox_targets(
        roidb['bbox_targets_stage_3'][keep_inds_stage_3, :])
    bbox_outside_weights_stage_3 = np.array(
        bbox_inside_weights_stage_3 > 0,
        dtype=bbox_inside_weights_stage_3.dtype)
    sampled_rois_stage_3 = sampled_boxes_stage_3 * im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones(
        (sampled_rois_stage_3.shape[0], 1))
    sampled_rois_stage_3 = np.hstack(
        (repeated_batch_idx, sampled_rois_stage_3))
    blob_dict = dict(
        labels_stage_3=sampled_labels_stage_3.astype(np.int32, copy=False),
        rois_stage_3=sampled_rois_stage_3,
        bbox_targets_stage_3=bbox_targets_stage_3,
        bbox_inside_weights_stage_3=bbox_inside_weights_stage_3,
        bbox_outside_weights_stage_3=bbox_outside_weights_stage_3)
    return blob_dict
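# Toy illustration (not repo code) of the stage-3 fg/bg split above: with an
# assumed cascade threshold schedule of [0.5, 0.6, 0.7] and an assumed
# background band of [0.0, 0.5), only high-IoU proposals survive as
# foreground at the last stage.
def _stage_3_split_example():
    cascade_thresholds = [0.5, 0.6, 0.7]   # assumed per-stage IoU thresholds
    bg_thresh_hi, bg_thresh_lo = 0.5, 0.0  # assumed background band
    max_overlaps = np.array([0.05, 0.3, 0.55, 0.65, 0.72, 0.9])
    fg_inds = np.where(max_overlaps >= cascade_thresholds[2])[0]
    bg_inds = np.where((max_overlaps < bg_thresh_hi) &
                       (max_overlaps >= bg_thresh_lo))[0]
    print(fg_inds)  # [4 5] -> only the proposals with IoU >= 0.7
    print(bg_inds)  # [0 1] -> overlaps in [0.0, 0.5); 0.55 and 0.65 are ignored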
def add_keypoint_rcnn_blobs( blobs, roidb, fg_rois_per_image, fg_inds, im_scale, batch_idx ): """Add Mask R-CNN keypoint specific blobs to the given blobs dictionary.""" # Note: gt_inds must match how they're computed in # datasets.json_dataset._merge_proposal_boxes_into_roidb gt_inds = np.where(roidb['gt_classes'] > 0)[0] max_overlaps = roidb['max_overlaps'] gt_keypoints = roidb['gt_keypoints'] ind_kp = gt_inds[roidb['box_to_gt_ind_map']] within_box = _within_box(gt_keypoints[ind_kp, :, :], roidb['boxes']) vis_kp = gt_keypoints[ind_kp, 2, :] > 0 is_visible = np.sum(np.logical_and(vis_kp, within_box), axis=1) > 0 kp_fg_inds = np.where( np.logical_and(max_overlaps >= cfg.TRAIN.FG_THRESH, is_visible) )[0] kp_fg_rois_per_this_image = np.minimum(fg_rois_per_image, kp_fg_inds.size) if kp_fg_inds.size > kp_fg_rois_per_this_image: kp_fg_inds = np.random.choice( kp_fg_inds, size=kp_fg_rois_per_this_image, replace=False ) sampled_fg_rois = roidb['boxes'][kp_fg_inds] box_to_gt_ind_map = roidb['box_to_gt_ind_map'][kp_fg_inds] num_keypoints = gt_keypoints.shape[2] sampled_keypoints = -np.ones( (len(sampled_fg_rois), gt_keypoints.shape[1], num_keypoints), dtype=gt_keypoints.dtype ) for ii in range(len(sampled_fg_rois)): ind = box_to_gt_ind_map[ii] if ind >= 0: sampled_keypoints[ii, :, :] = gt_keypoints[gt_inds[ind], :, :] assert np.sum(sampled_keypoints[ii, 2, :]) > 0 heats, weights = keypoint_utils.keypoints_to_heatmap_labels( sampled_keypoints, sampled_fg_rois ) shape = (sampled_fg_rois.shape[0] * cfg.KRCNN.NUM_KEYPOINTS, 1) heats = heats.reshape(shape) weights = weights.reshape(shape) sampled_fg_rois *= im_scale repeated_batch_idx = batch_idx * blob_utils.ones( (sampled_fg_rois.shape[0], 1) ) sampled_fg_rois = np.hstack((repeated_batch_idx, sampled_fg_rois)) blobs['keypoint_rois'] = sampled_fg_rois blobs['keypoint_locations_int32'] = heats.astype(np.int32, copy=False) blobs['keypoint_weights'] = weights
def add_classification_blobs(blobs, im_scales, roidb):
    """Add blobs needed for training classification models."""
    # Sample training RoIs from each image and append them to the blob lists
    for im_i, entry in enumerate(roidb):
        blobs['rois'].append(im_i * blob_utils.ones(
            (entry['gt_classes'].shape[0], 1)))
        blobs['labels_int32'].append(entry['gt_classes'].astype(np.int32))
    # Concat the training blob lists into tensors
    for k, v in blobs.items():
        if isinstance(v, list) and len(v) > 0:
            blobs[k] = np.concatenate(v)
    valid = True
    return valid
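# A minimal usage sketch for add_classification_blobs (hypothetical driver
# code, not part of this file): the blob dict is expected to hold lists that
# the function appends to and then concatenates in place.
def _add_classification_blobs_example():
    roidb_example = [
        {'gt_classes': np.array([3, 7])},     # image 0: two gt labels
        {'gt_classes': np.array([1, 1, 5])},  # image 1: three gt labels
    ]
    blobs = {'rois': [], 'labels_int32': []}
    add_classification_blobs(blobs, im_scales=[1.0, 1.0], roidb=roidb_example)
    print(blobs['rois'].shape)    # (5, 1): one batch-index row per gt box
    print(blobs['labels_int32'])  # [3 7 1 1 5]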
def _sample_da_rois(roidb, im_scale, batch_idx):
    """Generate a random sample of RoIs for domain adaptation."""
    # max_overlaps = roidb['max_overlaps']
    da_boxes = roidb['da_boxes']
    rois_per_image = min(da_boxes.shape[0], int(cfg.TRAIN.BATCH_SIZE_PER_IM))

    # The indices that we're selecting (both fg and bg)
    # keep_inds = np.append(fg_inds, bg_inds)
    # keep_inds = npr.choice(range(max_overlaps.shape[0]), size=rois_per_image, replace=False)
    keep_inds = np.arange(
        rois_per_image)  # take top k, they are ordered by rpn_prob.
    # keep_inds = np.arange(max_overlaps.shape[0])

    # Label is the class each RoI has max overlap with
    # sampled_labels = roidb['max_classes'][keep_inds]
    # sampled_labels[fg_rois_per_image:] = 0  # Label bg RoIs with class 0
    # sampled_boxes = roidb['boxes'][keep_inds]
    # sampled_boxes = da_boxes[keep_inds, :]

    # Scale rois and format as (batch_idx, x1, y1, x2, y2)
    sampled_rois = da_boxes * im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones(
        (sampled_rois.shape[0], 1))
    sampled_rois = np.hstack((repeated_batch_idx, sampled_rois))

    # Base Fast R-CNN blobs
    blob_dict = dict(da_rois=sampled_rois)

    # add Domain Adaptive R-CNN blobs
    if roidb['is_source']:
        blob_dict['dc_label'] = np.expand_dims(
            np.ones(rois_per_image, dtype=np.int32), axis=1)
    else:
        blob_dict['dc_label'] = np.expand_dims(
            np.zeros(rois_per_image, dtype=np.int32), axis=1)

    if cfg.TRAIN.PADA:
        blob_dict['pada_roi_weights'] = roidb['pada_roi_weights']
        blob_dict['da_dc_mask'] = np.full(rois_per_image,
                                          roidb['is_source'].astype(bool))
    return blob_dict
def add_track_rcnn_blobs(blobs, sampled_boxes, roidb, im_scale, batch_idx):
    """Add Track R-CNN specific blobs to the input blob dictionary."""
    # Prepare the track targets by associating one gt track to each training
    # roi that has a fg (non-bg) class label.
    max_track = roidb['track_ids'][roidb['box_to_gt_ind_map']]
    # Copy so the in-place scaling below does not modify roidb['boxes']
    sampled_rois = roidb['boxes'].copy()
    # Scale sampled_rois and format as (batch_idx, x1, y1, x2, y2)
    sampled_rois *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones(
        (sampled_rois.shape[0], 1))
    sampled_rois = np.hstack((repeated_batch_idx, sampled_rois))
    # Update blobs dict with Track R-CNN blobs
    blobs['track_rois'] = sampled_rois
    blobs['track_ids_int32'] = max_track
    blobs['track_n_rois'] = np.array([len(sampled_rois)], dtype=np.int32)
def _expand_to_class_specific_mask_targets(masks, mask_class_labels):
    """Expand masks from shape (#masks, M ** 2) to (#masks, #classes * M ** 2)
    to encode class specific mask targets.
    """
    assert masks.shape[0] == mask_class_labels.shape[0]
    M = cfg.MRCNN.RESOLUTION
    # Target values of -1 are "don't care" / ignore labels
    mask_targets = -blob_utils.ones(
        (masks.shape[0], cfg.MODEL.NUM_CLASSES * M**2), int32=True)
    for i in range(masks.shape[0]):
        cls = int(mask_class_labels[i])
        start = M**2 * cls
        end = start + M**2
        # Ignore background instances
        # (only happens when there are no fg samples in an image)
        if cls > 0:
            mask_targets[i, start:end] = masks[i, :]
    return mask_targets
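# To make the target layout concrete, a toy, self-contained version of the
# same expansion (plain numpy, independent of cfg; sizes and values are made
# up): each fg mask is copied into the M*M slot of its class while every
# other slot keeps the -1 "ignore" value.
def _class_specific_mask_targets_example():
    M, num_classes = 2, 3                              # toy sizes, not cfg values
    masks = np.array([[1, 0, 1, 1]], dtype=np.int32)   # one mask, flattened 2x2
    labels = np.array([2])                             # it belongs to class 2
    targets = -np.ones((masks.shape[0], num_classes * M**2), dtype=np.int32)
    for i in range(masks.shape[0]):
        cls = int(labels[i])
        if cls > 0:
            targets[i, M**2 * cls:M**2 * (cls + 1)] = masks[i]
    print(targets)
    # [[-1 -1 -1 -1 -1 -1 -1 -1  1  0  1  1]] -> only the class-2 slot is supervised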
def _sample_da_rois(roidb, im_scale, batch_idx):
    """Generate a random sample of RoIs for domain adaptation."""
    max_overlaps = roidb['max_overlaps']
    rois_per_image = min(max_overlaps.shape[0],
                         int(cfg.TRAIN.BATCH_SIZE_PER_IM))

    # The indices that we're selecting (both fg and bg)
    # keep_inds = np.append(fg_inds, bg_inds)
    keep_inds = npr.choice(range(max_overlaps.shape[0]),
                           size=rois_per_image,
                           replace=False)

    # Label is the class each RoI has max overlap with
    sampled_labels = roidb['max_classes'][keep_inds]
    # sampled_labels[fg_rois_per_image:] = 0  # Label bg RoIs with class 0
    sampled_boxes = roidb['boxes'][keep_inds]

    # Scale rois and format as (batch_idx, x1, y1, x2, y2)
    sampled_rois = sampled_boxes * im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones(
        (sampled_rois.shape[0], 1))
    sampled_rois = np.hstack((repeated_batch_idx, sampled_rois))

    # Base Fast R-CNN blobs
    blob_dict = dict(da_rois=sampled_rois)

    # Optionally add Domain Adaptive R-CNN blobs
    if cfg.TRAIN.DOMAIN_ADAPTATION:
        if roidb['is_source']:
            blob_dict['dc_label'] = np.expand_dims(
                np.ones(sampled_labels.shape, dtype=np.int32), axis=1)
        else:
            blob_dict['dc_label'] = np.expand_dims(
                np.zeros(sampled_labels.shape, dtype=np.int32), axis=1)
    return blob_dict
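# The only extra supervision added by the domain-adaptation samplers is the
# per-RoI domain label; a small illustration (toy numbers, not repo code) of
# the dc_label blob for a source vs. a target image:
def _dc_label_example():
    rois_per_image = 4
    for is_source in (True, False):
        # Source-domain RoIs are labeled 1, target-domain RoIs 0; one label
        # per sampled RoI, shaped (N, 1) to match the domain classifier head.
        fill = np.ones if is_source else np.zeros
        dc_label = np.expand_dims(fill(rois_per_image, dtype=np.int32), axis=1)
        print(is_source, dc_label.ravel())  # True [1 1 1 1] / False [0 0 0 0]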
def add_body_uv_rcnn_blobs(blobs, sampled_boxes, roidb, im_scale, batch_idx): IsFlipped = roidb['flipped'] M = cfg.BODY_UV_RCNN.HEATMAP_SIZE # polys_gt_inds = np.where(roidb['ignore_UV_body'] == 0)[0] boxes_from_polys = [roidb['boxes'][i, :] for i in polys_gt_inds] if not (boxes_from_polys): pass else: boxes_from_polys = np.vstack(boxes_from_polys) boxes_from_polys = np.array(boxes_from_polys) fg_inds = np.where(blobs['labels_int32'] > 0)[0] roi_has_mask = np.zeros(blobs['labels_int32'].shape) if (bool(boxes_from_polys.any()) & (fg_inds.shape[0] > 0)): rois_fg = sampled_boxes[fg_inds] # rois_fg.astype(np.float32, copy=False) boxes_from_polys.astype(np.float32, copy=False) # overlaps_bbfg_bbpolys = box_utils.bbox_overlaps( rois_fg.astype(np.float32, copy=False), boxes_from_polys.astype(np.float32, copy=False)) fg_polys_value = np.max(overlaps_bbfg_bbpolys, axis=1) fg_inds = fg_inds[fg_polys_value > 0.7] if (bool(boxes_from_polys.any()) & (fg_inds.shape[0] > 0)): for jj in fg_inds: roi_has_mask[jj] = 1 # Create blobs for densepose supervision. ################################################## The mask All_labels = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True) All_Weights = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True) ################################################# The points X_points = blob_utils.zeros((fg_inds.shape[0], 196), int32=False) Y_points = blob_utils.zeros((fg_inds.shape[0], 196), int32=False) Ind_points = blob_utils.zeros((fg_inds.shape[0], 196), int32=True) I_points = blob_utils.zeros((fg_inds.shape[0], 196), int32=True) U_points = blob_utils.zeros((fg_inds.shape[0], 196), int32=False) V_points = blob_utils.zeros((fg_inds.shape[0], 196), int32=False) Uv_point_weights = blob_utils.zeros((fg_inds.shape[0], 196), int32=False) ################################################# rois_fg = sampled_boxes[fg_inds] overlaps_bbfg_bbpolys = box_utils.bbox_overlaps( rois_fg.astype(np.float32, copy=False), boxes_from_polys.astype(np.float32, copy=False)) fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1) for i in range(rois_fg.shape[0]): # fg_polys_ind = polys_gt_inds[fg_polys_inds[i]] # Ilabel = segm_utils.GetDensePoseMask( roidb['dp_masks'][fg_polys_ind]) # GT_I = np.array(roidb['dp_I'][fg_polys_ind]) GT_U = np.array(roidb['dp_U'][fg_polys_ind]) GT_V = np.array(roidb['dp_V'][fg_polys_ind]) GT_x = np.array(roidb['dp_x'][fg_polys_ind]) GT_y = np.array(roidb['dp_y'][fg_polys_ind]) GT_weights = np.ones(GT_I.shape).astype(np.float32) # ## Do the flipping of the densepose annotation ! if (IsFlipped): GT_I, GT_U, GT_V, GT_x, GT_y, Ilabel = DP.get_symmetric_densepose( GT_I, GT_U, GT_V, GT_x, GT_y, Ilabel) # roi_fg = rois_fg[i] roi_gt = boxes_from_polys[fg_polys_inds[i], :] # x1 = roi_fg[0] x2 = roi_fg[2] y1 = roi_fg[1] y2 = roi_fg[3] # x1_source = roi_gt[0] x2_source = roi_gt[2] y1_source = roi_gt[1] y2_source = roi_gt[3] # x_targets = (np.arange(x1, x2, (x2 - x1) / M) - x1_source) * (256. / (x2_source - x1_source)) y_targets = (np.arange(y1, y2, (y2 - y1) / M) - y1_source) * (256. / (y2_source - y1_source)) # x_targets = x_targets[ 0: M] ## Strangely sometimes it can be M+1, so make sure size is OK! 
y_targets = y_targets[0:M] # [X_targets, Y_targets] = np.meshgrid(x_targets, y_targets) New_Index = cv2.remap(Ilabel, X_targets.astype(np.float32), Y_targets.astype(np.float32), interpolation=cv2.INTER_NEAREST, borderMode=cv2.BORDER_CONSTANT, borderValue=(0)) # All_L = np.zeros(New_Index.shape) All_W = np.ones(New_Index.shape) # All_L = New_Index # gt_length_x = x2_source - x1_source gt_length_y = y2_source - y1_source # GT_y = ((GT_y / 256. * gt_length_y) + y1_source - y1) * (M / (y2 - y1)) GT_x = ((GT_x / 256. * gt_length_x) + x1_source - x1) * (M / (x2 - x1)) # GT_I[GT_y < 0] = 0 GT_I[GT_y > (M - 1)] = 0 GT_I[GT_x < 0] = 0 GT_I[GT_x > (M - 1)] = 0 # points_inside = GT_I > 0 GT_U = GT_U[points_inside] GT_V = GT_V[points_inside] GT_x = GT_x[points_inside] GT_y = GT_y[points_inside] GT_weights = GT_weights[points_inside] GT_I = GT_I[points_inside] # X_points[i, 0:len(GT_x)] = GT_x Y_points[i, 0:len(GT_y)] = GT_y Ind_points[i, 0:len(GT_I)] = i I_points[i, 0:len(GT_I)] = GT_I U_points[i, 0:len(GT_U)] = GT_U V_points[i, 0:len(GT_V)] = GT_V Uv_point_weights[i, 0:len(GT_weights)] = GT_weights # All_labels[i, :] = np.reshape(All_L.astype(np.int32), M**2) All_Weights[i, :] = np.reshape(All_W.astype(np.int32), M**2) ## else: bg_inds = np.where(blobs['labels_int32'] == 0)[0] # if (len(bg_inds) == 0): rois_fg = sampled_boxes[0].reshape((1, -1)) else: rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1)) roi_has_mask[0] = 1 # X_points = blob_utils.zeros((1, 196), int32=False) Y_points = blob_utils.zeros((1, 196), int32=False) Ind_points = blob_utils.zeros((1, 196), int32=True) I_points = blob_utils.zeros((1, 196), int32=True) U_points = blob_utils.zeros((1, 196), int32=False) V_points = blob_utils.zeros((1, 196), int32=False) Uv_point_weights = blob_utils.zeros((1, 196), int32=False) # All_labels = -blob_utils.ones((1, M**2), int32=True) * 0 ## zeros All_Weights = -blob_utils.ones((1, M**2), int32=True) * 0 ## zeros # rois_fg *= im_scale repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1)) rois_fg = np.hstack((repeated_batch_idx, rois_fg)) # K = cfg.BODY_UV_RCNN.NUM_PATCHES # U_points = np.tile(U_points, [1, K + 1]) V_points = np.tile(V_points, [1, K + 1]) Uv_Weight_Points = np.zeros(U_points.shape) # for jjj in xrange(1, K + 1): Uv_Weight_Points[:, jjj * I_points.shape[1]:(jjj + 1) * I_points.shape[1]] = (I_points == jjj).astype( np.float32) # ################ # Update blobs dict with Mask R-CNN blobs ############### # blobs['body_uv_rois'] = np.array(rois_fg) blobs['roi_has_body_uv_int32'] = np.array(roi_has_mask).astype(np.int32) ## blobs['body_uv_ann_labels'] = np.array(All_labels).astype(np.int32) blobs['body_uv_ann_weights'] = np.array(All_Weights).astype(np.float32) # ########################## blobs['body_uv_X_points'] = X_points.astype(np.float32) blobs['body_uv_Y_points'] = Y_points.astype(np.float32) blobs['body_uv_Ind_points'] = Ind_points.astype(np.float32) blobs['body_uv_I_points'] = I_points.astype(np.float32) blobs['body_uv_U_points'] = U_points.astype( np.float32) #### VERY IMPORTANT : These are switched here : blobs['body_uv_V_points'] = V_points.astype(np.float32) blobs['body_uv_point_weights'] = Uv_Weight_Points.astype(np.float32)
def _sample_rois(roidb, im_scale, batch_idx): """Generate a random sample of RoIs comprising foreground and background examples. """ rois_per_image = int(cfg.TRAIN.BATCH_SIZE_PER_IM) fg_rois_per_image = int(np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)) max_overlaps = roidb['max_overlaps'] # Select foreground RoIs as those with >= FG_THRESH overlap fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0] # Guard against the case when an image has fewer than fg_rois_per_image # foreground RoIs fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_inds.size) # Sample foreground regions without replacement if fg_inds.size > 0: fg_inds = npr.choice( fg_inds, size=fg_rois_per_this_image, replace=False ) # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) bg_inds = np.where( (max_overlaps < cfg.TRAIN.BG_THRESH_HI) & (max_overlaps >= cfg.TRAIN.BG_THRESH_LO) )[0] # Compute number of background RoIs to take from this image (guarding # against there being fewer than desired) bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image bg_rois_per_this_image = np.minimum(bg_rois_per_this_image, bg_inds.size) # Sample foreground regions without replacement if bg_inds.size > 0: bg_inds = npr.choice( bg_inds, size=bg_rois_per_this_image, replace=False ) # The indices that we're selecting (both fg and bg) keep_inds = np.append(fg_inds, bg_inds) # Label is the class each RoI has max overlap with sampled_labels = roidb['max_classes'][keep_inds] sampled_labels[fg_rois_per_this_image:] = 0 # Label bg RoIs with class 0 sampled_boxes = roidb['boxes'][keep_inds] bbox_targets, bbox_inside_weights = _expand_bbox_targets( roidb['bbox_targets'][keep_inds, :] ) bbox_outside_weights = np.array( bbox_inside_weights > 0, dtype=bbox_inside_weights.dtype ) # Scale rois and format as (batch_idx, x1, y1, x2, y2) sampled_rois = sampled_boxes * im_scale repeated_batch_idx = batch_idx * blob_utils.ones((sampled_rois.shape[0], 1)) sampled_rois = np.hstack((repeated_batch_idx, sampled_rois)) # Base Fast R-CNN blobs blob_dict = dict( labels_int32=sampled_labels.astype(np.int32, copy=False), rois=sampled_rois, bbox_targets=bbox_targets, bbox_inside_weights=bbox_inside_weights, bbox_outside_weights=bbox_outside_weights ) # Optionally add Mask R-CNN blobs if cfg.MODEL.MASK_ON: mask_rcnn_roi_data.add_mask_rcnn_blobs( blob_dict, sampled_boxes, roidb, im_scale, batch_idx ) # Optionally add Keypoint R-CNN blobs if cfg.MODEL.KEYPOINTS_ON: keypoint_rcnn_roi_data.add_keypoint_rcnn_blobs( blob_dict, roidb, fg_rois_per_image, fg_inds, im_scale, batch_idx ) return blob_dict
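# The _expand_bbox_targets helper used throughout these samplers is not shown
# in this section. As a reference, a minimal numpy sketch of the usual
# Detectron-style behavior (a class column plus 4 deltas expanded into
# class-specific slots); treat this as an assumption about the helper, not a
# copy of its code.
def _expand_bbox_targets_sketch(bbox_target_data, num_classes):
    """bbox_target_data: (N, 5) rows of [class, dx, dy, dw, dh]."""
    clss = bbox_target_data[:, 0]
    bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
    bbox_inside_weights = np.zeros_like(bbox_targets)
    for ind in np.where(clss > 0)[0]:
        cls = int(clss[ind])
        start, end = 4 * cls, 4 * cls + 4
        bbox_targets[ind, start:end] = bbox_target_data[ind, 1:]
        bbox_inside_weights[ind, start:end] = 1.0
    return bbox_targets, bbox_inside_weights
# Example: one fg RoI of class 1 and one bg RoI -> only columns 4:8 of row 0
# carry targets and inside weights; the bg row stays all zeros.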
def add_mask_rcnn_blobs(blobs, sampled_boxes, roidb, im_scale, batch_idx): """Add Mask R-CNN specific blobs to the input blob dictionary.""" # Prepare the mask targets by associating one gt mask to each training roi # that has a fg (non-bg) class label. M = cfg.MRCNN.RESOLUTION input_w = roidb['input_width'] input_h = roidb['input_height'] polys_gt_inds = np.where((roidb['gt_classes'] > 0) & (roidb['is_crowd'] == 0))[0] polys_gt = [roidb['segms'][i] for i in polys_gt_inds] boxes_from_polys = segm_utils.polys_to_boxes(polys_gt) fg_inds = np.where(blobs['labels_int32'] > 0)[0] roi_has_mask = blobs['labels_int32'].copy() roi_has_mask[roi_has_mask > 0] = 1 mask_fg_rois_per_this_image = cfg.MRCNN.MAX_ROIS_PER_IM if fg_inds.shape[0] > 0: if fg_inds.size > mask_fg_rois_per_this_image: fg_inds = np.random.choice(fg_inds, size=mask_fg_rois_per_this_image, replace=False) # Class labels for the foreground rois mask_class_labels = blobs['labels_int32'][fg_inds] masks = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True) all_person_masks = np.zeros( (int(input_h / im_scale), int(input_w / im_scale)), dtype=np.float32) # Find overlap between all foreground rois and the bounding boxes # enclosing each segmentation rois_fg = sampled_boxes[fg_inds] overlaps_bbfg_bbpolys = box_utils.bbox_overlaps( rois_fg.astype(np.float32, copy=False), boxes_from_polys.astype(np.float32, copy=False)) # Map from each fg rois to the index of the mask with highest overlap # (measured by bbox overlap) fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1) # add fg targets for i in range(rois_fg.shape[0]): fg_polys_ind = fg_polys_inds[i] poly_gt = polys_gt[fg_polys_ind] roi_fg = rois_fg[i] # Rasterize the portion of the polygon mask within the given fg roi # to an M x M binary image mask = segm_utils.polys_to_mask_wrt_box(poly_gt, roi_fg, M) mask = np.array(mask > 0, dtype=np.int32) # Ensure it's binary masks[i, :] = np.reshape(mask, M**2) # to an box_h x box_w binary image mask_wrt_bbox = segm_utils.convert_polys_to_mask_wrt_box( poly_gt, roi_fg) start_y, start_x = int(roi_fg[1]), int(roi_fg[0]) end_y, end_x = start_y + mask_wrt_bbox.shape[ 0], start_x + mask_wrt_bbox.shape[1] all_person_masks[start_y:end_y, start_x:end_x] = mask_wrt_bbox proposal_all_mask = blob_utils.zeros((fg_inds.shape[0], M, M), int32=True) for i in range(rois_fg.shape[0]): roi_fg = rois_fg[i] w = roi_fg[2] - roi_fg[0] h = roi_fg[3] - roi_fg[1] w = int(np.maximum(w, 1)) h = int(np.maximum(h, 1)) proposal_mask = all_person_masks[int(roi_fg[1]):int(roi_fg[1]) + h, int(roi_fg[0]):int(roi_fg[0]) + w] # proposal_mask = proposal_mask.astype(np.float32) proposal_mask = cv2.resize(proposal_mask, (M, M)) proposal_mask = (proposal_mask > 0.5).astype(np.int32) proposal_all_mask[i] = proposal_mask else: # If there are no fg masks (it does happen) # The network cannot handle empty blobs, so we must provide a mask # We simply take the first bg roi, given it an all -1's mask (ignore # label), and label it with class zero (bg). bg_inds = np.where(blobs['labels_int32'] == 0)[0] # rois_fg is actually one background roi, but that's ok because ... 
rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1)) # We give it an -1's blob (ignore label) masks = -blob_utils.ones((1, M**2), int32=True) # We label it with class = 0 (background) mask_class_labels = blob_utils.zeros((1, )) # Mark that the first roi has a mask roi_has_mask[0] = 1 proposal_all_mask = -blob_utils.ones((1, M, M), int32=True) if cfg.MRCNN.CLS_SPECIFIC_MASK: masks = _expand_to_class_specific_mask_targets(masks, mask_class_labels) # Scale rois_fg and format as (batch_idx, x1, y1, x2, y2) rois_fg *= im_scale repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1)) rois_fg = np.hstack((repeated_batch_idx, rois_fg)) # Update blobs dict with Mask R-CNN blobs blobs['mask_rois'] = rois_fg blobs['roi_has_mask_int32'] = roi_has_mask blobs['masks_int32'] = masks # blobs['mask_labels'] = np.argmax(masks.reshape((-1,cfg.MODEL.NUM_CLASSES,M,M)),axis=1).reshape((-1,M,M)).astype(np.int32) # blobs['mask_weights'] = np.ones(blobs['mask_labels'].shape, dtype=np.float32) # add by wxh if cfg.MRCNN.USE_CLS_EMBS: fg_embs, bg_embs, fg_weights, bg_weights = masks_to_embs( masks.reshape((-1, cfg.MODEL.NUM_CLASSES, M, M))) # print('fg',fg_embs.max(), fg_embs.min()) # print('bg',bg_embs.max(), bg_embs.min()) fg_norms = np.sum(fg_embs, axis=(1, 2)) fg_norms[fg_norms != 0] = 28. * 28. / (fg_norms[fg_norms != 0] + 1e-6) bg_norms = np.sum(bg_embs, axis=(1, 2)) bg_norms[bg_norms != 0] = 28. * 28. / (bg_norms[bg_norms != 0] + 1e-6) blobs['fg_mask'] = np.repeat(np.reshape(fg_embs, (-1, 1, M, M)), 2, axis=1) blobs['bg_mask'] = np.repeat(np.reshape(bg_embs, (-1, 1, M, M)), 2, axis=1) blobs['fg_norm'] = np.repeat(np.reshape(fg_norms, (-1, 1)), 2, axis=1) blobs['bg_norm'] = np.repeat(np.reshape(bg_norms, (-1, 1)), 2, axis=1) blobs['mask_emb_fg_labels'] = np.ones((fg_embs.shape[0], 1), dtype=np.int32) blobs['mask_emb_bg_labels'] = np.zeros((bg_embs.shape[0], 1), dtype=np.int32) # blobs['mask_emb_weights'] = np.vstack([fg_weights, bg_weights]).reshape((-1,1)).astype(np.float32) if cfg.MRCNN.BBOX_CASCADE_MASK_ON: blobs['inter_masks_int32'] = proposal_all_mask
def _sample_rois(roidb, im_scale, im_crop, batch_idx): """Generate a random sample of RoIs comprising foreground and background examples. """ rois_per_image = int(cfg.TRAIN.BATCH_SIZE_PER_IM) rois_this_image = np.minimum(rois_per_image, roidb['boxes'].shape[0]) if False: choice = np.random.choice(roidb['boxes'].shape[0], rois_this_image, replace=False) sampled_boxes = roidb['boxes'][choice, :].copy() obn_scores = roidb['obn_scores'][choice, :].copy() sampled_scores = np.add(obn_scores, 1.0) else: sampled_boxes = roidb['boxes'][:rois_this_image].copy() obn_scores = roidb['obn_scores'][:rois_this_image].copy() sampled_scores = np.add(obn_scores, 1.0) # Scale rois and format as (batch_idx, x1, y1, x2, y2) # sampled_rois = sampled_boxes * im_scale sampled_rois = _project_im_rois(sampled_boxes, im_scale, im_crop) repeated_batch_idx = batch_idx * blob_utils.ones( (sampled_rois.shape[0], 1)) sampled_rois = np.hstack((repeated_batch_idx, sampled_rois)) # gt_inds = np.where((roidb['gt_classes'] > 0) & (roidb['is_crowd'] == 0))[0] gt_inds = np.where(roidb['gt_classes'] > 0)[0] np.delete(sampled_rois, gt_inds, 0) np.delete(sampled_scores, gt_inds, 0) if cfg.WSL.CONTEXT and False: sampled_boxes = roidb['boxes'][:rois_this_image].copy() sampled_boxes_inner, sampled_boxes_outer = get_inner_outer_rois( sampled_boxes, cfg.WSL.CONTEXT_RATIO) sampled_rois_origin = _project_im_rois(sampled_boxes, im_scale, im_crop) sampled_rois_inner = _project_im_rois(sampled_boxes_inner, im_scale, im_crop) sampled_rois_outer = _project_im_rois(sampled_boxes_outer, im_scale, im_crop) repeated_batch_idx_inner = batch_idx * blob_utils.ones( (sampled_rois_origin.shape[0], 1)) repeated_batch_idx_outer = batch_idx * blob_utils.ones( (sampled_rois_origin.shape[0], 1)) sampled_rois_frame = np.hstack( (repeated_batch_idx, sampled_rois_origin, sampled_rois_inner)) sampled_rois_context = np.hstack( (repeated_batch_idx, sampled_rois_outer, sampled_rois_origin)) # Delete GT Boxes np.delete(sampled_rois_frame, gt_inds, 0) np.delete(sampled_rois_context, gt_inds, 0) # Get image label img_labels_oh = np.zeros((1, cfg.MODEL.NUM_CLASSES - 1), dtype=np.float32) img_labels = np.zeros((1), dtype=np.float32) # gt_inds = np.where((roidb['gt_classes'] > 0) & (roidb['is_crowd'] == 0))[0] gt_inds = np.where(roidb['gt_classes'] > 0)[0] assert len(gt_inds) > 0, roidb['gt_classes'] assert len( gt_inds ) > 0, 'Empty ground truth empty for image is not allowed. Please check.' gt_classes = roidb['gt_classes'][gt_inds].copy() num_valid_objs = gt_classes.shape[0] for o in range(num_valid_objs): img_labels_oh[0][gt_classes[o] - 1] = 1 img_labels[0] = gt_classes[o] - 1 blob_dict = dict( labels_int32=img_labels.astype(np.int32, copy=False), labels_oh=img_labels_oh.astype(np.float32, copy=False), rois=sampled_rois.astype(np.float32, copy=False), obn_scores=sampled_scores, ) if cfg.WSL.CONTEXT and False: blob_dict['rois_frame'] = sampled_rois_frame.astype(np.float32, copy=False) blob_dict['rois_context'] = sampled_rois_context.astype(np.float32, copy=False) # Optionally add Mask R-CNN blobs # if cfg.MODEL.MASK_ON: # mask_rcnn_roi_data.add_mask_rcnn_blobs(blob_dict, sampled_boxes, roidb, # im_scale, batch_idx) # Optionally add Keypoint R-CNN blobs if cfg.MODEL.KEYPOINTS_ON: keypoint_rcnn_roi_data.add_keypoint_rcnn_blobs(blob_dict, roidb, fg_rois_per_image, fg_inds, im_scale, batch_idx) return blob_dict
def _sample_rois(roidb, im_scale, batch_idx, stage): """Generate a random sample of RoIs comprising foreground and background examples. """ fg_thresh = cfg.CASCADE_RCNN.FG_THRESHS[stage - 1] bg_thresh_hi = cfg.CASCADE_RCNN.BG_THRESHS_HI[stage - 1] bg_thresh_lo = cfg.CASCADE_RCNN.BG_THRESHS_LO[stage - 1] max_overlaps = roidb["max_overlaps"] # Select foreground RoIs as those with >= FG_THRESH overlap fg_inds = np.where(max_overlaps >= fg_thresh)[0] fg_rois_per_this_image = fg_inds.size # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) bg_inds = np.where((max_overlaps < bg_thresh_hi) & (max_overlaps >= bg_thresh_lo))[0] # The indices that we're selecting (both fg and bg) keep_inds = np.append(fg_inds, bg_inds) # Label is the class each RoI has max overlap with sampled_labels = roidb["max_classes"][keep_inds] sampled_labels[fg_rois_per_this_image:] = 0 # Label bg RoIs with class 0 sampled_boxes = roidb["boxes"][keep_inds] gt_inds = np.where(roidb["gt_classes"] > 0)[0] gt_boxes = roidb["boxes"][gt_inds, :] gt_assignments = gt_inds[roidb["box_to_gt_ind_map"][keep_inds]] # [mapped_gt_boxes, max_overlaps] mapped_gt_boxes = blob_utils.zeros((keep_inds.size, 5)) mapped_gt_boxes[:, :4] = gt_boxes[gt_assignments, :] * im_scale mapped_gt_boxes[:, 4] = max_overlaps[keep_inds] mapped_gt_boxes[fg_rois_per_this_image:, :] = 0 if "bbox_targets" not in roidb: bbox_targets = _compute_targets(sampled_boxes, gt_boxes[gt_assignments, :], sampled_labels, stage) else: bbox_targets = roidb['bbox_targets'][keep_inds, :] bbox_targets, bbox_inside_weights = _expand_bbox_targets(bbox_targets) bbox_outside_weights = np.array(bbox_inside_weights > 0, dtype=bbox_inside_weights.dtype) # Scale rois and format as (batch_idx, x1, y1, x2, y2) sampled_rois = sampled_boxes * im_scale repeated_batch_idx = batch_idx * blob_utils.ones( (sampled_rois.shape[0], 1)) sampled_rois = np.hstack((repeated_batch_idx, sampled_rois)) # Base Cascade R-CNN blobs blob_dict = dict( labels_int32=sampled_labels.astype(np.int32, copy=False), rois=sampled_rois, bbox_targets=bbox_targets, bbox_inside_weights=bbox_inside_weights, bbox_outside_weights=bbox_outside_weights, mapped_gt_boxes=mapped_gt_boxes, ) # Optionally add Mask R-CNN blobs if cfg.MODEL.MASK_ON and cfg.MRCNN.AT_STAGE == stage: mask_rcnn_roi_data.add_mask_rcnn_blobs(blob_dict, sampled_boxes, roidb, im_scale, batch_idx) # Optionally add Keypoint R-CNN blobs if cfg.MODEL.KEYPOINTS_ON and cfg.KRCNN.AT_STAGE == stage: keypoint_rcnn_roi_data.add_keypoint_rcnn_blobs(blob_dict, roidb, fg_rois_per_this_image, fg_inds, im_scale, batch_idx, fg_thresh) return blob_dict
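# This stage-aware sampler indexes per-stage threshold lists from the config.
# A hypothetical three-stage configuration (the 0.5/0.6/0.7 schedule from the
# Cascade R-CNN paper, shown only to illustrate the expected list shapes; the
# real values live in this repo's config):
_CASCADE_RCNN_EXAMPLE = {
    'FG_THRESHS':    [0.5, 0.6, 0.7],   # fg IoU threshold per stage
    'BG_THRESHS_HI': [0.5, 0.6, 0.7],   # bg upper bound per stage
    'BG_THRESHS_LO': [0.0, 0.0, 0.0],   # bg lower bound per stage
}
# e.g. stage = 2 -> fg_thresh = _CASCADE_RCNN_EXAMPLE['FG_THRESHS'][2 - 1] = 0.6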
def _sample_rois(roidb, im_scale, batch_idx): """Generate a random sample of RoIs comprising foreground and background examples. """ rois_per_image = int(cfg.TRAIN.BATCH_SIZE_PER_IM) fg_rois_per_image = int(np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)) max_overlaps = roidb['max_overlaps'] # Select foreground RoIs as those with >= FG_THRESH overlap fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0] # Guard against the case when an image has fewer than fg_rois_per_image # foreground RoIs fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_inds.size) # Sample foreground regions without replacement if fg_inds.size > 0: fg_inds = npr.choice( fg_inds, size=fg_rois_per_this_image, replace=False ) # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) bg_inds = np.where( (max_overlaps < cfg.TRAIN.BG_THRESH_HI) & (max_overlaps >= cfg.TRAIN.BG_THRESH_LO) )[0] # Compute number of background RoIs to take from this image (guarding # against there being fewer than desired) bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image bg_rois_per_this_image = np.minimum(bg_rois_per_this_image, bg_inds.size) # Sample foreground regions without replacement if bg_inds.size > 0: bg_inds = npr.choice( bg_inds, size=bg_rois_per_this_image, replace=False ) # The indices that we're selecting (both fg and bg) keep_inds = np.append(fg_inds, bg_inds) # Label is the class each RoI has max overlap with sampled_labels = roidb['max_classes'][keep_inds] sampled_labels[fg_rois_per_this_image:] = 0 # Label bg RoIs with class 0 sampled_boxes = roidb['boxes'][keep_inds] bbox_targets, bbox_inside_weights = _expand_bbox_targets( roidb['bbox_targets'][keep_inds, :] ) bbox_outside_weights = np.array( bbox_inside_weights > 0, dtype=bbox_inside_weights.dtype ) # Scale rois and format as (batch_idx, x1, y1, x2, y2) sampled_rois = sampled_boxes * im_scale repeated_batch_idx = batch_idx * blob_utils.ones((sampled_rois.shape[0], 1)) sampled_rois = np.hstack((repeated_batch_idx, sampled_rois)) # Base Fast R-CNN blobs blob_dict = dict( labels_int32=sampled_labels.astype(np.int32, copy=False), rois=sampled_rois, bbox_targets=bbox_targets, bbox_inside_weights=bbox_inside_weights, bbox_outside_weights=bbox_outside_weights ) # Optionally add Mask R-CNN blobs if cfg.MODEL.MASK_ON: mask_rcnn_roi_data.add_mask_rcnn_blobs( blob_dict, sampled_boxes, roidb, im_scale, batch_idx ) # Optionally add Keypoint R-CNN blobs if cfg.MODEL.KEYPOINTS_ON: keypoint_rcnn_roi_data.add_keypoint_rcnn_blobs( blob_dict, roidb, fg_rois_per_image, fg_inds, im_scale, batch_idx ) ### added by srmani if cfg.TRAIN.SOFT_SAMPLING_ON: ss_fg_inds = np.where(max_overlaps[keep_inds] >= 0.5)[0] ss_bg_inds = np.where(max_overlaps[keep_inds] < 0.5)[0] label_weights = np.zeros(keep_inds.shape[0], dtype=max_overlaps.dtype) label_weights[ss_fg_inds] = 1 # high weight for positive instances sigma1 = 0.25 sigma2 = 50.0 sigma3 = 20.0 label_weights[ss_bg_inds] = sigma1 + (1-sigma1)*np.exp(-sigma2*np.exp(-sigma3 * max_overlaps[keep_inds][ss_bg_inds])) blob_dict['label_weights'] = label_weights ### end added by srmani return blob_dict
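# The soft-sampling branch above downweights background RoIs with a
# double-exponential of their max overlap. A quick standalone check (same
# formula, toy overlaps): backgrounds with near-zero overlap are kept at
# roughly sigma1 (0.25), while backgrounds whose overlap approaches the 0.5
# foreground threshold keep a weight close to 1.
def _soft_sampling_weight_example():
    sigma1, sigma2, sigma3 = 0.25, 50.0, 20.0
    overlaps = np.array([0.0, 0.1, 0.3, 0.45])
    weights = sigma1 + (1 - sigma1) * np.exp(-sigma2 * np.exp(-sigma3 * overlaps))
    print(np.round(weights, 3))  # ~ [0.25  0.251  0.913  0.995]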
def _sample_rois(roidb, im_scale, batch_idx, label_code): """Generate a random sample of RoIs comprising foreground and background examples. """ rois_per_image = int(cfg.TRAIN.BATCH_SIZE_PER_IM) fg_rois_per_image = int(np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)) max_overlaps = roidb['max_overlaps'] # Select foreground RoIs as those with >= FG_THRESH overlap fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0] # Guard against the case when an image has fewer than fg_rois_per_image # foreground RoIs fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_inds.size) # Sample foreground regions without replacement if fg_inds.size > 0: fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False) # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) & (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0] # Compute number of background RoIs to take from this image (guarding # against there being fewer than desired) bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image bg_rois_per_this_image = np.minimum(bg_rois_per_this_image, bg_inds.size) # Sample foreground regions without replacement if bg_inds.size > 0: bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False) # The indices that we're selecting (both fg and bg) keep_inds = np.append(fg_inds, bg_inds) # Label is the class each RoI has max overlap with sampled_labels = roidb['max_classes'][keep_inds] sampled_labels[fg_rois_per_this_image:] = 0 # Label bg RoIs with class 0 action_sampled_labels = roidb['max_classes'][keep_inds] action_sampled_labels[ fg_rois_per_this_image:] = 0 # Label bg RoIs with class 0 sampled_boxes = roidb['boxes'][keep_inds] # code.interact(local=locals()) if 'bbox_targets' not in roidb: gt_inds = np.where(roidb['gt_classes'] > 0)[0] gt_boxes = roidb['boxes'][gt_inds, :] gt_assignments = gt_inds[roidb['box_to_gt_ind_map'][keep_inds]] bbox_targets = _compute_targets(sampled_boxes, gt_boxes[gt_assignments, :], sampled_labels) bbox_targets, bbox_inside_weights = _expand_bbox_targets(bbox_targets) # code.interact(local=locals()) else: bbox_targets, bbox_inside_weights = _expand_bbox_targets( roidb['bbox_targets'][keep_inds, :]) bbox_outside_weights = np.array(bbox_inside_weights > 0, dtype=bbox_inside_weights.dtype) # Scale rois and format as (batch_idx, x1, y1, x2, y2) sampled_rois = sampled_boxes * im_scale repeated_batch_idx = batch_idx * blob_utils.ones( (sampled_rois.shape[0], 1)) sampled_rois = np.hstack((repeated_batch_idx, sampled_rois)) # new version for hierachy training for idx in range(0, len(sampled_labels)): label_int = sampled_labels[idx] if label_int != 0: sampled_labels[idx] = 1 action_sampled_labels[idx] = 0 multi_label_str = label_code['idx_to_label'][str(label_int)] multi_label = map(int, list(multi_label_str)) for k in range(0, 14): if int(multi_label[k + 1]) == 1: action_sampled_labels[idx] = k + 1 else: sampled_labels[idx] = 0 action_sampled_labels[idx] = 0 # Base Fast R-CNN blobs blob_dict = dict(labels_int32=sampled_labels.astype(np.int32, copy=False), multilabels_int32=action_sampled_labels.astype( np.int32, copy=False), rois=sampled_rois, bbox_targets=bbox_targets, bbox_inside_weights=bbox_inside_weights, bbox_outside_weights=bbox_outside_weights) # previous version for multilabel training ''' expand_sampled_labels = np.expand_dims(sampled_labels,axis=2) expand_sampled_labels = np.repeat(expand_sampled_labels,81,axis=1) for idx in range(0,len(sampled_labels)): label_int = 
sampled_labels[idx] if label_int != 0: sampled_labels[idx] = 1 multi_label_str = label_code['idx_to_label'][str(label_int)] multi_label = map(int, list(multi_label_str)) expand_sampled_labels[idx] = multi_label else: sampled_labels[idx] = 0 expand_sampled_labels[idx] = [0]*81 # Base Fast R-CNN blobs blob_dict = dict( labels_int32=sampled_labels.astype(np.int32, copy=False), multilabels_int32=expand_sampled_labels.astype(np.int32, copy=False), rois=sampled_rois, bbox_targets=bbox_targets, bbox_inside_weights=bbox_inside_weights, bbox_outside_weights=bbox_outside_weights ) ''' # Base Fast R-CNN blobs ''' blob_dict = dict( labels_int32=sampled_labels.astype(np.int32, copy=False), rois=sampled_rois, bbox_targets=bbox_targets, bbox_inside_weights=bbox_inside_weights, bbox_outside_weights=bbox_outside_weights ) ''' # Optionally add Mask R-CNN blobs if cfg.MODEL.MASK_ON: mask_rcnn_roi_data.add_mask_rcnn_blobs(blob_dict, sampled_boxes, roidb, im_scale, batch_idx) # Optionally add Keypoint R-CNN blobs if cfg.MODEL.KEYPOINTS_ON: keypoint_rcnn_roi_data.add_keypoint_rcnn_blobs(blob_dict, roidb, fg_rois_per_image, fg_inds, im_scale, batch_idx) return blob_dict
def _sample_rois(roidb, im_scale, batch_idx):
    """Generate a random sample of RoIs comprising foreground and background
    examples (i.e., obtain the fg and bg RoIs).
    """
    # e.g. 512
    rois_per_image = int(cfg.TRAIN.BATCH_SIZE_PER_IM)
    # e.g. 128
    fg_rois_per_image = int(np.round(cfg.TRAIN.FG_FRACTION * rois_per_image))
    max_overlaps = roidb['max_overlaps']

    # Select foreground RoIs as those with >= FG_THRESH overlap
    # (RoIs whose overlap exceeds the threshold become positive samples)
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # Guard against the case when an image has fewer than fg_rois_per_image
    # foreground RoIs
    # (number of fg RoIs to keep)
    fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_inds.size)
    # Sample foreground regions without replacement
    # (randomly pick fg_rois_per_this_image positive samples)
    if fg_inds.size > 0:
        fg_inds = npr.choice(
            fg_inds, size=fg_rois_per_this_image, replace=False
        )

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where(
        (max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
        (max_overlaps >= cfg.TRAIN.BG_THRESH_LO)
    )[0]
    # Compute number of background RoIs to take from this image (guarding
    # against there being fewer than desired)
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = np.minimum(bg_rois_per_this_image, bg_inds.size)
    # Sample background regions without replacement
    if bg_inds.size > 0:
        bg_inds = npr.choice(
            bg_inds, size=bg_rois_per_this_image, replace=False
        )

    # The indices that we're selecting (both fg and bg)
    # (merge the fg and bg indices)
    keep_inds = np.append(fg_inds, bg_inds)
    # Label is the class each RoI has max overlap with
    sampled_labels = roidb['max_classes'][keep_inds]
    # Set the labels of the bg RoIs to 0
    sampled_labels[fg_rois_per_this_image:] = 0  # Label bg RoIs with class 0
    sampled_boxes = roidb['boxes'][keep_inds]

    bbox_targets, bbox_inside_weights = _expand_bbox_targets(
        roidb['bbox_targets'][keep_inds, :]
    )
    bbox_outside_weights = np.array(
        bbox_inside_weights > 0, dtype=bbox_inside_weights.dtype
    )

    # Scale rois and format as (batch_idx, x1, y1, x2, y2)
    # (build the sampled RoIs, scaled to the resized input image)
    sampled_rois = sampled_boxes * im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((sampled_rois.shape[0], 1))
    sampled_rois = np.hstack((repeated_batch_idx, sampled_rois))

    # Base Fast R-CNN blobs
    blob_dict = dict(
        labels_int32=sampled_labels.astype(np.int32, copy=False),
        rois=sampled_rois,
        bbox_targets=bbox_targets,
        bbox_inside_weights=bbox_inside_weights,
        bbox_outside_weights=bbox_outside_weights
    )

    # Optionally add Mask R-CNN blobs
    if cfg.MODEL.MASK_ON:
        mask_rcnn_roi_data.add_mask_rcnn_blobs(
            blob_dict, sampled_boxes, roidb, im_scale, batch_idx
        )
    # Optionally add Keypoint R-CNN blobs
    if cfg.MODEL.KEYPOINTS_ON:
        keypoint_rcnn_roi_data.add_keypoint_rcnn_blobs(
            blob_dict, roidb, fg_rois_per_image, fg_inds, im_scale, batch_idx
        )
    return blob_dict
def add_body_uv_rcnn_blobs(blobs, sampled_boxes, roidb, im_scale, batch_idx): IsFlipped = roidb['flipped'] M = cfg.BODY_UV_RCNN.HEATMAP_SIZE # polys_gt_inds = np.where(roidb['ignore_UV_body'] == 0)[0] boxes_from_polys = [roidb['boxes'][i, :] for i in polys_gt_inds] input_w = roidb['input_width'] input_h = roidb['input_height'] if not (boxes_from_polys): pass else: boxes_from_polys = np.vstack(boxes_from_polys) boxes_from_polys = np.array(boxes_from_polys) fg_inds = np.where(blobs['labels_int32'] > 0)[0] roi_has_mask = np.zeros(blobs['labels_int32'].shape) if (bool(boxes_from_polys.any()) & (fg_inds.shape[0] > 0)): rois_fg = sampled_boxes[fg_inds] # rois_fg.astype(np.float32, copy=False) boxes_from_polys.astype(np.float32, copy=False) # overlaps_bbfg_bbpolys = box_utils.bbox_overlaps( rois_fg.astype(np.float32, copy=False), boxes_from_polys.astype(np.float32, copy=False)) fg_polys_value = np.max(overlaps_bbfg_bbpolys, axis=1) fg_inds = fg_inds[fg_polys_value > 0.7] all_person_masks = np.zeros((int(input_h), int(input_w)), dtype=np.float32) if (bool(boxes_from_polys.any()) & (fg_inds.shape[0] > 0)): # controle the number of roi if fg_inds.shape[0] > 6: fg_inds = fg_inds[:6] for jj in fg_inds: roi_has_mask[jj] = 1 # Create blobs for densepose supervision. ################################################## The mask All_labels = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True) All_Weights = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True) ################################################# The points X_points = blob_utils.zeros((fg_inds.shape[0], 196), int32=False) Y_points = blob_utils.zeros((fg_inds.shape[0], 196), int32=False) Ind_points = blob_utils.zeros((fg_inds.shape[0], 196), int32=True) I_points = blob_utils.zeros((fg_inds.shape[0], 196), int32=True) U_points = blob_utils.zeros((fg_inds.shape[0], 196), int32=False) V_points = blob_utils.zeros((fg_inds.shape[0], 196), int32=False) Uv_point_weights = blob_utils.zeros((fg_inds.shape[0], 196), int32=False) ################################################# rois_fg = sampled_boxes[fg_inds] overlaps_bbfg_bbpolys = box_utils.bbox_overlaps( rois_fg.astype(np.float32, copy=False), boxes_from_polys.astype(np.float32, copy=False)) fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1) rois = np.copy(rois_fg) for i in range(rois_fg.shape[0]): # fg_polys_ind = polys_gt_inds[fg_polys_inds[i]] # Ilabel = segm_utils.GetDensePoseMask( roidb['dp_masks'][fg_polys_ind]) # GT_I = np.array(roidb['dp_I'][fg_polys_ind]) GT_U = np.array(roidb['dp_U'][fg_polys_ind]) GT_V = np.array(roidb['dp_V'][fg_polys_ind]) GT_x = np.array(roidb['dp_x'][fg_polys_ind]) GT_y = np.array(roidb['dp_y'][fg_polys_ind]) GT_weights = np.ones(GT_I.shape).astype(np.float32) # ## Do the flipping of the densepose annotation ! if (IsFlipped): GT_I, GT_U, GT_V, GT_x, GT_y, Ilabel = DP.get_symmetric_densepose( GT_I, GT_U, GT_V, GT_x, GT_y, Ilabel) # roi_fg = rois_fg[i] roi_gt = boxes_from_polys[fg_polys_inds[i], :] # x1 = roi_fg[0] x2 = roi_fg[2] y1 = roi_fg[1] y2 = roi_fg[3] # x1_source = roi_gt[0] x2_source = roi_gt[2] y1_source = roi_gt[1] y2_source = roi_gt[3] # x_targets = (np.arange(x1, x2, (x2 - x1) / M) - x1_source) * (256. / (x2_source - x1_source)) y_targets = (np.arange(y1, y2, (y2 - y1) / M) - y1_source) * (256. / (y2_source - y1_source)) # x_targets = x_targets[ 0: M] ## Strangely sometimes it can be M+1, so make sure size is OK! 
y_targets = y_targets[0:M] # [X_targets, Y_targets] = np.meshgrid(x_targets, y_targets) New_Index = cv2.remap(Ilabel, X_targets.astype(np.float32), Y_targets.astype(np.float32), interpolation=cv2.INTER_NEAREST, borderMode=cv2.BORDER_CONSTANT, borderValue=(0)) # All_L = np.zeros(New_Index.shape) All_W = np.ones(New_Index.shape) # All_L = New_Index # gt_length_x = x2_source - x1_source gt_length_y = y2_source - y1_source # GT_y = ((GT_y / 256. * gt_length_y) + y1_source - y1) * (M / (y2 - y1)) GT_x = ((GT_x / 256. * gt_length_x) + x1_source - x1) * (M / (x2 - x1)) # GT_I[GT_y < 0] = 0 GT_I[GT_y > (M - 1)] = 0 GT_I[GT_x < 0] = 0 GT_I[GT_x > (M - 1)] = 0 # points_inside = GT_I > 0 GT_U = GT_U[points_inside] GT_V = GT_V[points_inside] GT_x = GT_x[points_inside] GT_y = GT_y[points_inside] GT_weights = GT_weights[points_inside] GT_I = GT_I[points_inside] # X_points[i, 0:len(GT_x)] = GT_x Y_points[i, 0:len(GT_y)] = GT_y Ind_points[i, 0:len(GT_I)] = i I_points[i, 0:len(GT_I)] = GT_I U_points[i, 0:len(GT_U)] = GT_U V_points[i, 0:len(GT_V)] = GT_V Uv_point_weights[i, 0:len(GT_weights)] = GT_weights # All_labels[i, :] = np.reshape(All_L.astype(np.int32), M**2) All_Weights[i, :] = np.reshape(All_W.astype(np.int32), M**2) ## # proposal based segmentation p_mask = (Ilabel > 0).astype(np.float32) target_roi = roi_gt * im_scale p_mask = cv2.resize(p_mask, (int(target_roi[2] - target_roi[0]), int(target_roi[3] - target_roi[1]))) p_mask = (p_mask > 0.5).astype(np.float32) start_y, start_x = int(target_roi[1]), int(target_roi[0]) end_y, end_x = start_y + p_mask.shape[0], start_x + p_mask.shape[1] # if all_person_masks[start_y:end_y, start_x:end_x].shape[0]!=p_mask.shape[0] or all_person_masks[start_y:end_y, start_x:end_x].shape[1]!=p_mask.shape[1]: # print('shape exception:',all_person_masks[start_y:end_y, start_x:end_x].shape,p_mask.shape) # print('roi:',target_roi) # print(start_y,end_y, start_x,end_x) # print('input image:',all_person_masks.shape) # assert False all_person_masks[start_y:end_y, start_x:end_x] = p_mask else: bg_inds = np.where(blobs['labels_int32'] == 0)[0] # if (len(bg_inds) == 0): rois_fg = sampled_boxes[0].reshape((1, -1)) else: rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1)) roi_has_mask[0] = 1 # X_points = blob_utils.zeros((1, 196), int32=False) Y_points = blob_utils.zeros((1, 196), int32=False) Ind_points = blob_utils.zeros((1, 196), int32=True) I_points = blob_utils.zeros((1, 196), int32=True) U_points = blob_utils.zeros((1, 196), int32=False) V_points = blob_utils.zeros((1, 196), int32=False) Uv_point_weights = blob_utils.zeros((1, 196), int32=False) # All_labels = -blob_utils.ones((1, M**2), int32=True) * 0 ## zeros All_Weights = -blob_utils.ones((1, M**2), int32=True) * 0 ## zeros # rois_fg *= im_scale repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1)) rois_fg = np.hstack((repeated_batch_idx, rois_fg)) # K = cfg.BODY_UV_RCNN.NUM_PATCHES # u_points = np.copy(U_points) v_points = np.copy(V_points) U_points = np.tile(U_points, [1, K + 1]) V_points = np.tile(V_points, [1, K + 1]) Uv_Weight_Points = np.zeros(U_points.shape) # for jjj in xrange(1, K + 1): Uv_Weight_Points[:, jjj * I_points.shape[1]:(jjj + 1) * I_points.shape[1]] = (I_points == jjj).astype( np.float32) # # person masks here person_mask = (All_labels > 0).astype(np.int32) # extra # index_targets = np.zeros_like(person_mask).reshape((-1,M,M)).astype(np.int32) # index_targets_weights = np.zeros_like(index_targets) # u_targets = np.zeros((index_targets.shape[0],25,M,M),dtype=np.float32) # v_targets = 
np.zeros((index_targets.shape[0], 25, M, M),dtype=np.float32) # uv_weights = np.zeros((index_targets.shape[0], 25, M, M),dtype=np.float32) # for ibatch in range(index_targets.shape[0]): # for i_surface in range(1,K+1): # points_i = I_points[ibatch] == i_surface # if len(points_i)>0: # points_x = np.asarray(X_points[ibatch][points_i], dtype=np.int32).reshape((-1,1)) # points_y = np.asarray(Y_points[ibatch][points_i], dtype=np.int32).reshape((-1,1)) # points_u = u_points[ibatch][points_i].reshape((1, -1)) # points_v = v_points[ibatch][points_i].reshape((1, -1)) # locs = np.hstack([points_x, points_y]) # # for step in [1]: # x_plus_locs = np.copy(points_x) + step # y_plus_locs = np.copy(points_y) + step # x_minus_locs = np.copy(points_x) - step # y_minus_locs = np.copy(points_y) - step # # locs = np.vstack([locs, np.hstack([x_plus_locs, y_plus_locs])]) # locs = np.vstack([locs, np.hstack([x_plus_locs, y_minus_locs])]) # locs = np.vstack([locs, np.hstack([x_minus_locs, y_plus_locs])]) # locs = np.vstack([locs, np.hstack([x_minus_locs, y_minus_locs])]) # # locs[locs < 0] = 0. # locs[locs >= M] = M - 1 # # points_u = np.repeat(points_u, 5, axis=0).reshape((-1)) # points_v = np.repeat(points_v, 5, axis=0).reshape((-1)) # # # index_targets[ibatch][locs[:,1], locs[:, 0]] = i_surface # index_targets_weights[ibatch][locs[:, 1], locs[:, 0]] = 1 # u_targets[ibatch, i_surface][locs[:, 1], locs[:, 0]] = points_u # v_targets[ibatch, i_surface][locs[:, 1], locs[:, 0]] = points_v # uv_weights[ibatch, i_surface][locs[:, 1], locs[:, 0]] = 1. # if random.random() <= 0.5: # _,index_targets[ibatch], v_targets[ibatch], v_targets[ibatch], index_targets_weights[ibatch], uv_weights[ibatch] = expand_dp_targets(All_labels[ibatch].reshape((M,M)), # index_targets[ibatch], v_targets[ibatch], # v_targets[ibatch], # index_targets_weights[ibatch], # uv_weights[ibatch]) # proposal all masks here if (bool(boxes_from_polys.any()) & (fg_inds.shape[0] > 0)): proposal_all_mask = blob_utils.zeros((fg_inds.shape[0], M, M), int32=True) for i in range(rois_fg.shape[0]): roi_fg = rois_fg[i][1:] proposal_mask = all_person_masks[int(roi_fg[1]):int(roi_fg[3]), int(roi_fg[0]):int(roi_fg[2])] proposal_mask = cv2.resize(proposal_mask, (M, M)) proposal_mask = (proposal_mask > 0.5).astype(np.int32) proposal_all_mask[i] = proposal_mask else: proposal_all_mask = -blob_utils.ones( (1, M, M), int32=True) * 0 ## zeros ################ # Update blobs dict with Mask R-CNN blobs ############### # blobs['body_mask_labels'] = person_mask.reshape((-1, M, M)) blobs['body_uv_rois'] = np.array(rois_fg) blobs['roi_has_body_uv_int32'] = np.array(roi_has_mask).astype(np.int32) ## blobs['body_uv_ann_labels'] = np.array(All_labels).astype(np.int32) blobs['body_uv_ann_weights'] = np.array(All_Weights).astype(np.float32) # ########################## blobs['body_uv_X_points'] = X_points.astype(np.float32) blobs['body_uv_Y_points'] = Y_points.astype(np.float32) blobs['body_uv_Ind_points'] = Ind_points.astype(np.float32) blobs['body_uv_I_points'] = I_points.astype(np.float32) blobs['body_uv_U_points'] = U_points.astype( np.float32) #### VERY IMPORTANT : These are switched here : blobs['body_uv_V_points'] = V_points.astype(np.float32) blobs['body_uv_point_weights'] = Uv_Weight_Points.astype(np.float32) ################### # extra # blobs['body_uv_Index_targets'] = index_targets # blobs['body_uv_Index_targets_weights'] = index_targets_weights.astype(np.float32) # blobs['body_uv_U_targets'] = u_targets # blobs['body_uv_V_targets'] = v_targets # blobs['body_uv_weights'] 
= uv_weights ################ # add by wxh if cfg.BODY_UV_RCNN.USE_CLS_EMBS: fg_embs, bg_embs, fg_weights, bg_weights = masks_to_embs( All_labels.reshape((-1, M, M))) # print('fg',fg_embs.max(), fg_embs.min()) # print('bg',bg_embs.max(), bg_embs.min()) fg_norms = np.sum(fg_embs, axis=(1, 2)) fg_norms[fg_norms != 0] = 56. * 56. / fg_norms[fg_norms != 0] bg_norms = np.sum(bg_embs, axis=(1, 2)) bg_norms[bg_norms != 0] = 56. * 56. / bg_norms[bg_norms != 0] blobs['fg_mask'] = np.repeat(np.reshape(fg_embs, (-1, 1, M, M)), 2, axis=1) blobs['bg_mask'] = np.repeat(np.reshape(bg_embs, (-1, 1, M, M)), 2, axis=1) blobs['fg_norm'] = np.repeat(np.reshape(fg_norms, (-1, 1)), 2, axis=1) blobs['bg_norm'] = np.repeat(np.reshape(bg_norms, (-1, 1)), 2, axis=1) blobs['mask_emb_fg_labels'] = np.ones((fg_embs.shape[0], 1), dtype=np.int32) blobs['mask_emb_bg_labels'] = np.zeros((bg_embs.shape[0], 1), dtype=np.int32) blobs['mask_emb_weights'] = np.vstack([fg_weights, bg_weights]).reshape( (-1, 1)).astype(np.float32) if cfg.BODY_UV_RCNN.USE_BOX_ALL_MASKS: blobs['body_masks_wrt_box'] = proposal_all_mask
def _sample_rois(roidb, im_scale, batch_idx, stage_num): """Generate a random sample of RoIs comprising foreground and background examples. """ # set improving rcnn iou threshold for cascade rcnn bg_thresh_lo = cfg.TRAIN.BG_THRESH_LO if stage_num == 1: fg_thresh = cfg.TRAIN.FG_THRESH # 0.5 bg_thresh_hi = cfg.TRAIN.BG_THRESH_HI # 0.5 elif stage_num == 2: fg_thresh = 0.6 bg_thresh_hi = 0.6 elif stage_num == 3: fg_thresh = 0.7 bg_thresh_hi = 0.7 rois_per_image = int(cfg.TRAIN.BATCH_SIZE_PER_IM) fg_rois_per_image = int(np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)) max_overlaps = roidb['max_overlaps'] # Select foreground RoIs as those with >= FG_THRESH overlap fg_inds = np.where(max_overlaps >= fg_thresh)[0] # Guard against the case when an image has fewer than fg_rois_per_image # foreground RoIs fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_inds.size) # Sample foreground regions without replacement if fg_inds.size > 0: fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False) # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) bg_inds = np.where((max_overlaps < bg_thresh_hi) & (max_overlaps >= bg_thresh_lo))[0] # Compute number of background RoIs to take from this image (guarding # against there being fewer than desired) bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image bg_rois_per_this_image = np.minimum(bg_rois_per_this_image, bg_inds.size) # Sample foreground regions without replacement if bg_inds.size > 0: bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False) # The indices that we're selecting (both fg and bg) keep_inds = np.append(fg_inds, bg_inds) # Label is the class each RoI has max overlap with sampled_labels = roidb['max_classes'][keep_inds] sampled_labels[fg_rois_per_this_image:] = 0 # Label bg RoIs with class 0 sampled_boxes = roidb['boxes'][keep_inds] sampled_max_overlaps = roidb['max_overlaps'][keep_inds] bbox_targets, bbox_inside_weights = _expand_bbox_targets( roidb['bbox_targets'][keep_inds, :]) bbox_outside_weights = np.array(bbox_inside_weights > 0, dtype=bbox_inside_weights.dtype) # Scale rois and format as (batch_idx, x1, y1, x2, y2) sampled_rois = sampled_boxes * im_scale repeated_batch_idx = batch_idx * blob_utils.ones( (sampled_rois.shape[0], 1)) sampled_rois = np.hstack((repeated_batch_idx, sampled_rois)) # Base Fast R-CNN blobs if stage_num == 1: blob_dict = dict(labels_int32_1st=sampled_labels.astype(np.int32, copy=False), rois_1st=sampled_rois, bbox_targets_1st=bbox_targets, bbox_inside_weights_1st=bbox_inside_weights, bbox_outside_weights_1st=bbox_outside_weights, max_overlaps_1st=sampled_max_overlaps) if stage_num == 2: blob_dict = dict(labels_int32_2nd=sampled_labels.astype(np.int32, copy=False), rois_2nd=sampled_rois, bbox_targets_2nd=bbox_targets, bbox_inside_weights_2nd=bbox_inside_weights, bbox_outside_weights_2nd=bbox_outside_weights, max_overlaps_2nd=sampled_max_overlaps) if stage_num == 3: blob_dict = dict(labels_int32_3rd=sampled_labels.astype(np.int32, copy=False), rois_3rd=sampled_rois, bbox_targets_3rd=bbox_targets, bbox_inside_weights_3rd=bbox_inside_weights, bbox_outside_weights_3rd=bbox_outside_weights) # Optionally add Mask R-CNN blobs if cfg.MODEL.MASK_ON: mask_rcnn_roi_data.add_mask_rcnn_blobs(blob_dict, sampled_boxes, roidb, im_scale, batch_idx) # Optionally add Keypoint R-CNN blobs if cfg.MODEL.KEYPOINTS_ON: keypoint_rcnn_roi_data.add_keypoint_rcnn_blobs( blob_dict, roidb, fg_rois_per_image, fg_inds, im_scale, batch_idx) return blob_dict
def add_mask_rcnn_blobs(blobs, sampled_boxes, roidb, im_scale, batch_idx): """Add Mask R-CNN specific blobs to the input blob dictionary.""" # Prepare the mask targets by associating one gt mask to each training roi # that has a fg (non-bg) class label. M = cfg.MRCNN.RESOLUTION polys_gt_inds = np.where( (roidb['gt_classes'] > 0) & (roidb['is_crowd'] == 0) )[0] polys_gt = [roidb['segms'][i] for i in polys_gt_inds] boxes_from_polys = segm_utils.polys_to_boxes(polys_gt) fg_inds = np.where(blobs['labels_int32'] > 0)[0] roi_has_mask = blobs['labels_int32'].copy() roi_has_mask[roi_has_mask > 0] = 1 if fg_inds.shape[0] > 0: # Class labels for the foreground rois mask_class_labels = blobs['labels_int32'][fg_inds] masks = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True) # Find overlap between all foreground rois and the bounding boxes # enclosing each segmentation rois_fg = sampled_boxes[fg_inds] overlaps_bbfg_bbpolys = box_utils.bbox_overlaps( rois_fg.astype(np.float32, copy=False), boxes_from_polys.astype(np.float32, copy=False) ) # Map from each fg rois to the index of the mask with highest overlap # (measured by bbox overlap) fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1) # add fg targets for i in range(rois_fg.shape[0]): fg_polys_ind = fg_polys_inds[i] poly_gt = polys_gt[fg_polys_ind] roi_fg = rois_fg[i] # Rasterize the portion of the polygon mask within the given fg roi # to an M x M binary image mask = segm_utils.polys_to_mask_wrt_box(poly_gt, roi_fg, M) mask = np.array(mask > 0, dtype=np.int32) # Ensure it's binary masks[i, :] = np.reshape(mask, M**2) else: # If there are no fg masks (it does happen) # The network cannot handle empty blobs, so we must provide a mask # We simply take the first bg roi, given it an all -1's mask (ignore # label), and label it with class zero (bg). bg_inds = np.where(blobs['labels_int32'] == 0)[0] # rois_fg is actually one background roi, but that's ok because ... rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1)) # We give it an -1's blob (ignore label) masks = -blob_utils.ones((1, M**2), int32=True) # We label it with class = 0 (background) mask_class_labels = blob_utils.zeros((1, )) # Mark that the first roi has a mask roi_has_mask[0] = 1 if cfg.MRCNN.CLS_SPECIFIC_MASK: masks = _expand_to_class_specific_mask_targets(masks, mask_class_labels) # Scale rois_fg and format as (batch_idx, x1, y1, x2, y2) rois_fg *= im_scale repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1)) rois_fg = np.hstack((repeated_batch_idx, rois_fg)) # Update blobs dict with Mask R-CNN blobs blobs['mask_rois'] = rois_fg blobs['roi_has_mask_int32'] = roi_has_mask blobs['masks_int32'] = masks
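# _expand_to_class_specific_mask_targets is referenced above but not shown in
# this file. Below is a self-contained numpy sketch of the usual Detectron-style
# expansion: each roi's M*M mask is copied into the slot of its class, while all
# other entries keep the -1 "ignore" label. num_classes and M are passed
# explicitly here instead of being read from cfg.
import numpy as np


def expand_to_class_specific_mask_targets(masks, mask_class_labels,
                                          num_classes, M):
    """masks: (N, M*M) int32; mask_class_labels: (N,) class indices."""
    N = masks.shape[0]
    mask_targets = -np.ones((N, num_classes * M**2), dtype=np.int32)
    for i in range(N):
        cls = int(mask_class_labels[i])
        if cls > 0:  # background rois keep the all -1 (ignore) target
            start = M**2 * cls
            mask_targets[i, start:start + M**2] = masks[i, :]
    return mask_targets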
def _sample_rois(roidb, im_scale, batch_idx): """Generate a random sample of RoIs comprising foreground and background examples. """ if cfg.REID.PSE_ON: img_labels = np.array([0], dtype=np.float32) attr_img_labels = np.array([0], dtype=np.float32) weight = np.array([0.0], dtype=np.float32) attr_weight = np.array([0.0], dtype=np.float32) gt_inds = np.where(roidb['gt_classes'] > 0)[0] assert len(gt_inds) <= 2, 'Only one ground truth for image is allowed.' gt_classes = roidb['gt_classes'][gt_inds].copy() gt_inds = np.where(roidb['gt_attributions'] > 0)[0] assert len(gt_inds) <= 2, 'Only one ground truth for image is allowed.' gt_attributions = roidb['gt_attributions'][gt_inds].copy() classes_or_attributions = roidb['classes_or_attributions'] for i in range(len(gt_classes)): if classes_or_attributions[i] == 0: img_labels[0] = gt_classes[i] - 1 weight[0] = 1.0 elif classes_or_attributions[i] == 1: attr_img_labels[0] = gt_attributions[i] - 1 attr_weight[0] = cfg.REID.PSE_WEIGHT else: img_labels[0] = gt_classes[i] - 1 weight[0] = 1.0 attr_img_labels[0] = gt_attributions[i] - 1 attr_weight[0] = cfg.REID.PSE_WEIGHT blob_dict = dict( labels_int32=img_labels.astype(np.int32, copy=False), attr_labels_int32=attr_img_labels.astype(np.int32, copy=False), weight=weight.astype(np.float32, copy=False), attr_weight=attr_weight.astype(np.float32, copy=False), ) return blob_dict # Get image label img_labels_oh = np.zeros((1, cfg.MODEL.NUM_CLASSES - 1), dtype=np.float32) img_labels = np.zeros((1), dtype=np.float32) gt_inds = np.where(roidb['gt_classes'] > 0)[0] assert len(gt_inds) == 1, 'Only one ground truth for image is allowed.' gt_classes = roidb['gt_classes'][gt_inds].copy() img_labels_oh[0][gt_classes[0] - 1] = 1 img_labels[0] = gt_classes[0] - 1 blob_dict = dict( labels_int32=img_labels.astype(np.int32, copy=False), labels_oh=img_labels_oh.astype(np.float32, copy=False), ) return blob_dict rois_per_image = int(cfg.TRAIN.BATCH_SIZE_PER_IM) fg_rois_per_image = int(np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)) max_overlaps = roidb['max_overlaps'] # Select foreground RoIs as those with >= FG_THRESH overlap fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0] # Guard against the case when an image has fewer than fg_rois_per_image # foreground RoIs fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_inds.size) # Sample foreground regions without replacement if fg_inds.size > 0: fg_inds = npr.choice( fg_inds, size=fg_rois_per_this_image, replace=False) # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) & (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0] # Compute number of background RoIs to take from this image (guarding # against there being fewer than desired) bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image bg_rois_per_this_image = np.minimum(bg_rois_per_this_image, bg_inds.size) # Sample foreground regions without replacement if bg_inds.size > 0: bg_inds = npr.choice( bg_inds, size=bg_rois_per_this_image, replace=False) # The indices that we're selecting (both fg and bg) keep_inds = np.append(fg_inds, bg_inds) # Label is the class each RoI has max overlap with sampled_labels = roidb['max_classes'][keep_inds] sampled_labels[fg_rois_per_this_image:] = 0 # Label bg RoIs with class 0 sampled_boxes = roidb['boxes'][keep_inds] bbox_targets, bbox_inside_weights = _expand_bbox_targets( roidb['bbox_targets'][keep_inds, :]) bbox_outside_weights = np.array( bbox_inside_weights > 0, dtype=bbox_inside_weights.dtype) 
# Scale rois and format as (batch_idx, x1, y1, x2, y2) sampled_rois = sampled_boxes * im_scale repeated_batch_idx = batch_idx * blob_utils.ones( (sampled_rois.shape[0], 1)) sampled_rois = np.hstack((repeated_batch_idx, sampled_rois)) # Base Fast R-CNN blobs blob_dict = dict( labels_int32=sampled_labels.astype(np.int32, copy=False), rois=sampled_rois, bbox_targets=bbox_targets, bbox_inside_weights=bbox_inside_weights, bbox_outside_weights=bbox_outside_weights) # Optionally add Mask R-CNN blobs if cfg.MODEL.MASK_ON: mask_rcnn_roi_data.add_mask_rcnn_blobs(blob_dict, sampled_boxes, roidb, im_scale, batch_idx) # Optionally add Keypoint R-CNN blobs if cfg.MODEL.KEYPOINTS_ON: keypoint_rcnn_roi_data.add_keypoint_rcnn_blobs( blob_dict, roidb, fg_rois_per_image, fg_inds, im_scale, batch_idx) return blob_dict
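# _expand_bbox_targets, used by the samplers in this file, is not shown here.
# A self-contained numpy sketch of the standard Detectron-style expansion: the
# compact per-roi targets (class, dx, dy, dw, dh) are scattered into a
# (N, 4 * num_classes) array so that only the slot of the roi's class carries
# non-zero targets, with matching inside weights. num_classes is passed
# explicitly instead of being read from cfg.
import numpy as np


def expand_bbox_targets_sketch(bbox_target_data, num_classes):
    """bbox_target_data: (N, 5) array of (class, dx, dy, dw, dh) rows."""
    clss = bbox_target_data[:, 0]
    bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
    bbox_inside_weights = np.zeros_like(bbox_targets)
    for ind in np.where(clss > 0)[0]:
        cls = int(clss[ind])
        start = 4 * cls
        bbox_targets[ind, start:start + 4] = bbox_target_data[ind, 1:]
        bbox_inside_weights[ind, start:start + 4] = (1.0, 1.0, 1.0, 1.0)
    return bbox_targets, bbox_inside_weights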
def _sample_rois(roidb, im_scale, batch_idx): """Generate a random sample of RoIs comprising foreground and background examples. """ rois_per_image = int(cfg.TRAIN.BATCH_SIZE_PER_IM) fg_rois_per_image = int(np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)) max_overlaps = roidb['max_overlaps'] # Select foreground RoIs as those with >= FG_THRESH overlap fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0] # Guard against the case when an image has fewer than fg_rois_per_image # foreground RoIs fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_inds.size) # Sample foreground regions without replacement if fg_inds.size > 0: fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False) # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) & (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0] # Compute number of background RoIs to take from this image (guarding # against there being fewer than desired) bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image bg_rois_per_this_image = np.minimum(bg_rois_per_this_image, bg_inds.size) # Sample foreground regions without replacement if bg_inds.size > 0: bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False) # The indices that we're selecting (both fg and bg) keep_inds = np.append(fg_inds, bg_inds) # Label is the class each RoI has max overlap with sampled_labels = roidb['max_classes'][keep_inds] sampled_labels[fg_rois_per_this_image:] = 0 # Label bg RoIs with class 0 sampled_boxes = roidb['boxes'][keep_inds] gt_inds = np.where(roidb['gt_classes'] > 0)[0] gt_boxes = roidb['boxes'][gt_inds, :] gt_assignments = gt_inds[roidb['box_to_gt_ind_map'][keep_inds]] # [mapped_gt_boxes, max_overlaps] mapped_gt_boxes = blob_utils.zeros((keep_inds.size, 5)) mapped_gt_boxes[:, :4] = gt_boxes[gt_assignments, :] * im_scale mapped_gt_boxes[:, 4] = max_overlaps[keep_inds] mapped_gt_boxes[fg_rois_per_this_image:, :] = 0 bbox_targets, bbox_inside_weights = _expand_bbox_targets( roidb['bbox_targets'][keep_inds, :]) bbox_outside_weights = np.array(bbox_inside_weights > 0, dtype=bbox_inside_weights.dtype) # Scale rois and format as (batch_idx, x1, y1, x2, y2) sampled_rois = sampled_boxes * im_scale repeated_batch_idx = batch_idx * blob_utils.ones( (sampled_rois.shape[0], 1)) sampled_rois = np.hstack((repeated_batch_idx, sampled_rois)) # Base Fast R-CNN blobs blob_dict = dict(labels_int32=sampled_labels.astype(np.int32, copy=False), rois=sampled_rois, bbox_targets=bbox_targets, bbox_inside_weights=bbox_inside_weights, bbox_outside_weights=bbox_outside_weights, mapped_gt_boxes=mapped_gt_boxes) # Optionally add Mask R-CNN blobs if cfg.MODEL.MASK_ON and cfg.MRCNN.AT_STAGE == 1: mask_rcnn_roi_data.add_mask_rcnn_blobs(blob_dict, sampled_boxes, roidb, im_scale, batch_idx) # Optionally add Keypoint R-CNN blobs if cfg.MODEL.KEYPOINTS_ON and cfg.KRCNN.AT_STAGE == 1: keypoint_rcnn_roi_data.add_keypoint_rcnn_blobs(blob_dict, roidb, fg_rois_per_image, fg_inds, im_scale, batch_idx, cfg.TRAIN.FG_THRESH) return blob_dict
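# A tiny worked example of the mapped_gt_boxes construction above, using toy
# data to show how box_to_gt_ind_map (an index into the image's gt boxes) is
# turned into an absolute roidb index via gt_inds. All values are made up.
import numpy as np

gt_inds = np.array([0, 1])                     # roidb rows that are gt boxes
boxes = np.array([[0, 0, 10, 10],              # gt 0
                  [20, 20, 40, 40],            # gt 1
                  [1, 1, 9, 9],                # proposal matched to gt 0
                  [22, 18, 41, 39]], float)    # proposal matched to gt 1
box_to_gt_ind_map = np.array([0, 1, 0, 1])
max_overlaps = np.array([1.0, 1.0, 0.8, 0.75])
keep_inds = np.array([2, 3])                   # the sampled proposals
im_scale = 2.0

gt_assignments = gt_inds[box_to_gt_ind_map[keep_inds]]
mapped_gt_boxes = np.zeros((keep_inds.size, 5))
mapped_gt_boxes[:, :4] = boxes[gt_assignments, :] * im_scale
mapped_gt_boxes[:, 4] = max_overlaps[keep_inds]
# mapped_gt_boxes now holds, for each sampled roi, its matched gt box in
# network-input coordinates plus the roi's max IoU; background rows would be
# zeroed out as in the function above.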
def _sample_rois(roidb, im_scale, batch_idx, stage):
    # Randomly sample a set of rois containing both foreground (positive) and
    # background (negative) examples.
    """Generate a random sample of RoIs comprising foreground and background
    examples.
    """
    fg_thresh = cfg.CASCADE_RCNN.FG_THRESHS[stage - 1]
    bg_thresh_hi = cfg.CASCADE_RCNN.BG_THRESHS_HI[stage - 1]
    bg_thresh_lo = cfg.CASCADE_RCNN.BG_THRESHS_LO[stage - 1]
    #### Key quantity: for each roi, the maximum IoU against all gt bboxes
    max_overlaps = roidb["max_overlaps"]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    #### Rois whose max IoU is at least fg_thresh become the foreground
    #### indices --> fg_inds
    fg_inds = np.where(max_overlaps >= fg_thresh)[0]
    #### Number of fg rois this image contributes
    fg_rois_per_this_image = fg_inds.size

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    #### Rois whose max IoU falls in [bg_thresh_lo, bg_thresh_hi) are sampled
    #### as background
    bg_inds = np.where((max_overlaps < bg_thresh_hi) &
                       (max_overlaps >= bg_thresh_lo))[0]

    # The indices that we're selecting (both fg and bg)
    #### keep_inds is the index list of the kept samples (positives first,
    #### then negatives)
    keep_inds = np.append(fg_inds, bg_inds)
    # Label is the class each RoI has max overlap with
    #### sampled_labels are the class labels of the rois sampled from the roidb
    sampled_labels = roidb["max_classes"][keep_inds]
    #### Set the labels of the sampled negatives to 0
    sampled_labels[fg_rois_per_this_image:] = 0  # Label bg RoIs with class 0
    #### sampled_boxes are all sampled positive and negative boxes, shape (N, 4)
    sampled_boxes = roidb["boxes"][keep_inds]

    #### Indices of all gt bboxes in the roidb
    gt_inds = np.where(roidb["gt_classes"] > 0)[0]
    #### The gt bboxes themselves, shape (num_gt, 4)
    gt_boxes = roidb["boxes"][gt_inds, :]
    #### gt_assignments maps each kept roi to the roidb index of its matched
    #### gt bbox
    gt_assignments = gt_inds[roidb["box_to_gt_ind_map"][keep_inds]]

    # [mapped_gt_boxes, max_overlaps]
    #### mapped_gt_boxes holds, for each kept roi, the coordinates of its
    #### matched gt bbox (background rows are zeroed out below)
    mapped_gt_boxes = blob_utils.zeros((keep_inds.size, 5))
    #### Scale the matched gt boxes to network-input coordinates
    mapped_gt_boxes[:, :4] = gt_boxes[gt_assignments, :] * im_scale
    mapped_gt_boxes[:, 4] = max_overlaps[keep_inds]
    #### Zero out the rows of the negatives (those after fg_rois_per_this_image)
    mapped_gt_boxes[fg_rois_per_this_image:, :] = 0

    if "bbox_targets" not in roidb:
        bbox_targets = _compute_targets(sampled_boxes,
                                        gt_boxes[gt_assignments, :],
                                        sampled_labels, stage)
    else:
        bbox_targets = roidb['bbox_targets'][keep_inds, :]
    bbox_targets, bbox_inside_weights = _expand_bbox_targets(bbox_targets)
    bbox_outside_weights = np.array(bbox_inside_weights > 0,
                                    dtype=bbox_inside_weights.dtype)

    # Scale rois and format as (batch_idx, x1, y1, x2, y2)
    sampled_rois = sampled_boxes * im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones(
        (sampled_rois.shape[0], 1))
    sampled_rois = np.hstack((repeated_batch_idx, sampled_rois))

    # Base Cascade R-CNN blobs
    blob_dict = dict(
        labels_int32=sampled_labels.astype(np.int32, copy=False),
        rois=sampled_rois,
        bbox_targets=bbox_targets,
        bbox_inside_weights=bbox_inside_weights,
        bbox_outside_weights=bbox_outside_weights,
        mapped_gt_boxes=mapped_gt_boxes,
    )

    # Optionally add Mask R-CNN blobs
    if cfg.MODEL.MASK_ON and cfg.MRCNN.AT_STAGE == stage:
        mask_rcnn_roi_data.add_mask_rcnn_blobs(blob_dict, sampled_boxes,
                                               roidb, im_scale, batch_idx)
    # Optionally add Keypoint R-CNN blobs
    if cfg.MODEL.KEYPOINTS_ON and cfg.KRCNN.AT_STAGE == stage:
        keypoint_rcnn_roi_data.add_keypoint_rcnn_blobs(blob_dict, roidb,
                                                       fg_rois_per_this_image,
                                                       fg_inds, im_scale,
                                                       batch_idx, fg_thresh)
    return blob_dict
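# _compute_targets is referenced above but not shown in this file. Below is a
# self-contained sketch of the usual Fast/Cascade R-CNN regression targets:
# (dx, dy, dw, dh) deltas from each sampled box to its matched gt box, scaled
# by per-stage weights and prefixed with the class label (the compact form
# _expand_bbox_targets expects). The per-stage weights are the common Cascade
# R-CNN defaults and are an assumption, not necessarily this fork's cfg values.
import numpy as np

STAGE_BBOX_REG_WEIGHTS = ((10., 10., 5., 5.),
                          (20., 20., 10., 10.),
                          (30., 30., 15., 15.))


def compute_targets_sketch(ex_boxes, gt_boxes, labels, stage):
    """ex_boxes, gt_boxes: (N, 4) float arrays; labels: (N,) class indices."""
    ex_w = ex_boxes[:, 2] - ex_boxes[:, 0] + 1.0
    ex_h = ex_boxes[:, 3] - ex_boxes[:, 1] + 1.0
    ex_cx = ex_boxes[:, 0] + 0.5 * ex_w
    ex_cy = ex_boxes[:, 1] + 0.5 * ex_h
    gt_w = gt_boxes[:, 2] - gt_boxes[:, 0] + 1.0
    gt_h = gt_boxes[:, 3] - gt_boxes[:, 1] + 1.0
    gt_cx = gt_boxes[:, 0] + 0.5 * gt_w
    gt_cy = gt_boxes[:, 1] + 0.5 * gt_h
    wx, wy, ww, wh = STAGE_BBOX_REG_WEIGHTS[stage - 1]
    dx = wx * (gt_cx - ex_cx) / ex_w
    dy = wy * (gt_cy - ex_cy) / ex_h
    dw = ww * np.log(gt_w / ex_w)
    dh = wh * np.log(gt_h / ex_h)
    targets = np.vstack((dx, dy, dw, dh)).transpose()
    return np.hstack((labels[:, np.newaxis], targets)).astype(np.float32)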
def add_mask_rcnn_blobs(blobs, sampled_boxes, roidb, im_scale, batch_idx): """Add Mask R-CNN specific blobs to the input blob dictionary.""" # Prepare the mask targets by associating one gt mask to each training roi # that has a fg (non-bg) class label. M = cfg.MRCNN.RESOLUTION # gao 6,29 gt_inds = np.where((roidb['gt_classes'] > 0) & (roidb['is_crowd'] == 0))[0] boxes_from_polys = roidb['boxes'][gt_inds, :] gt_classes = roidb['gt_classes'][gt_inds] im_label = cv2.imread(roidb['ins_seg'], 0) if roidb['flipped'] == 1: # convert flipped label to original im_label = im_label[:, ::-1] dataset_name = cfg.TRAIN.DATASETS[0] if 'LIP' in dataset_name: flipped_2_orig_class = { 14: 15, 15: 14, 16: 17, 17: 16, 18: 19, 19: 18 } if 'ATR' in dataset_name: flipped_2_orig_class = { 9: 10, 10: 9, 12: 13, 13: 12, 14: 15, 15: 14 } gt_classes_ = copy.deepcopy(gt_classes) for i in flipped_2_orig_class.keys(): index_i = np.where(gt_classes_ == i)[0] if len(index_i) == 0: continue gt_classes[index_i] = flipped_2_orig_class[i] # gt_inds_flip = np.where(gt_classes>13)[0] # for i in gt_inds_flip: # gt_classes[i] = flipped_2_orig_class[gt_classes[i]] fg_inds = np.where(blobs['labels_int32'] > 0)[0] roi_has_mask = blobs['labels_int32'].copy() roi_has_mask[roi_has_mask > 0] = 1 if fg_inds.shape[0] > 0: # Class labels for the foreground rois mask_class_labels = blobs['labels_int32'][fg_inds] masks = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True) # Find overlap between all foreground rois and the bounding boxes # enclosing each segmentation rois_fg = sampled_boxes[fg_inds] overlaps_bbfg_bbpolys = box_utils.bbox_overlaps( rois_fg.astype(np.float32, copy=False), boxes_from_polys.astype(np.float32, copy=False)) # Map from each fg rois to the index of the mask with highest overlap # (measured by bbox overlap) fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1) # add fg targets for i in range(rois_fg.shape[0]): fg_polys_ind = fg_polys_inds[i] # poly_gt = polys_gt[fg_polys_ind] roi_fg = rois_fg[i] # Rasterize the portion of the polygon mask within the given fg roi # to an M x M binary image #logger.info('roi_fg, label shape: {},{}'.format(roi_fg,im_label.shape)) x0, y0, x1, y1 = roi_fg x0 = min(int(x0), im_label.shape[1]) x1 = min(int(x1 + 1), im_label.shape[1]) y0 = min(int(y0), im_label.shape[0]) y1 = min(int(y1 + 1), im_label.shape[0]) #logger.info('x0,y0,x1,y1: {}'.format(x0, y0, x1, y1)) mask_ = im_label[y0:y1, x0:x1] #logger.info('mask_ shape: {}, gt_classes[fg_polys_ind]:{}'.format(mask_.shape, boxes_from_polys[fg_polys_ind])) # mask = segm_utils.polys_to_mask_wrt_box(poly_gt, roi_fg, M) mask = np.array(mask_ == gt_classes[fg_polys_ind], dtype=np.int32) # Ensure it's binary mask = cv2.resize(mask, (M, M), interpolation=cv2.INTER_NEAREST) masks[i, :] = np.reshape(mask, M**2) im_label = None else: # If there are no fg masks (it does happen) # The network cannot handle empty blobs, so we must provide a mask # We simply take the first bg roi, given it an all -1's mask (ignore # label), and label it with class zero (bg). bg_inds = np.where(blobs['labels_int32'] == 0)[0] # rois_fg is actually one background roi, but that's ok because ... 
rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1)) # We give it an -1's blob (ignore label) masks = -blob_utils.ones((1, M**2), int32=True) # We label it with class = 0 (background) mask_class_labels = blob_utils.zeros((1, )) # Mark that the first roi has a mask roi_has_mask[0] = 1 if cfg.MRCNN.CLS_SPECIFIC_MASK: masks = _expand_to_class_specific_mask_targets(masks, mask_class_labels) # Scale rois_fg and format as (batch_idx, x1, y1, x2, y2) rois_fg *= im_scale repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1)) rois_fg = np.hstack((repeated_batch_idx, rois_fg)) # Update blobs dict with Mask R-CNN blobs blobs['mask_rois'] = rois_fg blobs['roi_has_mask_int32'] = roi_has_mask blobs['masks_int32'] = masks
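# The loop above rasterizes mask targets directly from a semantic label image
# (instance / part parsing) instead of from polygons: crop the label map to the
# roi, binarize against the roi's gt class, and resize to M x M with
# nearest-neighbor interpolation so the target stays binary. A standalone
# restatement of that step, for illustration only (assumes a non-empty crop).
import cv2
import numpy as np


def label_map_to_mask_target(im_label, roi, cls, M):
    """im_label: (H, W) uint8 semantic label map; roi: (x1, y1, x2, y2)."""
    x0, y0, x1, y1 = roi
    x0 = min(int(x0), im_label.shape[1])
    x1 = min(int(x1 + 1), im_label.shape[1])
    y0 = min(int(y0), im_label.shape[0])
    y1 = min(int(y1 + 1), im_label.shape[0])
    crop = im_label[y0:y1, x0:x1]
    mask = np.array(crop == cls, dtype=np.uint8)
    mask = cv2.resize(mask, (M, M), interpolation=cv2.INTER_NEAREST)
    return mask.astype(np.int32)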
def add_body_uv_rcnn_blobs(blobs, sampled_boxes, roidb, im_scale, batch_idx): IsFlipped = roidb['flipped'] M = cfg.BODY_UV_RCNN.HEATMAP_SIZE # polys_gt_inds = np.where(roidb['ignore_UV_body'] == 0)[0] boxes_from_polys = [roidb['boxes'][i,:] for i in polys_gt_inds] if not(boxes_from_polys): pass else: boxes_from_polys = np.vstack(boxes_from_polys) boxes_from_polys = np.array(boxes_from_polys) fg_inds = np.where(blobs['labels_int32'] > 0)[0] roi_has_mask = np.zeros( blobs['labels_int32'].shape ) if (bool(boxes_from_polys.any()) & (fg_inds.shape[0] > 0) ): rois_fg = sampled_boxes[fg_inds] # rois_fg.astype(np.float32, copy=False) boxes_from_polys.astype(np.float32, copy=False) # overlaps_bbfg_bbpolys = box_utils.bbox_overlaps( rois_fg.astype(np.float32, copy=False), boxes_from_polys.astype(np.float32, copy=False)) fg_polys_value = np.max(overlaps_bbfg_bbpolys, axis=1) fg_inds = fg_inds[fg_polys_value>0.7] if (bool(boxes_from_polys.any()) & (fg_inds.shape[0] > 0) ): for jj in fg_inds: roi_has_mask[jj] = 1 # Create blobs for densepose supervision. ################################################## The mask All_labels = blob_utils.zeros((fg_inds.shape[0], M ** 2), int32=True) All_Weights = blob_utils.zeros((fg_inds.shape[0], M ** 2), int32=True) ################################################# The points X_points = blob_utils.zeros((fg_inds.shape[0], 196), int32=False) Y_points = blob_utils.zeros((fg_inds.shape[0], 196), int32=False) Ind_points = blob_utils.zeros((fg_inds.shape[0], 196), int32=True) I_points = blob_utils.zeros((fg_inds.shape[0], 196), int32=True) U_points = blob_utils.zeros((fg_inds.shape[0], 196), int32=False) V_points = blob_utils.zeros((fg_inds.shape[0], 196), int32=False) Uv_point_weights = blob_utils.zeros((fg_inds.shape[0], 196), int32=False) ################################################# rois_fg = sampled_boxes[fg_inds] overlaps_bbfg_bbpolys = box_utils.bbox_overlaps( rois_fg.astype(np.float32, copy=False), boxes_from_polys.astype(np.float32, copy=False)) fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1) for i in range(rois_fg.shape[0]): # fg_polys_ind = polys_gt_inds[ fg_polys_inds[i] ] # Ilabel = segm_utils.GetDensePoseMask( roidb['dp_masks'][ fg_polys_ind ] ) # GT_I = np.array(roidb['dp_I'][ fg_polys_ind ]) GT_U = np.array(roidb['dp_U'][ fg_polys_ind ]) GT_V = np.array(roidb['dp_V'][ fg_polys_ind ]) GT_x = np.array(roidb['dp_x'][ fg_polys_ind ]) GT_y = np.array(roidb['dp_y'][ fg_polys_ind ]) GT_weights = np.ones(GT_I.shape).astype(np.float32) # ## Do the flipping of the densepose annotation ! if(IsFlipped): GT_I,GT_U,GT_V,GT_x,GT_y,Ilabel = DP.get_symmetric_densepose(GT_I,GT_U,GT_V,GT_x,GT_y,Ilabel) # roi_fg = rois_fg[i] roi_gt = boxes_from_polys[fg_polys_inds[i],:] # x1 = roi_fg[0] ; x2 = roi_fg[2] y1 = roi_fg[1] ; y2 = roi_fg[3] # x1_source = roi_gt[0]; x2_source = roi_gt[2] y1_source = roi_gt[1]; y2_source = roi_gt[3] # x_targets = ( np.arange(x1,x2, (x2 - x1)/M ) - x1_source ) * ( 256. / (x2_source-x1_source) ) y_targets = ( np.arange(y1,y2, (y2 - y1)/M ) - y1_source ) * ( 256. / (y2_source-y1_source) ) # x_targets = x_targets[0:M] ## Strangely sometimes it can be M+1, so make sure size is OK! 
y_targets = y_targets[0:M] # [X_targets,Y_targets] = np.meshgrid( x_targets, y_targets ) New_Index = cv2.remap(Ilabel,X_targets.astype(np.float32), Y_targets.astype(np.float32), interpolation=cv2.INTER_NEAREST, borderMode= cv2.BORDER_CONSTANT, borderValue=(0)) # All_L = np.zeros(New_Index.shape) All_W = np.ones(New_Index.shape) # All_L = New_Index # gt_length_x = x2_source - x1_source gt_length_y = y2_source - y1_source # GT_y = (( GT_y / 256. * gt_length_y ) + y1_source - y1 ) * ( M / ( y2 - y1 ) ) GT_x = (( GT_x / 256. * gt_length_x ) + x1_source - x1 ) * ( M / ( x2 - x1 ) ) # GT_I[GT_y<0] = 0 GT_I[GT_y>(M-1)] = 0 GT_I[GT_x<0] = 0 GT_I[GT_x>(M-1)] = 0 # points_inside = GT_I>0 GT_U = GT_U[points_inside] GT_V = GT_V[points_inside] GT_x = GT_x[points_inside] GT_y = GT_y[points_inside] GT_weights = GT_weights[points_inside] GT_I = GT_I[points_inside] # X_points[i, 0:len(GT_x)] = GT_x Y_points[i, 0:len(GT_y)] = GT_y Ind_points[i, 0:len(GT_I)] = i I_points[i, 0:len(GT_I)] = GT_I U_points[i, 0:len(GT_U)] = GT_U V_points[i, 0:len(GT_V)] = GT_V Uv_point_weights[i, 0:len(GT_weights)] = GT_weights # All_labels[i, :] = np.reshape(All_L.astype(np.int32), M ** 2) All_Weights[i, :] = np.reshape(All_W.astype(np.int32), M ** 2) ## else: bg_inds = np.where(blobs['labels_int32'] == 0)[0] # if(len(bg_inds)==0): rois_fg = sampled_boxes[0].reshape((1, -1)) else: rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1)) roi_has_mask[0] = 1 # X_points = blob_utils.zeros((1, 196), int32=False) Y_points = blob_utils.zeros((1, 196), int32=False) Ind_points = blob_utils.zeros((1, 196), int32=True) I_points = blob_utils.zeros((1,196), int32=True) U_points = blob_utils.zeros((1, 196), int32=False) V_points = blob_utils.zeros((1, 196), int32=False) Uv_point_weights = blob_utils.zeros((1, 196), int32=False) # All_labels = -blob_utils.ones((1, M ** 2), int32=True) * 0 ## zeros All_Weights = -blob_utils.ones((1, M ** 2), int32=True) * 0 ## zeros # rois_fg *= im_scale repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1)) rois_fg = np.hstack((repeated_batch_idx, rois_fg)) # K = cfg.BODY_UV_RCNN.NUM_PATCHES # U_points = np.tile( U_points , [1,K+1] ) V_points = np.tile( V_points , [1,K+1] ) Uv_Weight_Points = np.zeros(U_points.shape) # for jjj in xrange(1,K+1): Uv_Weight_Points[ : , jjj * I_points.shape[1] : (jjj+1) * I_points.shape[1] ] = ( I_points == jjj ).astype(np.float32) # ################ # Update blobs dict with Mask R-CNN blobs ############### # blobs['body_uv_rois'] = np.array(rois_fg) blobs['roi_has_body_uv_int32'] = np.array(roi_has_mask).astype(np.int32) ## blobs['body_uv_ann_labels'] = np.array(All_labels).astype(np.int32) blobs['body_uv_ann_weights'] = np.array(All_Weights).astype(np.float32) # ########################## blobs['body_uv_X_points'] = X_points.astype(np.float32) blobs['body_uv_Y_points'] = Y_points.astype(np.float32) blobs['body_uv_Ind_points'] = Ind_points.astype(np.float32) blobs['body_uv_I_points'] = I_points.astype(np.float32) blobs['body_uv_U_points'] = U_points.astype(np.float32) #### VERY IMPORTANT : These are switched here : blobs['body_uv_V_points'] = V_points.astype(np.float32) blobs['body_uv_point_weights'] = Uv_Weight_Points.astype(np.float32)
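# The coordinate juggling above maps DensePose point annotations, which are
# stored in a 256 x 256 frame aligned to the gt box, into the M x M heatmap of
# the sampled fg roi. A compact standalone restatement of that transform:
import numpy as np


def dp_points_to_heatmap(dp_x, dp_y, gt_box, fg_roi, M):
    """dp_x, dp_y: arrays in [0, 256) relative to gt_box; boxes are (x1, y1, x2, y2)."""
    gx1, gy1, gx2, gy2 = gt_box
    fx1, fy1, fx2, fy2 = fg_roi
    # 256-frame -> image coordinates -> roi-relative -> M x M heatmap bins
    hm_x = ((dp_x / 256. * (gx2 - gx1)) + gx1 - fx1) * (M / (fx2 - fx1))
    hm_y = ((dp_y / 256. * (gy2 - gy1)) + gy1 - fy1) * (M / (fy2 - fy1))
    return hm_x, hm_y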
def _sample_rois(roidb, im_scale, batch_idx): """Generate a random sample of RoIs comprising foreground and background examples. """ rois_per_image = int(cfg.TRAIN.BATCH_SIZE_PER_IM) fg_rois_per_image = int(np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)) max_overlaps = roidb['max_overlaps'] # Select foreground RoIs as those with >= FG_THRESH overlap fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0] # Guard against the case when an image has fewer than fg_rois_per_image # foreground RoIs fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_inds.size) # Sample foreground regions without replacement if fg_inds.size > 0: fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False) # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) & (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0] # Compute number of background RoIs to take from this image (guarding # against there being fewer than desired) bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image bg_rois_per_this_image = np.minimum(bg_rois_per_this_image, bg_inds.size) # Sample foreground regions without replacement if bg_inds.size > 0: bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False) # The indices that we're selecting (both fg and bg) keep_inds = np.append(fg_inds, bg_inds) # Label is the class each RoI has max overlap with sampled_labels = roidb['max_classes'][keep_inds] sampled_labels[fg_rois_per_this_image:] = 0 # Label bg RoIs with class 0 sampled_boxes = roidb['boxes'][keep_inds] bbox_targets, bbox_inside_weights = _expand_bbox_targets( roidb['bbox_targets'][keep_inds, :]) bbox_outside_weights = np.array(bbox_inside_weights > 0, dtype=bbox_inside_weights.dtype) # Scale rois and format as (batch_idx, x1, y1, x2, y2) sampled_rois = sampled_boxes * im_scale repeated_batch_idx = batch_idx * blob_utils.ones( (sampled_rois.shape[0], 1)) sampled_rois = np.hstack((repeated_batch_idx, sampled_rois)) # Base Fast R-CNN blobs blob_dict = dict(labels_int32=sampled_labels.astype(np.int32, copy=False), rois=sampled_rois, bbox_targets=bbox_targets, bbox_inside_weights=bbox_inside_weights, bbox_outside_weights=bbox_outside_weights) # Optionally add Mask R-CNN blobs if cfg.MODEL.MASK_ON: mask_rcnn_roi_data.add_mask_rcnn_blobs(blob_dict, sampled_boxes, roidb, im_scale, batch_idx) # Optionally add Keypoint R-CNN blobs if cfg.MODEL.KEYPOINTS_ON: keypoint_rcnn_roi_data.add_keypoint_rcnn_blobs(blob_dict, roidb, fg_rois_per_image, fg_inds, im_scale, batch_idx) # optionally add Domain Adaptive R-CNN blobs if cfg.TRAIN.DOMAIN_ADAPTATION: if roidb['is_source']: blob_dict['dc_label'] = np.expand_dims(np.ones( blob_dict['labels_int32'].shape, dtype=blob_dict['labels_int32'].dtype), axis=1) blob_dict['label_mask'] = np.full(blob_dict['labels_int32'].shape, True) blob_dict['source_labels_int32'] = blob_dict['labels_int32'] blob_dict['source_bbox_targets'] = blob_dict['bbox_targets'] blob_dict['source_bbox_inside_weights'] = blob_dict[ 'bbox_inside_weights'] blob_dict['source_bbox_outside_weights'] = blob_dict[ 'bbox_outside_weights'] blob_dict['da_label_wide'] = np.ones((1, 1, 200, 400), dtype=np.int32) else: blob_dict['dc_label'] = np.expand_dims(np.zeros( blob_dict['labels_int32'].shape, dtype=blob_dict['labels_int32'].dtype), axis=1) blob_dict['label_mask'] = np.full(blob_dict['labels_int32'].shape, False) blob_dict['da_label_wide'] = np.zeros((1, 1, 200, 400), dtype=np.int32) return blob_dict
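# In the Domain Adaptive Faster R-CNN setup above, target-domain images carry
# no box supervision; label_mask marks whether the image has usable labels,
# while dc_label / da_label_wide supervise the instance- and image-level domain
# classifiers (1 = source, 0 = target). A toy illustration of gating a per-roi
# loss with such a mask; the downstream use is not shown in this file and the
# loss values here are made up.
import numpy as np

per_roi_cls_loss = np.array([0.7, 0.2, 1.3])          # hypothetical losses
label_mask_source = np.array([True, True, True])      # source image: keep them
loss_source = np.where(label_mask_source, per_roi_cls_loss, 0.0).sum()

label_mask_target = np.array([False, False, False])   # target image: drop them
loss_target = np.where(label_mask_target, per_roi_cls_loss, 0.0).sum()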
def add_body_uv_rcnn_blobs(blobs, sampled_boxes, roidb, im_scale, batch_idx): """Add DensePose specific blobs to the given inputs blobs dictionary.""" M = cfg.BODY_UV_RCNN.HEATMAP_SIZE # Prepare the body UV targets by associating one gt box which contains # body UV annotations to each training roi that has a fg class label. polys_gt_inds = np.where(roidb['ignore_UV_body'] == 0)[0] boxes_from_polys = roidb['boxes'][polys_gt_inds] # Select foreground RoIs fg_inds = np.where(blobs['labels_int32'] > 0)[0] roi_has_body_uv = np.zeros_like(blobs['labels_int32'], dtype=np.int32) if ((boxes_from_polys.shape[0] > 0) & (fg_inds.shape[0] > 0)): # Find overlap between all foreground RoIs and the gt bounding boxes # containing each body UV annotaion. rois_fg = sampled_boxes[fg_inds] overlaps_bbfg_bbpolys = box_utils.bbox_overlaps( rois_fg.astype(np.float32, copy=False), boxes_from_polys.astype(np.float32, copy=False)) # Select foreground RoIs as those with > 0.7 overlap fg_polys_value = np.max(overlaps_bbfg_bbpolys, axis=1) fg_inds = fg_inds[fg_polys_value > 0.7] if ((boxes_from_polys.shape[0] > 0) & (fg_inds.shape[0] > 0)): roi_has_body_uv[fg_inds] = 1 # Create body UV blobs # Dense masks, each mask for a given fg roi is of size M x M. part_inds = blob_utils.zeros((fg_inds.shape[0], M, M), int32=True) # Weights assigned to each target in `part_inds`. By default, all 1's. # part_inds_weights = blob_utils.zeros((fg_inds.shape[0], M, M), int32=True) part_inds_weights = blob_utils.ones((fg_inds.shape[0], M, M), int32=False) # 2D spatial coordinates (on the image). Shape is (#fg_rois, 2) in format # (x, y). coords_xy = blob_utils.zeros((fg_inds.shape[0], 196, 2), int32=False) # 24 patch indices plus a background class I_points = blob_utils.zeros((fg_inds.shape[0], 196), int32=True) # UV coordinates in each patch U_points = blob_utils.zeros((fg_inds.shape[0], 196), int32=False) V_points = blob_utils.zeros((fg_inds.shape[0], 196), int32=False) # Uv_point_weights = blob_utils.zeros((fg_inds.shape[0], 196), int32=False) rois_fg = sampled_boxes[fg_inds] overlaps_bbfg_bbpolys = overlaps_bbfg_bbpolys[fg_inds] # Map from each fg roi to the index of the gt box with highest overlap fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1) # Add body UV targets for each fg roi for i in range(rois_fg.shape[0]): fg_polys_ind = fg_polys_inds[i] polys_gt_ind = polys_gt_inds[fg_polys_ind] # RLE encoded dense masks which are of size 256 x 256. # Map all part masks to 14 labels (i.e., indices of semantic body parts). dp_masks = dp_utils.GetDensePoseMask( roidb['dp_masks'][polys_gt_ind], cfg.BODY_UV_RCNN.NUM_SEMANTIC_PARTS) # Surface patch indices of collected points dp_I = np.array(roidb['dp_I'][polys_gt_ind], dtype=np.int32) # UV coordinates of collected points dp_U = np.array(roidb['dp_U'][polys_gt_ind], dtype=np.float32) dp_V = np.array(roidb['dp_V'][polys_gt_ind], dtype=np.float32) # dp_UV_weights = np.ones_like(dp_I).astype(np.float32) # Spatial coordinates on the image which are scaled such that the bbox # size is 256 x 256. 
dp_x = np.array(roidb['dp_x'][polys_gt_ind], dtype=np.float32) dp_y = np.array(roidb['dp_y'][polys_gt_ind], dtype=np.float32) # Do the flipping of the densepose annotation if roidb['flipped']: dp_I, dp_U, dp_V, dp_x, dp_y, dp_masks = DP.get_symmetric_densepose( dp_I, dp_U, dp_V, dp_x, dp_y, dp_masks) roi_fg = rois_fg[i] gt_box = boxes_from_polys[fg_polys_ind] fg_x1, fg_y1, fg_x2, fg_y2 = roi_fg[0:4] gt_x1, gt_y1, gt_x2, gt_y2 = gt_box[0:4] fg_width = fg_x2 - fg_x1 fg_height = fg_y2 - fg_y1 gt_width = gt_x2 - gt_x1 gt_height = gt_y2 - gt_y1 fg_scale_w = float(M) / fg_width fg_scale_h = float(M) / fg_height gt_scale_w = 256. / gt_width gt_scale_h = 256. / gt_height # Sample M points evenly within the fg roi and scale the relative coordinates # (to associated gt box) such that the bounding box size is 256 x 256. x_targets = (np.arange(fg_x1, fg_x2, fg_width / M) - gt_x1) * gt_scale_w y_targets = (np.arange(fg_y1, fg_y2, fg_height / M) - gt_y1) * gt_scale_h # Construct 2D coordiante matrices x_targets, y_targets = np.meshgrid(x_targets[:M], y_targets[:M]) ## Another implementation option (which results in similar performance) # x_targets = (np.linspace(fg_x1, fg_x2, M, endpoint=True, dtype=np.float32) - gt_x1) * gt_scale_w # y_targets = (np.linspace(fg_y1, fg_y2, M, endpoint=True, dtype=np.float32) - gt_y1) * gt_scale_h # x_targets = (np.linspace(fg_x1, fg_x2, M, endpoint=False) - gt_x1) * gt_scale_w # y_targets = (np.linspace(fg_y1, fg_y2, M, endpoint=False) - gt_y1) * gt_scale_h # x_targets, y_targets = np.meshgrid(x_targets, y_targets) # Map dense masks of size 256 x 256 to target heatmap of size M x M. part_inds[i] = cv2.remap(dp_masks, x_targets.astype(np.float32), y_targets.astype(np.float32), interpolation=cv2.INTER_NEAREST, borderMode=cv2.BORDER_CONSTANT, borderValue=(0)) # Scale annotated spatial coordinates from bbox of size 256 x 256 to target # heatmap of size M x M. dp_x = (dp_x / gt_scale_w + gt_x1 - fg_x1) * fg_scale_w dp_y = (dp_y / gt_scale_h + gt_y1 - fg_y1) * fg_scale_h # Set patch index of points outside the heatmap as 0 (background). dp_I[dp_x < 0] = 0 dp_I[dp_x > (M - 1)] = 0 dp_I[dp_y < 0] = 0 dp_I[dp_y > (M - 1)] = 0 # Get body UV annotations of points inside the heatmap. points_inside = dp_I > 0 dp_x = dp_x[points_inside] dp_y = dp_y[points_inside] dp_I = dp_I[points_inside] dp_U = dp_U[points_inside] dp_V = dp_V[points_inside] # dp_UV_weights = dp_UV_weights[points_inside] # Update body UV blobs num_dp_points = len(dp_I) # coords_xy[i, 0:num_dp_points, 0] = i # fg_roi index coords_xy[i, 0:num_dp_points, 0] = dp_x coords_xy[i, 0:num_dp_points, 1] = dp_y I_points[i, 0:num_dp_points] = dp_I.astype(np.int32) U_points[i, 0:num_dp_points] = dp_U V_points[i, 0:num_dp_points] = dp_V # Uv_point_weights[i, 0:len(dp_UV_weights)] = dp_UV_weights else: # If there are no fg rois # The network cannot handle empty blobs, so we must provide a blob. # We simply take the first bg roi, give it an all 0's body UV annotations # and label it with class zero (bg). bg_inds = np.where(blobs['labels_int32'] == 0)[0] # `rois_fg` is actually one background roi, but that's ok because ... 
if len(bg_inds) == 0: rois_fg = sampled_boxes[0].reshape((1, -1)) else: rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1)) # Mark that the first roi has body UV annotation roi_has_body_uv[0] = 1 # We give it all 0's blobs part_inds = blob_utils.zeros((1, M, M), int32=True) part_inds_weights = blob_utils.zeros((1, M, M), int32=False) coords_xy = blob_utils.zeros((1, 196, 2), int32=False) I_points = blob_utils.zeros((1, 196), int32=True) U_points = blob_utils.zeros((1, 196), int32=False) V_points = blob_utils.zeros((1, 196), int32=False) # Uv_point_weights = blob_utils.zeros((1, 196), int32=False) # Scale rois_fg and format as (batch_idx, x1, y1, x2, y2) rois_fg *= im_scale repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1)) rois_fg = np.hstack((repeated_batch_idx, rois_fg)) # Create body UV blobs for all patches (including background) K = cfg.BODY_UV_RCNN.NUM_PATCHES + 1 # Construct U/V_points blobs for all patches by repeating it #num_patches times. # Shape: (#rois, 196, K) U_points = np.repeat(U_points[:, :, np.newaxis], K, axis=-1) V_points = np.repeat(V_points[:, :, np.newaxis], K, axis=-1) uv_point_weights = np.zeros_like(U_points) # Set binary weights for UV targets in each patch for i in np.arange(1, K): uv_point_weights[:, :, i] = (I_points == i).astype(np.float32) # Update blobs dict with body UV blobs blobs['body_uv_rois'] = rois_fg blobs['roi_has_body_uv_int32'] = roi_has_body_uv # shape: (#rois,) blobs['body_uv_parts'] = part_inds # shape: (#rois, M, M) blobs['body_uv_parts_weights'] = part_inds_weights blobs['body_uv_coords_xy'] = coords_xy.reshape( -1, 2) # shape: (#rois * 196, 2) blobs['body_uv_I_points'] = I_points.reshape(-1, 1) # shape: (#rois * 196, 1) blobs['body_uv_U_points'] = U_points # shape: (#rois, 196, K) blobs['body_uv_V_points'] = V_points blobs['body_uv_point_weights'] = uv_point_weights
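# The U/V point blobs above are tiled K times so that, per collected point,
# only the channel of its patch index carries a nonzero weight. A numpy sketch
# of how a weighted UV regression loss could then be formed from these blobs;
# the smooth-L1 here is illustrative, not this fork's actual loss implementation.
import numpy as np


def smooth_l1(x):
    ax = np.abs(x)
    return np.where(ax < 1., 0.5 * x * x, ax - 0.5)


def uv_point_loss(pred_u, gt_u, weights):
    """pred_u, gt_u, weights: (#rois, 196, K) arrays; weights select patches."""
    per_point = smooth_l1(pred_u - gt_u) * weights
    norm = max(weights.sum(), 1.0)  # avoid dividing by zero when no points
    return per_point.sum() / norm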