def _build_graph(boxes, iou_threshold):
    """Return a dense float32 adjacency matrix over ``boxes``.

    Entry ``(i, j)`` is 1.0 when ``box_utils.bbox_overlaps`` reports an overlap
    between box ``i`` and box ``j`` strictly greater than ``iou_threshold``,
    and 0.0 otherwise.
    """
    # A single float32 view/copy serves as both operands of the pairwise call.
    boxes_f32 = boxes.astype(dtype=np.float32, copy=False)
    pairwise_iou = box_utils.bbox_overlaps(boxes_f32, boxes_f32)
    connected = pairwise_iou > iou_threshold
    return connected.astype(np.float32)
def get_labels(model, i):
    """Debug helper built around the per-stage label blob ``labels_stage_<i + 1>``.

    As written, the function resets the workspace, runs
    ``model.param_init_net`` once, prints the keys of every entry of the
    ``roidb`` blob and then returns ``None``. Everything after the ``return``
    is unreachable.

    NOTE(review): the source layout was flattened, so the nesting of the
    ``return`` (after the loop vs. inside it) and of the final ``FeedBlob``
    call is reconstructed here -- confirm against the original file.

    Args:
        model: object exposing ``param_init_net``.
        i: zero-based stage index; blob names use ``i + 1``.
    """
    workspace.ResetWorkspace()
    workspace.RunNetOnce(model.param_init_net)
    #print(str(model.param_init_net.Proto()))
    #with open(os.path.join(os.getcwd(), "train_net.pbtxt"), 'w') as fid:
    #    fid.write(str(model.net.Proto()))
    #with open(os.path.join(os.getcwd(), "train_init_net.pbtxt"), 'w') as fid:
    #    fid.write(str(model.param_init_net.Proto()))
    roidb = workspace.FetchBlob(core.ScopedName("roidb"))
    for entry in roidb:
        print("roidb: ", entry.keys())
    # Debug exit: the label assignment below is currently never executed.
    return
    # NOTE(review): label_boxes and gt_boxes are only assigned in the two
    # commented-out lines below, so removing the early return without
    # restoring them would raise NameError.
    #label_boxes = workspace.FetchBlob(core.ScopedName("labels_int32"))
    #gt_boxes = workspace.FetchBlob(core.ScopedName("bbox_targets"))
    pred_boxes = workspace.FetchBlob(
        core.ScopedName('bbox_pred_stage_' + str(i + 1)))
    num_inside = pred_boxes.shape[0]
    # Default label 0 for every predicted box.
    labels = np.empty((num_inside, ), dtype=np.int32)
    labels.fill(0)
    if len(gt_boxes) > 0:
        # Compute overlaps between the predicted boxes and the gt boxes
        anchor_by_gt_overlap = box_utils.bbox_overlaps(pred_boxes, gt_boxes)
        # Map from predicted box to the gt box that has highest overlap
        anchor_to_gt_argmax = anchor_by_gt_overlap.argmax(axis=1)
        # For each predicted box, amount of overlap with its best gt box
        # (computed but not used below)
        anchor_to_gt_max = anchor_by_gt_overlap[np.arange(num_inside),
                                                anchor_to_gt_argmax]
        # Each box takes the label of its best-overlapping gt box; no IoU
        # threshold is applied here.
        # NOTE(review): the comprehension reuses the name ``i``; on Python 2
        # that rebinds the parameter ``i`` used in the blob name below.
        labels = np.array([label_boxes[i] for i in anchor_to_gt_argmax],
                          dtype=np.int32)
    workspace.FeedBlob(core.ScopedName("labels_stage_" + str(i + 1)), labels)
def _compute_targets(entry):
    """Build bounding-box regression targets for one roidb entry.

    Args:
        entry: mapping providing 'boxes', 'max_overlaps', 'max_classes',
            'gt_classes' and 'is_crowd'.

    Returns:
        float32 array with one row per box in ``entry['boxes']`` laid out as
        (class, tx, ty, tw, th). Rows stay zero for boxes whose max overlap is
        below ``cfg.TRAIN.BBOX_THRESH`` and for every box when the entry has
        no non-crowd ground truth.
    """
    boxes = entry['boxes']
    max_overlaps = entry['max_overlaps']
    max_classes = entry['max_classes']

    # Ground truth = positive class and not flagged as crowd.
    is_real_gt = (entry['gt_classes'] > 0) & (entry['is_crowd'] == 0)
    gt_rows = np.nonzero(is_real_gt)[0]

    out = np.zeros((boxes.shape[0], 5), dtype=np.float32)
    if len(gt_rows) > 0:
        # Boxes we try to regress: those overlapping some gt well enough.
        ex_rows = np.nonzero(max_overlaps >= cfg.TRAIN.BBOX_THRESH)[0]
        ex_boxes = boxes[ex_rows, :]
        gt_boxes = boxes[gt_rows, :]

        iou = box_utils.bbox_overlaps(
            ex_boxes.astype(dtype=np.float32, copy=False),
            gt_boxes.astype(dtype=np.float32, copy=False))
        # Each example box regresses towards its highest-overlap gt box.
        matched_gt = gt_boxes[iou.argmax(axis=1), :]

        # Class-agnostic regression uses class "1" for every box.
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            out[ex_rows, 0] = 1
        else:
            out[ex_rows, 0] = max_classes[ex_rows]
        out[ex_rows, 1:] = box_utils.bbox_transform_inv(
            ex_boxes, matched_gt, cfg.MODEL.BBOX_REG_WEIGHTS)
    return out
def compute_depth_targets(entry):
    """Build per-box (class, depth) regression targets for one roidb entry.

    Args:
        entry: mapping providing 'boxes', 'max_overlaps', 'max_classes',
            'gt_classes', 'is_crowd' and 'depths'.

    Returns:
        float32 array of shape ``(len(entry['max_classes']), 2)``. Column 0 is
        the class, column 1 the depth taken from the highest-overlap
        ground-truth box. Rows stay zero for boxes below
        ``cfg.TRAIN.BBOX_THRESH`` and when there is no non-crowd ground truth.
    """
    boxes = entry['boxes']
    max_overlaps = entry['max_overlaps']
    max_classes = entry['max_classes']

    valid_gt = (entry['gt_classes'] > 0) & (entry['is_crowd'] == 0)
    gt_rows = np.nonzero(valid_gt)[0]

    depth_targets = np.zeros((max_classes.shape[0], 2), dtype=np.float32)
    if len(gt_rows) == 0:
        # No usable ground truth in this image.
        return depth_targets

    # Boxes for which a target is produced.
    ex_rows = np.nonzero(max_overlaps >= cfg.TRAIN.BBOX_THRESH)[0]
    iou = box_utils.bbox_overlaps(
        boxes[ex_rows, :].astype(dtype=np.float32, copy=False),
        boxes[gt_rows, :].astype(dtype=np.float32, copy=False))
    # Index (into the gt subset) of the best-overlapping gt for each example.
    nearest_gt = iou.argmax(axis=1)
    gt_depths = entry['depths'][gt_rows, :]

    if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
        class_column = 1
    else:
        class_column = max_classes[ex_rows]
    depth_targets[ex_rows, 0] = class_column
    depth_targets[ex_rows, 1:] = gt_depths[nearest_gt, :]
    return depth_targets
def compute_bbox_regression_targets(entry):
    """Return (class, tx, ty, tw, th) regression targets for one roidb entry.

    Every box whose ``max_overlaps`` value reaches ``cfg.TRAIN.BBOX_THRESH`` is
    paired with the non-crowd ground-truth box it overlaps most, and the pair
    is encoded with ``box_utils.bbox_transform_inv``. All other rows of the
    returned float32 ``(num_boxes, 5)`` array are zero.
    """
    all_boxes = entry['boxes']
    best_overlap = entry['max_overlaps']
    best_class = entry['max_classes']
    gt_mask = (entry['gt_classes'] > 0) & (entry['is_crowd'] == 0)
    gt_index = np.nonzero(gt_mask)[0]

    result = np.zeros((all_boxes.shape[0], 5), dtype=np.float32)
    if len(gt_index) == 0:
        # Nothing to regress towards in this image.
        return result

    example_index = np.nonzero(best_overlap >= cfg.TRAIN.BBOX_THRESH)[0]
    example_boxes = all_boxes[example_index, :]

    example_vs_gt = box_utils.bbox_overlaps(
        example_boxes.astype(dtype=np.float32, copy=False),
        all_boxes[gt_index, :].astype(dtype=np.float32, copy=False))
    # Translate the per-example argmax back to row indices of ``all_boxes``.
    target_index = gt_index[example_vs_gt.argmax(axis=1)]
    target_boxes = all_boxes[target_index, :]

    # With class-agnostic regression every example is assigned class "1".
    agnostic = cfg.MODEL.CLS_AGNOSTIC_BBOX_REG
    result[example_index, 0] = 1 if agnostic else best_class[example_index]
    result[example_index, 1:] = box_utils.bbox_transform_inv(
        example_boxes, target_boxes, cfg.MODEL.BBOX_REG_WEIGHTS)
    return result
def _merge_proposal_boxes_into_roidb(roidb, box_list, score_list):
    """Append proposal boxes and their scores to every roidb entry, in place.

    For entry ``k`` the proposals ``box_list[k]`` are appended to 'boxes' and
    ``score_list[k]`` to 'obn_scores'. The per-box fields 'gt_classes',
    'seg_areas' and 'is_crowd' are extended with zeros. 'gt_overlaps' and
    'box_to_gt_ind_map' are extended with the overlap / index of the
    best-matching ground-truth box, or 0 / -1 when a proposal overlaps none.
    """
    assert len(box_list) == len(roidb)
    for idx, entry in enumerate(roidb):
        proposals = box_list[idx]
        proposal_scores = score_list[idx]
        n_new = proposals.shape[0]

        new_overlaps = np.zeros(
            (n_new, entry['gt_overlaps'].shape[1]),
            dtype=entry['gt_overlaps'].dtype)
        new_gt_map = -np.ones(n_new, dtype=entry['box_to_gt_ind_map'].dtype)

        # Note: unlike in other places, all gt rois are included here, even
        # ones marked as crowd. Boxes that overlap with crowds will be
        # filtered out later (see: _filter_crowd_proposals).
        gt_rows = np.nonzero(entry['gt_classes'] > 0)[0]
        if len(gt_rows) > 0:
            gt_boxes = entry['boxes'][gt_rows, :]
            gt_labels = entry['gt_classes'][gt_rows]
            iou = box_utils.bbox_overlaps(
                proposals.astype(dtype=np.float32, copy=False),
                gt_boxes.astype(dtype=np.float32, copy=False))
            # Best gt per proposal and the amount of that overlap.
            best_gt = iou.argmax(axis=1)
            best_iou = iou.max(axis=1)
            # Only proposals that touch some gt box get a record.
            hit = np.nonzero(best_iou > 0)[0]
            new_overlaps[hit, gt_labels[best_gt[hit]]] = best_iou[hit]
            new_gt_map[hit] = gt_rows[best_gt[hit]]

        entry['boxes'] = np.append(
            entry['boxes'],
            proposals.astype(entry['boxes'].dtype, copy=False),
            axis=0)
        entry['obn_scores'] = np.append(
            entry['obn_scores'],
            proposal_scores.astype(entry['obn_scores'].dtype, copy=False),
            axis=0)
        entry['gt_classes'] = np.append(
            entry['gt_classes'],
            np.zeros(n_new, dtype=entry['gt_classes'].dtype))
        entry['seg_areas'] = np.append(
            entry['seg_areas'],
            np.zeros(n_new, dtype=entry['seg_areas'].dtype))
        # 'gt_overlaps' is stored sparse: densify, extend, re-sparsify.
        dense_overlaps = np.append(
            entry['gt_overlaps'].toarray(), new_overlaps, axis=0)
        entry['gt_overlaps'] = dense_overlaps
        entry['gt_overlaps'] = scipy.sparse.csr_matrix(entry['gt_overlaps'])
        entry['is_crowd'] = np.append(
            entry['is_crowd'],
            np.zeros(n_new, dtype=entry['is_crowd'].dtype))
        entry['box_to_gt_ind_map'] = np.append(
            entry['box_to_gt_ind_map'],
            new_gt_map.astype(entry['box_to_gt_ind_map'].dtype, copy=False))
def forward(self, inputs, outputs):
    """See modeling.detector.AddBBoxAccuracy for inputs/outputs documentation.

    Reads predicted deltas (``inputs[0]``), proposals with one leading extra
    column (``inputs[1]``), labels (``inputs[2]``) and matched gt boxes with
    their overlap in column 4 (``inputs[3]``). Writes the mean IoU between the
    decoded predictions and their gt boxes to ``outputs[0]`` and the mean of
    the stored overlaps to ``outputs[1]``.
    """
    deltas = inputs[0].data
    rois = inputs[1].data
    assert rois.shape[1] == 5
    priors = rois[:, 1:]
    labels = inputs[2].data
    mapped_gt = inputs[3].data
    gt_boxes = mapped_gt[:, :4]
    prior_overlap = mapped_gt[:, 4]

    # Evaluate only foreground boxes that are not gt boxes themselves.
    selected = np.nonzero((labels > 0) & (prior_overlap < 1.0))[0]
    count = selected.size
    deltas = deltas[selected, :]
    priors = priors[selected, :]
    labels = labels[selected]
    gt_boxes = gt_boxes[selected, :]
    prior_overlap = prior_overlap[selected]

    if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG or count == 0:
        deltas = deltas[:, -4:]
    else:
        # Pick the 4 deltas that belong to each box's own class.
        per_class = []
        for row in range(count):
            first = labels[row] * 4
            per_class.append(deltas[row, first:first + 4])
        deltas = np.vstack(per_class)

    decoded = box_utils.bbox_transform(priors, deltas, self._bbox_reg_weights)

    avg_iou = 0.
    pre_avg_iou = sum(prior_overlap)
    for row in range(count):
        gt_row = gt_boxes[row, :][np.newaxis, :]
        pred_row = decoded[row, :][np.newaxis, :]
        pair_iou = box_utils.bbox_overlaps(
            gt_row.astype(dtype=np.float32, copy=False),
            pred_row.astype(dtype=np.float32, copy=False),
        )
        avg_iou += pair_iou[0]
    if count > 0:
        avg_iou /= count
        pre_avg_iou /= count

    iou_out, pre_iou_out = outputs[0], outputs[1]
    iou_out.reshape([1])
    iou_out.data[...] = avg_iou
    pre_iou_out.reshape([1])
    pre_iou_out.data[...] = pre_avg_iou
def _do_test(b1, b2):
    """Assert that ``box_utils.bbox_overlaps`` matches ``COCOmask.iou``.

    ``b1`` and ``b2`` are passed to ``bbox_overlaps`` as given and converted
    with ``box_utils.xyxy_to_xywh`` for the COCO API; none of the ``b2`` boxes
    is flagged as crowd. The two IoU matrices must agree to 5 decimals.
    """
    # IoU from the cython implementation
    iou_cython = box_utils.bbox_overlaps(b1, b2)

    # IoU from the COCO API, which takes xywh boxes plus per-box crowd flags
    b1_xywh = box_utils.xyxy_to_xywh(b1)
    b2_xywh = box_utils.xyxy_to_xywh(b2)
    crowd_flags = [0] * b2.shape[0]
    iou_coco = COCOmask.iou(b1_xywh, b2_xywh, crowd_flags)

    np.testing.assert_array_almost_equal(iou_cython, iou_coco, decimal=5)
def _get_proposal_clusters(all_rois, proposals, im_labels, cls_prob):
    """Assign every RoI to its highest-overlap cluster centre and summarise clusters.

    The cluster centres are ``proposals['gt_boxes']`` with labels
    ``proposals['gt_classes'][:, 0]`` and scores ``proposals['gt_scores'][:, 0]``.

    Args:
        all_rois: RoI boxes compared against the cluster centres.
        proposals: mapping with 'gt_boxes', 'gt_classes' and 'gt_scores'.
        im_labels: 2-D array whose first dimension (batch size) must be 1.
        cls_prob: per-RoI class probabilities, indexed ``[roi, class]``.

    Returns:
        Tuple ``(labels, cls_loss_weights, gt_assignment, pc_labels, pc_probs,
        pc_count, img_cls_loss_weights)``. The first three have one element
        per RoI: the label (0 below ``cfg_TRAIN_FG_THRESH``), the loss weight
        (0 below ``cfg_TRAIN_BG_THRESH``) and the cluster index (-1 below
        ``cfg_TRAIN_FG_THRESH``). The last four have one element per cluster:
        its label, mean class probability, member count and summed loss weight.
    """
    num_images, _ = im_labels.shape
    assert num_images == 1, 'batch size shoud be equal to 1'

    # overlaps: (rois x gt_boxes)
    gt_boxes = proposals['gt_boxes']
    gt_labels = proposals['gt_classes']
    gt_scores = proposals['gt_scores']
    overlaps = box_utils.bbox_overlaps(
        all_rois.astype(dtype=np.float32, copy=False),
        gt_boxes.astype(dtype=np.float32, copy=False))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    # Integer-array indexing yields fresh arrays, so the in-place edits below
    # do not touch ``proposals``.
    labels = gt_labels[gt_assignment, 0]
    cls_loss_weights = gt_scores[gt_assignment, 0]

    # RoIs below FG_THRESH are background; those below BG_THRESH are ignored
    # (zero loss weight).
    bg_inds = np.where(max_overlaps < cfg_TRAIN_FG_THRESH)[0]
    ig_inds = np.where(max_overlaps < cfg_TRAIN_BG_THRESH)[0]
    cls_loss_weights[ig_inds] = 0.0
    labels[bg_inds] = 0
    gt_assignment[bg_inds] = -1

    num_clusters = gt_boxes.shape[0]
    img_cls_loss_weights = np.zeros(num_clusters, dtype=np.float32)
    pc_probs = np.zeros(num_clusters, dtype=np.float32)
    pc_labels = np.zeros(num_clusters, dtype=np.int32)
    pc_count = np.zeros(num_clusters, dtype=np.int32)

    # ``range`` iterates identically on Python 2 and 3 (``xrange`` is
    # Python-2 only).
    for i in range(num_clusters):
        po_index = np.where(gt_assignment == i)[0]
        img_cls_loss_weights[i] = np.sum(cls_loss_weights[po_index])
        pc_labels[i] = gt_labels[i, 0]
        pc_count[i] = len(po_index)
        # NOTE(review): a cluster with no members averages an empty selection,
        # which NumPy reports as NaN -- confirm callers tolerate that.
        pc_probs[i] = np.average(cls_prob[po_index, pc_labels[i]])

    return (labels, cls_loss_weights, gt_assignment, pc_labels, pc_probs,
            pc_count, img_cls_loss_weights)
def add_body_uv_rcnn_blobs(blobs, sampled_boxes, roidb, im_scale, batch_idx):
    """Add the body-UV (DensePose) training blobs for one image to ``blobs``.

    Foreground RoIs (``blobs['labels_int32'] > 0``) are kept only if their best
    IoU with a box whose ``roidb['ignore_UV_body']`` flag is 0 exceeds 0.7. For
    every kept RoI the matched annotation's part mask is resampled onto an
    ``M x M`` grid (``M = cfg.BODY_UV_RCNN.HEATMAP_SIZE``) and its annotated
    points are re-expressed in that grid; points outside ``[0, M - 1]`` are
    dropped. When no RoI qualifies, one dummy RoI with all-zero targets is
    emitted instead.

    Args:
        blobs: dict updated in place; must already contain 'labels_int32'.
        sampled_boxes: sampled RoI boxes in unscaled image coordinates. This
            array is not modified.
        roidb: roidb entry with 'flipped', 'ignore_UV_body', 'boxes',
            'dp_masks', 'dp_I', 'dp_U', 'dp_V', 'dp_x' and 'dp_y'.
        im_scale: factor applied to the emitted RoI coordinates.
        batch_idx: value written to the first column of 'body_uv_rois'.
    """
    IsFlipped = roidb['flipped']
    M = cfg.BODY_UV_RCNN.HEATMAP_SIZE

    # Boxes of the instances whose 'ignore_UV_body' flag is 0.
    polys_gt_inds = np.where(roidb['ignore_UV_body'] == 0)[0]
    boxes_from_polys = [roidb['boxes'][i, :] for i in polys_gt_inds]
    if boxes_from_polys:
        boxes_from_polys = np.vstack(boxes_from_polys)
    boxes_from_polys = np.array(boxes_from_polys)
    has_polys = bool(boxes_from_polys.any())

    fg_inds = np.where(blobs['labels_int32'] > 0)[0]
    roi_has_mask = np.zeros(blobs['labels_int32'].shape)

    if has_polys and fg_inds.shape[0] > 0:
        # Keep the foreground RoIs that overlap an annotated box by > 0.7.
        rois_fg = sampled_boxes[fg_inds]
        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_polys.astype(np.float32, copy=False))
        fg_polys_value = np.max(overlaps_bbfg_bbpolys, axis=1)
        fg_inds = fg_inds[fg_polys_value > 0.7]

    if has_polys and fg_inds.shape[0] > 0:
        roi_has_mask[fg_inds] = 1
        num_fg = fg_inds.shape[0]

        # Per-RoI mask targets.
        All_labels = blob_utils.zeros((num_fg, M ** 2), int32=True)
        All_Weights = blob_utils.zeros((num_fg, M ** 2), int32=True)
        # Per-RoI point targets; every RoI gets a fixed buffer of 196 slots.
        X_points = blob_utils.zeros((num_fg, 196), int32=False)
        Y_points = blob_utils.zeros((num_fg, 196), int32=False)
        Ind_points = blob_utils.zeros((num_fg, 196), int32=True)
        I_points = blob_utils.zeros((num_fg, 196), int32=True)
        U_points = blob_utils.zeros((num_fg, 196), int32=False)
        V_points = blob_utils.zeros((num_fg, 196), int32=False)

        # Integer-array indexing returns a copy, so scaling rois_fg in place
        # further down leaves sampled_boxes untouched.
        rois_fg = sampled_boxes[fg_inds]
        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_polys.astype(np.float32, copy=False))
        fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1)

        for i in range(rois_fg.shape[0]):
            fg_polys_ind = polys_gt_inds[fg_polys_inds[i]]
            Ilabel = segm_utils.GetDensePoseMask(
                roidb['dp_masks'][fg_polys_ind])
            GT_I = np.array(roidb['dp_I'][fg_polys_ind])
            GT_U = np.array(roidb['dp_U'][fg_polys_ind])
            GT_V = np.array(roidb['dp_V'][fg_polys_ind])
            GT_x = np.array(roidb['dp_x'][fg_polys_ind])
            GT_y = np.array(roidb['dp_y'][fg_polys_ind])

            # Flip the DensePose annotation together with the image.
            if IsFlipped:
                GT_I, GT_U, GT_V, GT_x, GT_y, Ilabel = \
                    DP.get_symmetric_densepose(
                        GT_I, GT_U, GT_V, GT_x, GT_y, Ilabel)

            roi_fg = rois_fg[i]
            roi_gt = boxes_from_polys[fg_polys_inds[i], :]
            x1, y1, x2, y2 = roi_fg[0], roi_fg[1], roi_fg[2], roi_fg[3]
            x1_source, y1_source = roi_gt[0], roi_gt[1]
            x2_source, y2_source = roi_gt[2], roi_gt[3]

            # Sample positions of the RoI grid, expressed in the annotated
            # box's frame scaled by 256.
            x_targets = ((np.arange(x1, x2, (x2 - x1) / M) - x1_source)
                         * (256. / (x2_source - x1_source)))
            y_targets = ((np.arange(y1, y2, (y2 - y1) / M) - y1_source)
                         * (256. / (y2_source - y1_source)))
            # arange can occasionally yield M + 1 samples; clamp to M.
            x_targets = x_targets[0:M]
            y_targets = y_targets[0:M]

            X_targets, Y_targets = np.meshgrid(x_targets, y_targets)
            New_Index = cv2.remap(Ilabel,
                                  X_targets.astype(np.float32),
                                  Y_targets.astype(np.float32),
                                  interpolation=cv2.INTER_NEAREST,
                                  borderMode=cv2.BORDER_CONSTANT,
                                  borderValue=(0))

            # Move the annotated points from the 256-scaled box frame into
            # the M x M RoI grid.
            gt_length_x = x2_source - x1_source
            gt_length_y = y2_source - y1_source
            GT_y = (((GT_y / 256. * gt_length_y) + y1_source - y1)
                    * (M / (y2 - y1)))
            GT_x = (((GT_x / 256. * gt_length_x) + x1_source - x1)
                    * (M / (x2 - x1)))

            # Drop points that fall outside the RoI grid.
            GT_I[GT_y < 0] = 0
            GT_I[GT_y > (M - 1)] = 0
            GT_I[GT_x < 0] = 0
            GT_I[GT_x > (M - 1)] = 0
            points_inside = GT_I > 0
            GT_U = GT_U[points_inside]
            GT_V = GT_V[points_inside]
            GT_x = GT_x[points_inside]
            GT_y = GT_y[points_inside]
            GT_I = GT_I[points_inside]

            X_points[i, 0:len(GT_x)] = GT_x
            Y_points[i, 0:len(GT_y)] = GT_y
            Ind_points[i, 0:len(GT_I)] = i
            I_points[i, 0:len(GT_I)] = GT_I
            U_points[i, 0:len(GT_U)] = GT_U
            V_points[i, 0:len(GT_V)] = GT_V

            All_labels[i, :] = np.reshape(New_Index.astype(np.int32), M ** 2)
            All_Weights[i, :] = 1
    else:
        # No usable RoI: emit one dummy RoI (the first background RoI if
        # there is one, otherwise RoI 0) with all-zero targets.
        bg_inds = np.where(blobs['labels_int32'] == 0)[0]
        dummy_idx = 0 if len(bg_inds) == 0 else bg_inds[0]
        # Copy the row: plain indexing plus reshape gives a view, and the
        # in-place scaling below would otherwise modify the caller's
        # sampled_boxes.
        rois_fg = sampled_boxes[dummy_idx].reshape((1, -1)).copy()
        roi_has_mask[0] = 1

        X_points = blob_utils.zeros((1, 196), int32=False)
        Y_points = blob_utils.zeros((1, 196), int32=False)
        Ind_points = blob_utils.zeros((1, 196), int32=True)
        I_points = blob_utils.zeros((1, 196), int32=True)
        U_points = blob_utils.zeros((1, 196), int32=False)
        V_points = blob_utils.zeros((1, 196), int32=False)
        All_labels = blob_utils.zeros((1, M ** 2), int32=True)
        All_Weights = blob_utils.zeros((1, M ** 2), int32=True)

    # RoIs are emitted as (batch_idx, x1, y1, x2, y2) in scaled coordinates.
    rois_fg *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1))
    rois_fg = np.hstack((repeated_batch_idx, rois_fg))

    # Repeat U/V once per patch (plus slot 0) and build a weight map that is
    # 1 only in the slot of the patch each point belongs to.
    K = cfg.BODY_UV_RCNN.NUM_PATCHES
    U_points = np.tile(U_points, [1, K + 1])
    V_points = np.tile(V_points, [1, K + 1])
    Uv_Weight_Points = np.zeros(U_points.shape)
    num_slots = I_points.shape[1]
    for jjj in range(1, K + 1):
        Uv_Weight_Points[:, jjj * num_slots:(jjj + 1) * num_slots] = (
            I_points == jjj).astype(np.float32)

    # Update the blobs dict with the body-UV blobs.
    blobs['body_uv_rois'] = np.array(rois_fg)
    blobs['roi_has_body_uv_int32'] = np.array(roi_has_mask).astype(np.int32)
    blobs['body_uv_ann_labels'] = np.array(All_labels).astype(np.int32)
    blobs['body_uv_ann_weights'] = np.array(All_Weights).astype(np.float32)
    blobs['body_uv_X_points'] = X_points.astype(np.float32)
    blobs['body_uv_Y_points'] = Y_points.astype(np.float32)
    blobs['body_uv_Ind_points'] = Ind_points.astype(np.float32)
    blobs['body_uv_I_points'] = I_points.astype(np.float32)
    blobs['body_uv_U_points'] = U_points.astype(np.float32)
    blobs['body_uv_V_points'] = V_points.astype(np.float32)
    # The exported point weights are the per-patch weights built above.
    blobs['body_uv_point_weights'] = Uv_Weight_Points.astype(np.float32)
def add_body_uv_rcnn_blobs(blobs, sampled_boxes, roidb, im_scale, batch_idx):
    """Add the body-UV (DensePose) training blobs for one image to ``blobs``.

    Foreground RoIs (``blobs['labels_int32'] > 0``) are kept only if their best
    IoU with a box whose ``roidb['ignore_UV_body']`` flag is 0 exceeds 0.7. For
    every kept RoI the matched annotation's part mask is resampled onto an
    ``M x M`` grid (``M = cfg.BODY_UV_RCNN.HEATMAP_SIZE``) and its annotated
    points are re-expressed in that grid; points outside ``[0, M - 1]`` are
    dropped. When no RoI qualifies, one dummy RoI with all-zero targets is
    emitted instead.

    Args:
        blobs: dict updated in place; must already contain 'labels_int32'.
        sampled_boxes: sampled RoI boxes in unscaled image coordinates. This
            array is not modified.
        roidb: roidb entry with 'flipped', 'ignore_UV_body', 'boxes',
            'dp_masks', 'dp_I', 'dp_U', 'dp_V', 'dp_x' and 'dp_y'.
        im_scale: factor applied to the emitted RoI coordinates.
        batch_idx: value written to the first column of 'body_uv_rois'.
    """
    IsFlipped = roidb['flipped']
    M = cfg.BODY_UV_RCNN.HEATMAP_SIZE

    # Boxes of the instances whose 'ignore_UV_body' flag is 0.
    polys_gt_inds = np.where(roidb['ignore_UV_body'] == 0)[0]
    boxes_from_polys = [roidb['boxes'][i, :] for i in polys_gt_inds]
    if boxes_from_polys:
        boxes_from_polys = np.vstack(boxes_from_polys)
    boxes_from_polys = np.array(boxes_from_polys)
    has_polys = bool(boxes_from_polys.any())

    fg_inds = np.where(blobs['labels_int32'] > 0)[0]
    roi_has_mask = np.zeros(blobs['labels_int32'].shape)

    if has_polys and fg_inds.shape[0] > 0:
        # Keep the foreground RoIs that overlap an annotated box by > 0.7.
        rois_fg = sampled_boxes[fg_inds]
        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_polys.astype(np.float32, copy=False))
        fg_polys_value = np.max(overlaps_bbfg_bbpolys, axis=1)
        fg_inds = fg_inds[fg_polys_value > 0.7]

    if has_polys and fg_inds.shape[0] > 0:
        roi_has_mask[fg_inds] = 1
        num_fg = fg_inds.shape[0]

        # Per-RoI mask targets.
        All_labels = blob_utils.zeros((num_fg, M**2), int32=True)
        All_Weights = blob_utils.zeros((num_fg, M**2), int32=True)
        # Per-RoI point targets; every RoI gets a fixed buffer of 196 slots.
        X_points = blob_utils.zeros((num_fg, 196), int32=False)
        Y_points = blob_utils.zeros((num_fg, 196), int32=False)
        Ind_points = blob_utils.zeros((num_fg, 196), int32=True)
        I_points = blob_utils.zeros((num_fg, 196), int32=True)
        U_points = blob_utils.zeros((num_fg, 196), int32=False)
        V_points = blob_utils.zeros((num_fg, 196), int32=False)

        # Integer-array indexing returns a copy, so scaling rois_fg in place
        # further down leaves sampled_boxes untouched.
        rois_fg = sampled_boxes[fg_inds]
        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_polys.astype(np.float32, copy=False))
        fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1)

        for i in range(rois_fg.shape[0]):
            fg_polys_ind = polys_gt_inds[fg_polys_inds[i]]
            Ilabel = segm_utils.GetDensePoseMask(
                roidb['dp_masks'][fg_polys_ind])
            GT_I = np.array(roidb['dp_I'][fg_polys_ind])
            GT_U = np.array(roidb['dp_U'][fg_polys_ind])
            GT_V = np.array(roidb['dp_V'][fg_polys_ind])
            GT_x = np.array(roidb['dp_x'][fg_polys_ind])
            GT_y = np.array(roidb['dp_y'][fg_polys_ind])

            # Flip the DensePose annotation together with the image.
            if IsFlipped:
                GT_I, GT_U, GT_V, GT_x, GT_y, Ilabel = \
                    DP.get_symmetric_densepose(
                        GT_I, GT_U, GT_V, GT_x, GT_y, Ilabel)

            roi_fg = rois_fg[i]
            roi_gt = boxes_from_polys[fg_polys_inds[i], :]
            x1, y1, x2, y2 = roi_fg[0], roi_fg[1], roi_fg[2], roi_fg[3]
            x1_source, y1_source = roi_gt[0], roi_gt[1]
            x2_source, y2_source = roi_gt[2], roi_gt[3]

            # Sample positions of the RoI grid, expressed in the annotated
            # box's frame scaled by 256.
            x_targets = ((np.arange(x1, x2, (x2 - x1) / M) - x1_source)
                         * (256. / (x2_source - x1_source)))
            y_targets = ((np.arange(y1, y2, (y2 - y1) / M) - y1_source)
                         * (256. / (y2_source - y1_source)))
            # arange can occasionally yield M + 1 samples; clamp to M.
            x_targets = x_targets[0:M]
            y_targets = y_targets[0:M]

            X_targets, Y_targets = np.meshgrid(x_targets, y_targets)
            New_Index = cv2.remap(Ilabel,
                                  X_targets.astype(np.float32),
                                  Y_targets.astype(np.float32),
                                  interpolation=cv2.INTER_NEAREST,
                                  borderMode=cv2.BORDER_CONSTANT,
                                  borderValue=(0))

            # Move the annotated points from the 256-scaled box frame into
            # the M x M RoI grid.
            gt_length_x = x2_source - x1_source
            gt_length_y = y2_source - y1_source
            GT_y = (((GT_y / 256. * gt_length_y) + y1_source - y1)
                    * (M / (y2 - y1)))
            GT_x = (((GT_x / 256. * gt_length_x) + x1_source - x1)
                    * (M / (x2 - x1)))

            # Drop points that fall outside the RoI grid.
            GT_I[GT_y < 0] = 0
            GT_I[GT_y > (M - 1)] = 0
            GT_I[GT_x < 0] = 0
            GT_I[GT_x > (M - 1)] = 0
            points_inside = GT_I > 0
            GT_U = GT_U[points_inside]
            GT_V = GT_V[points_inside]
            GT_x = GT_x[points_inside]
            GT_y = GT_y[points_inside]
            GT_I = GT_I[points_inside]

            X_points[i, 0:len(GT_x)] = GT_x
            Y_points[i, 0:len(GT_y)] = GT_y
            Ind_points[i, 0:len(GT_I)] = i
            I_points[i, 0:len(GT_I)] = GT_I
            U_points[i, 0:len(GT_U)] = GT_U
            V_points[i, 0:len(GT_V)] = GT_V

            All_labels[i, :] = np.reshape(New_Index.astype(np.int32), M**2)
            All_Weights[i, :] = 1
    else:
        # No usable RoI: emit one dummy RoI (the first background RoI if
        # there is one, otherwise RoI 0) with all-zero targets.
        bg_inds = np.where(blobs['labels_int32'] == 0)[0]
        dummy_idx = 0 if len(bg_inds) == 0 else bg_inds[0]
        # Copy the row: plain indexing plus reshape gives a view, and the
        # in-place scaling below would otherwise modify the caller's
        # sampled_boxes.
        rois_fg = sampled_boxes[dummy_idx].reshape((1, -1)).copy()
        roi_has_mask[0] = 1

        X_points = blob_utils.zeros((1, 196), int32=False)
        Y_points = blob_utils.zeros((1, 196), int32=False)
        Ind_points = blob_utils.zeros((1, 196), int32=True)
        I_points = blob_utils.zeros((1, 196), int32=True)
        U_points = blob_utils.zeros((1, 196), int32=False)
        V_points = blob_utils.zeros((1, 196), int32=False)
        All_labels = blob_utils.zeros((1, M**2), int32=True)
        All_Weights = blob_utils.zeros((1, M**2), int32=True)

    # RoIs are emitted as (batch_idx, x1, y1, x2, y2) in scaled coordinates.
    rois_fg *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1))
    rois_fg = np.hstack((repeated_batch_idx, rois_fg))

    # Repeat U/V once per patch (plus slot 0) and build a weight map that is
    # 1 only in the slot of the patch each point belongs to.
    K = cfg.BODY_UV_RCNN.NUM_PATCHES
    U_points = np.tile(U_points, [1, K + 1])
    V_points = np.tile(V_points, [1, K + 1])
    Uv_Weight_Points = np.zeros(U_points.shape)
    num_slots = I_points.shape[1]
    for jjj in range(1, K + 1):
        Uv_Weight_Points[:, jjj * num_slots:(jjj + 1) * num_slots] = (
            I_points == jjj).astype(np.float32)

    # Update the blobs dict with the body-UV blobs.
    blobs['body_uv_rois'] = np.array(rois_fg)
    blobs['roi_has_body_uv_int32'] = np.array(roi_has_mask).astype(np.int32)
    blobs['body_uv_ann_labels'] = np.array(All_labels).astype(np.int32)
    blobs['body_uv_ann_weights'] = np.array(All_Weights).astype(np.float32)
    blobs['body_uv_X_points'] = X_points.astype(np.float32)
    blobs['body_uv_Y_points'] = Y_points.astype(np.float32)
    blobs['body_uv_Ind_points'] = Ind_points.astype(np.float32)
    blobs['body_uv_I_points'] = I_points.astype(np.float32)
    blobs['body_uv_U_points'] = U_points.astype(np.float32)
    blobs['body_uv_V_points'] = V_points.astype(np.float32)
    # The exported point weights are the per-patch weights built above.
    blobs['body_uv_point_weights'] = Uv_Weight_Points.astype(np.float32)
def _get_retinanet_blobs(foas, all_anchors, gt_boxes, gt_classes, im_width,
                         im_height):
    """Build RetinaNet label / box-target blobs for every field of anchors.

    Anchor labels are the gt class for positives, 0 for negatives and -1 for
    anchors to ignore. An anchor is positive when it is the best match of some
    gt box or its best overlap reaches ``cfg.RETINANET.POSITIVE_OVERLAP``; it
    is negative when its best overlap is below
    ``cfg.RETINANET.NEGATIVE_OVERLAP``. With no gt boxes every anchor has
    overlap 0 and no box targets are produced.

    Args:
        foas: iterable of objects exposing ``field_size`` and ``stride``;
            consecutive runs of ``field_size ** 2`` anchors belong to each.
        all_anchors: ``(N, 4)`` anchor boxes, ordered field by field.
        gt_boxes: ground-truth boxes (may be empty).
        gt_classes: class of every gt box.
        im_width, im_height: image size used to crop the label maps.

    Returns:
        ``(blobs_out, out_num_fg, out_num_bg)`` where ``blobs_out`` holds one
        dict per field with 'retnet_cls_labels', 'retnet_roi_bbox_targets' and
        'retnet_roi_fg_bbox_locs'.
    """
    total_anchors = all_anchors.shape[0]
    logger.debug('Getting mad blobs: im_height {} im_width: {}'.format(
        im_height, im_width))

    # All anchors are kept; there is no inside-the-image filtering here.
    inds_inside = np.arange(all_anchors.shape[0])
    anchors = all_anchors
    num_inside = len(inds_inside)

    logger.debug('total_anchors: {}'.format(total_anchors))
    logger.debug('inds_inside: {}'.format(num_inside))
    logger.debug('anchors.shape: {}'.format(anchors.shape))

    # Compute anchor labels:
    # label >= 1 is positive, 0 is negative, -1 is don't care (ignore)
    labels = np.empty((num_inside, ), dtype=np.float32)
    labels.fill(-1)

    has_gt = len(gt_boxes) > 0
    if has_gt:
        # Overlaps between the anchors and the gt boxes
        anchor_by_gt_overlap = box_utils.bbox_overlaps(anchors, gt_boxes)
        # Map from anchor to gt box that has highest overlap
        anchor_to_gt_argmax = anchor_by_gt_overlap.argmax(axis=1)
        # For each anchor, amount of overlap with most overlapping gt box
        anchor_to_gt_max = anchor_by_gt_overlap[np.arange(num_inside),
                                                anchor_to_gt_argmax]

        # Map from gt box to an anchor that has highest overlap
        gt_to_anchor_argmax = anchor_by_gt_overlap.argmax(axis=0)
        # For each gt box, amount of overlap with most overlapping anchor
        gt_to_anchor_max = anchor_by_gt_overlap[
            gt_to_anchor_argmax,
            np.arange(anchor_by_gt_overlap.shape[1])]
        # All anchors that share the max overlap amount (includes ties)
        anchors_with_max_overlap = np.where(
            anchor_by_gt_overlap == gt_to_anchor_max)[0]

        # Fg label: for each gt use anchors with highest overlap
        gt_inds = anchor_to_gt_argmax[anchors_with_max_overlap]
        labels[anchors_with_max_overlap] = gt_classes[gt_inds]
        # Fg label: above threshold IoU
        inds = anchor_to_gt_max >= cfg.RETINANET.POSITIVE_OVERLAP
        gt_inds = anchor_to_gt_argmax[inds]
        labels[inds] = gt_classes[gt_inds]
    else:
        # Without gt boxes every anchor has zero overlap, so the background
        # selection below stays well defined.
        anchor_to_gt_max = np.zeros((num_inside, ), dtype=np.float32)

    fg_inds = np.where(labels >= 1)[0]
    bg_inds = np.where(anchor_to_gt_max < cfg.RETINANET.NEGATIVE_OVERLAP)[0]
    labels[bg_inds] = 0
    num_fg, num_bg = len(fg_inds), len(bg_inds)

    bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
    if has_gt:
        bbox_targets[fg_inds, :] = data_utils.compute_targets(
            anchors[fg_inds, :], gt_boxes[anchor_to_gt_argmax[fg_inds], :])

    # Map up to original set of anchors
    labels = data_utils.unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = data_utils.unmap(bbox_targets, total_anchors, inds_inside,
                                    fill=0)

    # Split the generated labels, etc. into labels per each field of anchors
    blobs_out = []
    start_idx = 0
    for foa in foas:
        H = foa.field_size
        W = foa.field_size
        end_idx = start_idx + H * W
        _labels = labels[start_idx:end_idx]
        _bbox_targets = bbox_targets[start_idx:end_idx, :]
        start_idx = end_idx

        # labels with shape (1, 1, H, W); bbox targets with shape (1, 4, H, W)
        _labels = _labels.reshape((1, 1, H, W))
        _bbox_targets = _bbox_targets.reshape(
            (1, H, W, 4)).transpose(0, 3, 1, 2)

        stride = foa.stride
        w = int(im_width / stride)
        h = int(im_height / stride)

        # data for select_smooth_l1 loss
        num_classes = cfg.MODEL.NUM_CLASSES - 1
        # np.where on the 4-D label map returns one index array per axis;
        # axes 2 and 3 are the grid row and column. With no foreground the
        # arrays are empty and the outputs keep shape (0, 4).
        inds_4d = np.where(_labels > 0)
        im_inds, y, x = inds_4d[0], inds_4d[2], inds_4d[3]
        _roi_bbox_targets = np.zeros((len(im_inds), 4))
        _roi_fg_bbox_locs = np.zeros((len(im_inds), 4))
        lbls = _labels[im_inds, :, y, x]
        for i, lbl in enumerate(lbls):
            cls_idx = lbl[0] - 1
            if not cfg.RETINANET.CLASS_SPECIFIC_BBOX:
                cls_idx = 0
            assert cls_idx >= 0 and cls_idx < num_classes, \
                'label out of the range'
            _roi_bbox_targets[i, :] = _bbox_targets[0, :, y[i], x[i]]
            _roi_fg_bbox_locs[i, :] = [0, cls_idx, y[i], x[i]]

        blobs_out.append(
            dict(
                retnet_cls_labels=_labels[:, :, 0:h, 0:w].astype(np.int32),
                retnet_roi_bbox_targets=_roi_bbox_targets.astype(np.float32),
                retnet_roi_fg_bbox_locs=_roi_fg_bbox_locs.astype(np.float32),
            ))

    out_num_fg = np.array([num_fg + 1.0], dtype=np.float32)
    out_num_bg = (np.array([num_bg + 1.0]) * (cfg.MODEL.NUM_CLASSES - 1) +
                  out_num_fg * (cfg.MODEL.NUM_CLASSES - 2))
    return blobs_out, out_num_fg, out_num_bg
def _merge_proposal_boxes_into_roidb(roidb, box_list):
    """Extend every roidb entry, in place, with its proposal boxes.

    ``box_list[k]`` holds the proposals of ``roidb[k]``. Each proposal is
    appended with class 0, segment area 0 and crowd flag 0. Its 'gt_overlaps'
    row records the IoU with the best-matching ground-truth box in that box's
    class column, and 'box_to_gt_ind_map' stores that box's index (-1 when
    the proposal overlaps no ground truth).
    """
    assert len(box_list) == len(roidb)
    for entry_idx, entry in enumerate(roidb):
        # Proposals (rois) for this image
        rois = box_list[entry_idx]
        roi_count = rois.shape[0]

        # Overlap table for the new boxes: (num boxes, num classes + 1)
        roi_overlaps = np.zeros(
            (roi_count, entry['gt_overlaps'].shape[1]),
            dtype=entry['gt_overlaps'].dtype
        )
        # Matched gt index per new box: (num boxes,), -1 means "no match"
        roi_to_gt = -np.ones(
            roi_count, dtype=entry['box_to_gt_ind_map'].dtype
        )

        # Note: unlike in other places, all gt rois are included here, even
        # ones marked as crowd. Boxes that overlap with crowds will be
        # filtered out later (see: _filter_crowd_proposals).
        # gt_classes holds the class of every box; rows with a positive
        # class are the gt boxes.
        gt_positions = np.nonzero(entry['gt_classes'] > 0)[0]
        if len(gt_positions) > 0:
            gt_rois = entry['boxes'][gt_positions, :]
            gt_cls = entry['gt_classes'][gt_positions]

            # IoU between every proposal and every gt box
            roi_gt_iou = box_utils.bbox_overlaps(
                rois.astype(dtype=np.float32, copy=False),
                gt_rois.astype(dtype=np.float32, copy=False)
            )
            # Gt box that overlaps each proposal the most
            # (ties are broken arbitrarily by class order)
            winner = roi_gt_iou.argmax(axis=1)
            # Amount of that overlap
            winner_iou = roi_gt_iou.max(axis=1)
            # Proposals with non-zero overlap with some gt box
            touching = np.nonzero(winner_iou > 0)[0]
            # Record the max overlap under the class of the matched gt box
            roi_overlaps[touching, gt_cls[winner[touching]]] = \
                winner_iou[touching]
            # Index of the matched gt box (the default stays -1)
            roi_to_gt[touching] = gt_positions[winner[touching]]

        # Append the new boxes
        entry['boxes'] = np.append(
            entry['boxes'],
            rois.astype(entry['boxes'].dtype, copy=False),
            axis=0
        )
        # Proposals get class 0, so a positive class still identifies gt
        entry['gt_classes'] = np.append(
            entry['gt_classes'],
            np.zeros(roi_count, dtype=entry['gt_classes'].dtype)
        )
        # Proposals get zero segment area
        entry['seg_areas'] = np.append(
            entry['seg_areas'],
            np.zeros(roi_count, dtype=entry['seg_areas'].dtype)
        )
        # Append each proposal's IoU with its gt box; the table is stored
        # sparse, so densify, extend and convert back
        merged_overlaps = np.append(
            entry['gt_overlaps'].toarray(), roi_overlaps, axis=0
        )
        entry['gt_overlaps'] = merged_overlaps
        entry['gt_overlaps'] = scipy.sparse.csr_matrix(entry['gt_overlaps'])
        # Proposals are never crowd
        entry['is_crowd'] = np.append(
            entry['is_crowd'],
            np.zeros(roi_count, dtype=entry['is_crowd'].dtype)
        )
        # Append the matched gt index of every proposal
        entry['box_to_gt_ind_map'] = np.append(
            entry['box_to_gt_ind_map'],
            roi_to_gt.astype(entry['box_to_gt_ind_map'].dtype, copy=False)
        )
def _merge_proposal_boxes_into_roidb(roidb, box_list, model=None):
    """Add proposal boxes to each roidb entry, with domain-adaptation extras.

    With ``cfg.TRAIN.DOMAIN_ADAPTATION`` enabled, the first
    ``min(len(boxes), cfg.TRAIN.BATCH_SIZE_PER_IM)`` proposals are also stored
    as 'da_boxes'. Target-domain entries (``entry['is_source']`` false) only
    receive constant 'pada_roi_weights' and are otherwise left untouched.
    Source entries get the proposals merged into their per-box fields and,
    when ``cfg.TRAIN.PADA`` is also set, per-RoI 'pada_roi_weights'.

    Args:
        roidb: list of roidb entries, modified in place.
        box_list: proposals per entry, same length as ``roidb``.
        model: object exposing ``class_weight_db``; it is dereferenced whenever
            domain adaptation is enabled, so it must be provided in that case.
    """
    assert len(box_list) == len(roidb)
    for i, entry in enumerate(roidb):
        boxes = box_list[i]

        if cfg.TRAIN.DOMAIN_ADAPTATION:
            rois_per_image = min(len(boxes), int(cfg.TRAIN.BATCH_SIZE_PER_IM))
            entry['da_boxes'] = np.array(boxes[:rois_per_image],
                                         dtype=np.float32)
            if not entry['is_source']:
                # Target rois all share the average PADA weight, rescaled by
                # the source/target image ratio of a batch.
                weight = model.class_weight_db.get_avg_pada_weight()
                ims = cfg.TRAIN.IMS_PER_BATCH
                source_imgs = ims - ims // 2
                target_imgs = ims // 2
                weight *= source_imgs / target_imgs
                entry['pada_roi_weights'] = np.full(rois_per_image, weight,
                                                    dtype=np.float32)
                continue  # we do not supervise on target set rois.

        num_boxes = boxes.shape[0]  # the rpn_rois for this image=entry
        gt_overlaps = np.zeros((num_boxes, entry['gt_overlaps'].shape[1]),
                               dtype=entry['gt_overlaps'].dtype)
        box_to_gt_ind_map = -np.ones(
            (num_boxes), dtype=entry['box_to_gt_ind_map'].dtype)

        # Note: unlike in other places, here we intentionally include all gt
        # rois, even ones marked as crowd. Boxes that overlap with crowds will
        # be filtered out later (see: _filter_crowd_proposals).
        gt_inds = np.where(entry['gt_classes'] > 0)[0]
        has_gt = len(gt_inds) > 0
        if has_gt:
            gt_boxes = entry['boxes'][gt_inds, :]
            gt_classes = entry['gt_classes'][gt_inds]
            proposal_to_gt_overlaps = box_utils.bbox_overlaps(
                boxes.astype(dtype=np.float32, copy=False),
                gt_boxes.astype(dtype=np.float32, copy=False))
            # Gt box that overlaps each input box the most
            # (ties are broken arbitrarily by class order)
            argmaxes = proposal_to_gt_overlaps.argmax(axis=1)
            # Amount of that overlap
            maxes = proposal_to_gt_overlaps.max(axis=1)
            # Those boxes with non-zero overlap with gt boxes
            I = np.where(maxes > 0)[0]
            # Record max overlaps with the class of the appropriate gt box
            gt_overlaps[I, gt_classes[argmaxes[I]]] = maxes[I]
            box_to_gt_ind_map[I] = gt_inds[argmaxes[I]]

        entry['boxes'] = np.append(entry['boxes'],
                                   boxes.astype(entry['boxes'].dtype,
                                                copy=False),
                                   axis=0)
        entry['gt_classes'] = np.append(
            entry['gt_classes'],
            np.zeros((num_boxes), dtype=entry['gt_classes'].dtype))
        entry['seg_areas'] = np.append(
            entry['seg_areas'],
            np.zeros((num_boxes), dtype=entry['seg_areas'].dtype))
        entry['gt_overlaps'] = np.append(entry['gt_overlaps'].toarray(),
                                         gt_overlaps,
                                         axis=0)
        entry['gt_overlaps'] = scipy.sparse.csr_matrix(entry['gt_overlaps'])
        entry['is_crowd'] = np.append(
            entry['is_crowd'],
            np.zeros((num_boxes), dtype=entry['is_crowd'].dtype))
        entry['box_to_gt_ind_map'] = np.append(
            entry['box_to_gt_ind_map'],
            box_to_gt_ind_map.astype(entry['box_to_gt_ind_map'].dtype,
                                     copy=False))

        # for DA: pre-calculate the per-roi weights here.
        if cfg.TRAIN.PADA and cfg.TRAIN.DOMAIN_ADAPTATION:
            # Each roi has a fg and a bg part, split by its IoU with the best
            # gt box. The fg part is weighted with the class weight of that gt
            # box, the bg part with the average PADA weight:
            #   weight = IoU * w_class + (1 - IoU) * w_avg
            if has_gt:
                # Only the rois already stored as 'da_boxes' get a weight.
                maxes = maxes[:rois_per_image]
                argmaxes = argmaxes[:rois_per_image]
                labels = gt_classes[argmaxes]
                assert (labels[maxes > 0] != 0).all()
                class_weights = model.class_weight_db.class_weights
                pada_weights = maxes * class_weights[labels]
            else:
                # No gt boxes in this image: every roi is pure background.
                # The overlaps must not be taken from a previous entry.
                maxes = np.zeros(rois_per_image, dtype=np.float32)
                pada_weights = np.zeros(rois_per_image, dtype=np.float32)

            avg_pada_roi_weight = model.class_weight_db.get_avg_pada_weight()
            # scale bg rois similar to the average fg scale.
            bg_weights = (1 - maxes) * avg_pada_roi_weight
            box_weights = pada_weights + bg_weights
            entry['pada_roi_weights'] = np.array(box_weights,
                                                 dtype=np.float32)
def _get_rpn_blobs(im_height, im_width, foas, all_anchors, gt_boxes):
    """Build the RPN training blobs for one image.

    Every anchor is labelled 1 (foreground), 0 (background) or -1 (ignored)
    from its overlap with the ground-truth boxes, both sets are subsampled
    according to ``cfg.TRAIN.RPN_FG_FRACTION`` and
    ``cfg.TRAIN.RPN_BATCH_SIZE_PER_IM``, regression targets and loss weights
    are computed for the foreground anchors, and the results are split per
    field of anchors.

    An image without ground-truth boxes is handled explicitly: no anchor is
    foreground, every anchor is a background candidate and all regression
    targets stay zero (previously this case raised ``NameError``).

    Args:
        im_height: image height used by the inside-image anchor test.
        im_width: image width used by the inside-image anchor test.
        foas: iterable of field-of-anchors objects; ``field_size`` and
            ``num_cell_anchors`` are read from each one.
        all_anchors: anchor array; columns 0..3 are compared against the
            image bounds as x1, y1, x2, y2.
        gt_boxes: ground-truth boxes, possibly empty.

    Returns:
        A dict with the keys ``rpn_labels_int32_wide``,
        ``rpn_bbox_targets_wide``, ``rpn_bbox_inside_weights_wide`` and
        ``rpn_bbox_outside_weights_wide`` when ``foas`` has exactly one
        element, otherwise a list with one such dict per field of anchors.
    """
    total_anchors = all_anchors.shape[0]
    straddle_thresh = cfg.TRAIN.RPN_STRADDLE_THRESH

    if straddle_thresh >= 0:
        # Only keep anchors inside the image by a margin of straddle_thresh.
        # Set TRAIN.RPN_STRADDLE_THRESH to -1 (or a large value) to keep all
        # anchors.
        inds_inside = np.where(
            (all_anchors[:, 0] >= -straddle_thresh) &
            (all_anchors[:, 1] >= -straddle_thresh) &
            (all_anchors[:, 2] < im_width + straddle_thresh) &
            (all_anchors[:, 3] < im_height + straddle_thresh)
        )[0]
        anchors = all_anchors[inds_inside, :]
    else:
        inds_inside = np.arange(all_anchors.shape[0])
        anchors = all_anchors
    num_inside = len(inds_inside)

    logger.debug('total_anchors: {}'.format(total_anchors))
    logger.debug('inds_inside: {}'.format(num_inside))
    logger.debug('anchors.shape: {}'.format(anchors.shape))

    # Anchor labels: 1 is positive, 0 is negative, -1 is don't care (ignore).
    labels = np.empty((num_inside, ), dtype=np.int32)
    labels.fill(-1)

    # Without ground truth every anchor has zero overlap, which makes all of
    # them background candidates below.
    anchor_to_gt_max = np.zeros((num_inside, ), dtype=np.float32)
    has_gt = len(gt_boxes) > 0
    if has_gt:
        # Overlaps between the anchors and the gt boxes.
        anchor_by_gt_overlap = box_utils.bbox_overlaps(anchors, gt_boxes)
        # Map from anchor to the gt box that has the highest overlap.
        anchor_to_gt_argmax = anchor_by_gt_overlap.argmax(axis=1)
        # For each anchor, amount of overlap with its best gt box.
        anchor_to_gt_max = anchor_by_gt_overlap[
            np.arange(num_inside), anchor_to_gt_argmax
        ]

        # Map from gt box to an anchor that has the highest overlap.
        gt_to_anchor_argmax = anchor_by_gt_overlap.argmax(axis=0)
        # For each gt box, amount of overlap with its best anchor.
        gt_to_anchor_max = anchor_by_gt_overlap[
            gt_to_anchor_argmax, np.arange(anchor_by_gt_overlap.shape[1])
        ]
        # All anchors that share the max overlap amount (includes ties).
        anchors_with_max_overlap = np.where(
            anchor_by_gt_overlap == gt_to_anchor_max
        )[0]

        # Fg label: for each gt use the anchors with the highest overlap.
        labels[anchors_with_max_overlap] = 1
        # Fg label: above threshold IoU.
        labels[anchor_to_gt_max >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    # Subsample positive labels if we have too many.
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE_PER_IM)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(
            fg_inds, size=(len(fg_inds) - num_fg), replace=False
        )
        labels[disable_inds] = -1
    fg_inds = np.where(labels == 1)[0]

    # Subsample negative labels if we have too many (sampling is done with
    # replacement; the bg set is large so repeats are rare).
    # NOTE(review): negatives are only enabled when there are more
    # candidates than num_bg -- confirm that this is intended.
    num_bg = cfg.TRAIN.RPN_BATCH_SIZE_PER_IM - np.sum(labels == 1)
    bg_inds = np.where(anchor_to_gt_max < cfg.TRAIN.RPN_NEGATIVE_OVERLAP)[0]
    if len(bg_inds) > num_bg:
        enable_inds = bg_inds[npr.randint(len(bg_inds), size=num_bg)]
        labels[enable_inds] = 0
    bg_inds = np.where(labels == 0)[0]

    bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
    if has_gt:
        bbox_targets[fg_inds, :] = data_utils.compute_targets(
            anchors[fg_inds, :], gt_boxes[anchor_to_gt_argmax[fg_inds], :]
        )

    # Bbox regression loss has the form:
    #   loss(x) = weight_outside * L(weight_inside * x)
    # Inside weights select the elements that contribute to the loss; only
    # positive examples are regressed, so only they get a weight of 1.0.
    bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = (1.0, 1.0, 1.0, 1.0)

    # Outside weights rescale each element so that the loss is averaged over
    # the number of sampled anchors instead of the number of images.
    bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32)
    # With zero sampled anchors both selections below are empty, so the
    # clamp only avoids a division by zero.
    num_examples = max(np.sum(labels >= 0), 1)
    bbox_outside_weights[labels == 1, :] = 1.0 / num_examples
    bbox_outside_weights[labels == 0, :] = 1.0 / num_examples

    # Map up to the original set of anchors.
    labels = data_utils.unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = data_utils.unmap(
        bbox_targets, total_anchors, inds_inside, fill=0
    )
    bbox_inside_weights = data_utils.unmap(
        bbox_inside_weights, total_anchors, inds_inside, fill=0
    )
    bbox_outside_weights = data_utils.unmap(
        bbox_outside_weights, total_anchors, inds_inside, fill=0
    )

    # Split the generated labels, etc. into blobs per field of anchors.
    blobs_out = []
    start_idx = 0
    for foa in foas:
        H = foa.field_size
        W = foa.field_size
        A = foa.num_cell_anchors
        end_idx = start_idx + H * W * A
        _labels = labels[start_idx:end_idx]
        _bbox_targets = bbox_targets[start_idx:end_idx, :]
        _bbox_inside_weights = bbox_inside_weights[start_idx:end_idx, :]
        _bbox_outside_weights = bbox_outside_weights[start_idx:end_idx, :]
        start_idx = end_idx

        # labels output with shape (1, A, height, width)
        _labels = _labels.reshape((1, H, W, A)).transpose(0, 3, 1, 2)
        # The three box blobs are output with shape (1, 4 * A, height, width)
        _bbox_targets = _bbox_targets.reshape(
            (1, H, W, A * 4)).transpose(0, 3, 1, 2)
        _bbox_inside_weights = _bbox_inside_weights.reshape(
            (1, H, W, A * 4)).transpose(0, 3, 1, 2)
        _bbox_outside_weights = _bbox_outside_weights.reshape(
            (1, H, W, A * 4)).transpose(0, 3, 1, 2)
        blobs_out.append(
            dict(
                rpn_labels_int32_wide=_labels,
                rpn_bbox_targets_wide=_bbox_targets,
                rpn_bbox_inside_weights_wide=_bbox_inside_weights,
                rpn_bbox_outside_weights_wide=_bbox_outside_weights
            )
        )
    return blobs_out[0] if len(blobs_out) == 1 else blobs_out
def add_body_uv_rcnn_blobs(blobs, sampled_boxes, roidb, im_scale, batch_idx):
    """Add the DensePose (body UV) training blobs to ``blobs``.

    Foreground rois whose best overlap with a DensePose-annotated gt box
    exceeds 0.7 are kept (at most 6 of them). For each one the part-label
    mask is resampled to an ``M x M`` grid and the annotated points are
    mapped into the roi frame. When no roi qualifies, a single placeholder
    roi with all-zero targets is emitted instead.

    Args:
        blobs: blob dictionary; ``labels_int32`` is read and the ``body_uv_*``
            (and optional embedding / box-mask) entries are written.
        sampled_boxes: boxes of the sampled rois; not modified.
        roidb: roidb entry providing ``flipped``, ``ignore_UV_body``,
            ``boxes``, ``input_width``, ``input_height`` and the ``dp_*``
            annotations.
        im_scale: factor applied to the roi coordinates.
        batch_idx: value written into the first column of ``body_uv_rois``.

    Returns:
        None. ``blobs`` is updated in place.
    """
    IsFlipped = roidb['flipped']
    M = cfg.BODY_UV_RCNN.HEATMAP_SIZE

    polys_gt_inds = np.where(roidb['ignore_UV_body'] == 0)[0]
    boxes_from_polys = [roidb['boxes'][i, :] for i in polys_gt_inds]
    input_w = roidb['input_width']
    input_h = roidb['input_height']
    if boxes_from_polys:
        boxes_from_polys = np.vstack(boxes_from_polys)
    boxes_from_polys = np.array(boxes_from_polys)

    fg_inds = np.where(blobs['labels_int32'] > 0)[0]
    roi_has_mask = np.zeros(blobs['labels_int32'].shape)

    if bool(boxes_from_polys.any()) and fg_inds.shape[0] > 0:
        # Keep only the fg rois that overlap an annotated box well enough.
        rois_fg = sampled_boxes[fg_inds]
        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_polys.astype(np.float32, copy=False))
        fg_polys_value = np.max(overlaps_bbfg_bbpolys, axis=1)
        fg_inds = fg_inds[fg_polys_value > 0.7]

    # Canvas on which the scaled gt masks are pasted.
    # NOTE(review): presumably in network-input coordinates -- confirm.
    all_person_masks = np.zeros((int(input_h), int(input_w)),
                                dtype=np.float32)

    # Evaluated after the filtering above; neither operand changes its
    # truth value afterwards, so the flag is reused further down.
    has_dp_rois = bool(boxes_from_polys.any()) and fg_inds.shape[0] > 0
    if has_dp_rois:
        # Control the number of rois.
        if fg_inds.shape[0] > 6:
            fg_inds = fg_inds[:6]
        roi_has_mask[fg_inds] = 1

        # Blobs for DensePose supervision: the mask ...
        All_labels = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True)
        All_Weights = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True)
        # ... and the points.
        X_points = blob_utils.zeros((fg_inds.shape[0], 196), int32=False)
        Y_points = blob_utils.zeros((fg_inds.shape[0], 196), int32=False)
        Ind_points = blob_utils.zeros((fg_inds.shape[0], 196), int32=True)
        I_points = blob_utils.zeros((fg_inds.shape[0], 196), int32=True)
        U_points = blob_utils.zeros((fg_inds.shape[0], 196), int32=False)
        V_points = blob_utils.zeros((fg_inds.shape[0], 196), int32=False)
        Uv_point_weights = blob_utils.zeros((fg_inds.shape[0], 196),
                                            int32=False)

        rois_fg = sampled_boxes[fg_inds]
        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_polys.astype(np.float32, copy=False))
        fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1)

        for i in range(rois_fg.shape[0]):
            fg_polys_ind = polys_gt_inds[fg_polys_inds[i]]
            Ilabel = segm_utils.GetDensePoseMask(
                roidb['dp_masks'][fg_polys_ind])
            GT_I = np.array(roidb['dp_I'][fg_polys_ind])
            GT_U = np.array(roidb['dp_U'][fg_polys_ind])
            GT_V = np.array(roidb['dp_V'][fg_polys_ind])
            GT_x = np.array(roidb['dp_x'][fg_polys_ind])
            GT_y = np.array(roidb['dp_y'][fg_polys_ind])
            GT_weights = np.ones(GT_I.shape).astype(np.float32)

            # Flip the DensePose annotation together with the image.
            if IsFlipped:
                GT_I, GT_U, GT_V, GT_x, GT_y, Ilabel = (
                    DP.get_symmetric_densepose(
                        GT_I, GT_U, GT_V, GT_x, GT_y, Ilabel))

            roi_fg = rois_fg[i]
            roi_gt = boxes_from_polys[fg_polys_inds[i], :]
            x1, y1, x2, y2 = roi_fg[0], roi_fg[1], roi_fg[2], roi_fg[3]
            x1_source, y1_source = roi_gt[0], roi_gt[1]
            x2_source, y2_source = roi_gt[2], roi_gt[3]

            # Sample positions of the roi grid expressed in the 256-based
            # frame of the gt box.
            x_targets = (np.arange(x1, x2, (x2 - x1) / M) -
                         x1_source) * (256. / (x2_source - x1_source))
            y_targets = (np.arange(y1, y2, (y2 - y1) / M) -
                         y1_source) * (256. / (y2_source - y1_source))
            # Strangely sometimes it can be M+1, so make sure size is OK!
            x_targets = x_targets[0:M]
            y_targets = y_targets[0:M]

            [X_targets, Y_targets] = np.meshgrid(x_targets, y_targets)
            New_Index = cv2.remap(Ilabel,
                                  X_targets.astype(np.float32),
                                  Y_targets.astype(np.float32),
                                  interpolation=cv2.INTER_NEAREST,
                                  borderMode=cv2.BORDER_CONSTANT,
                                  borderValue=(0))
            All_L = New_Index
            All_W = np.ones(New_Index.shape)

            gt_length_x = x2_source - x1_source
            gt_length_y = y2_source - y1_source
            # Map the annotated points from the gt-box frame to the roi grid.
            GT_y = ((GT_y / 256. * gt_length_y) + y1_source - y1) * (
                M / (y2 - y1))
            GT_x = ((GT_x / 256. * gt_length_x) + x1_source - x1) * (
                M / (x2 - x1))

            # Points that fall outside the roi grid are dropped.
            GT_I[GT_y < 0] = 0
            GT_I[GT_y > (M - 1)] = 0
            GT_I[GT_x < 0] = 0
            GT_I[GT_x > (M - 1)] = 0
            points_inside = GT_I > 0
            GT_U = GT_U[points_inside]
            GT_V = GT_V[points_inside]
            GT_x = GT_x[points_inside]
            GT_y = GT_y[points_inside]
            GT_weights = GT_weights[points_inside]
            GT_I = GT_I[points_inside]

            X_points[i, 0:len(GT_x)] = GT_x
            Y_points[i, 0:len(GT_y)] = GT_y
            Ind_points[i, 0:len(GT_I)] = i
            I_points[i, 0:len(GT_I)] = GT_I
            U_points[i, 0:len(GT_U)] = GT_U
            V_points[i, 0:len(GT_V)] = GT_V
            Uv_point_weights[i, 0:len(GT_weights)] = GT_weights

            All_labels[i, :] = np.reshape(All_L.astype(np.int32), M**2)
            All_Weights[i, :] = np.reshape(All_W.astype(np.int32), M**2)

            # Proposal based segmentation: paste the binary person mask of
            # the matched gt box onto the canvas at its scaled location.
            p_mask = (Ilabel > 0).astype(np.float32)
            target_roi = roi_gt * im_scale
            p_mask = cv2.resize(p_mask,
                                (int(target_roi[2] - target_roi[0]),
                                 int(target_roi[3] - target_roi[1])))
            p_mask = (p_mask > 0.5).astype(np.float32)
            start_y, start_x = int(target_roi[1]), int(target_roi[0])
            end_y = start_y + p_mask.shape[0]
            end_x = start_x + p_mask.shape[1]
            all_person_masks[start_y:end_y, start_x:end_x] = p_mask
    else:
        # No usable fg roi: emit one placeholder roi (the first bg roi if
        # there is one) with all-zero targets.
        bg_inds = np.where(blobs['labels_int32'] == 0)[0]
        fallback_ind = 0 if len(bg_inds) == 0 else bg_inds[0]
        # Copy: the row is scaled in place below and must not alias
        # sampled_boxes.
        rois_fg = sampled_boxes[fallback_ind].reshape((1, -1)).copy()
        roi_has_mask[0] = 1

        X_points = blob_utils.zeros((1, 196), int32=False)
        Y_points = blob_utils.zeros((1, 196), int32=False)
        Ind_points = blob_utils.zeros((1, 196), int32=True)
        I_points = blob_utils.zeros((1, 196), int32=True)
        U_points = blob_utils.zeros((1, 196), int32=False)
        V_points = blob_utils.zeros((1, 196), int32=False)
        Uv_point_weights = blob_utils.zeros((1, 196), int32=False)
        All_labels = blob_utils.zeros((1, M**2), int32=True)
        All_Weights = blob_utils.zeros((1, M**2), int32=True)

    # Scale rois_fg and format as (batch_idx, x1, y1, x2, y2).
    rois_fg *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1))
    rois_fg = np.hstack((repeated_batch_idx, rois_fg))

    # Repeat U/V once per patch slot and build the matching weights: the
    # slot of patch j is 1 where the point belongs to patch j.
    K = cfg.BODY_UV_RCNN.NUM_PATCHES
    U_points = np.tile(U_points, [1, K + 1])
    V_points = np.tile(V_points, [1, K + 1])
    Uv_Weight_Points = np.zeros(U_points.shape)
    num_points = I_points.shape[1]
    for jjj in range(1, K + 1):
        Uv_Weight_Points[:, jjj * num_points:(jjj + 1) * num_points] = (
            I_points == jjj).astype(np.float32)

    # Binary person masks derived from the part labels.
    person_mask = (All_labels > 0).astype(np.int32)

    # Per-roi crop of the pasted person masks, resized to M x M.
    if has_dp_rois:
        proposal_all_mask = blob_utils.zeros((fg_inds.shape[0], M, M),
                                             int32=True)
        for i in range(rois_fg.shape[0]):
            roi_fg = rois_fg[i][1:]
            proposal_mask = all_person_masks[
                int(roi_fg[1]):int(roi_fg[3]),
                int(roi_fg[0]):int(roi_fg[2])]
            proposal_mask = cv2.resize(proposal_mask, (M, M))
            proposal_mask = (proposal_mask > 0.5).astype(np.int32)
            proposal_all_mask[i] = proposal_mask
    else:
        proposal_all_mask = blob_utils.zeros((1, M, M), int32=True)

    # Update the blobs dict with the body UV blobs.
    blobs['body_mask_labels'] = person_mask.reshape((-1, M, M))
    blobs['body_uv_rois'] = np.array(rois_fg)
    blobs['roi_has_body_uv_int32'] = np.array(roi_has_mask).astype(np.int32)
    blobs['body_uv_ann_labels'] = np.array(All_labels).astype(np.int32)
    blobs['body_uv_ann_weights'] = np.array(All_Weights).astype(np.float32)
    blobs['body_uv_X_points'] = X_points.astype(np.float32)
    blobs['body_uv_Y_points'] = Y_points.astype(np.float32)
    blobs['body_uv_Ind_points'] = Ind_points.astype(np.float32)
    blobs['body_uv_I_points'] = I_points.astype(np.float32)
    # VERY IMPORTANT (original note): these are switched here.
    blobs['body_uv_U_points'] = U_points.astype(np.float32)
    blobs['body_uv_V_points'] = V_points.astype(np.float32)
    blobs['body_uv_point_weights'] = Uv_Weight_Points.astype(np.float32)

    if cfg.BODY_UV_RCNN.USE_CLS_EMBS:
        fg_embs, bg_embs, fg_weights, bg_weights = masks_to_embs(
            All_labels.reshape((-1, M, M)))
        # NOTE(review): 56 is hard-coded here; presumably it should track
        # M -- confirm before changing.
        fg_norms = np.sum(fg_embs, axis=(1, 2))
        fg_norms[fg_norms != 0] = 56. * 56. / fg_norms[fg_norms != 0]
        bg_norms = np.sum(bg_embs, axis=(1, 2))
        bg_norms[bg_norms != 0] = 56. * 56. / bg_norms[bg_norms != 0]

        blobs['fg_mask'] = np.repeat(np.reshape(fg_embs, (-1, 1, M, M)),
                                     2, axis=1)
        blobs['bg_mask'] = np.repeat(np.reshape(bg_embs, (-1, 1, M, M)),
                                     2, axis=1)
        blobs['fg_norm'] = np.repeat(np.reshape(fg_norms, (-1, 1)),
                                     2, axis=1)
        blobs['bg_norm'] = np.repeat(np.reshape(bg_norms, (-1, 1)),
                                     2, axis=1)
        blobs['mask_emb_fg_labels'] = np.ones((fg_embs.shape[0], 1),
                                              dtype=np.int32)
        blobs['mask_emb_bg_labels'] = np.zeros((bg_embs.shape[0], 1),
                                               dtype=np.int32)
        blobs['mask_emb_weights'] = np.vstack(
            [fg_weights, bg_weights]).reshape((-1, 1)).astype(np.float32)

    if cfg.BODY_UV_RCNN.USE_BOX_ALL_MASKS:
        blobs['body_masks_wrt_box'] = proposal_all_mask
def add_mask_rcnn_blobs(blobs, sampled_boxes, roidb, im_scale, batch_idx):
    """Add Mask R-CNN specific blobs to the input blob dictionary.

    One gt mask is associated with every (sub-sampled) foreground roi, and
    an additional ``M x M`` mask is cropped for each roi from a canvas onto
    which the per-roi masks are pasted. When there is no foreground roi a
    single background roi with an all -1 (ignore) mask is emitted, because
    the network cannot handle empty blobs.

    Args:
        blobs: blob dictionary; ``labels_int32`` is read and the mask blobs
            are written.
        sampled_boxes: boxes of the sampled rois; not modified.
        roidb: roidb entry providing ``gt_classes``, ``is_crowd``, ``segms``,
            ``input_width`` and ``input_height``.
        im_scale: factor applied to the roi coordinates.
        batch_idx: value written into the first column of ``mask_rois``.

    Returns:
        None. ``blobs`` is updated in place.
    """
    M = cfg.MRCNN.RESOLUTION
    input_w = roidb['input_width']
    input_h = roidb['input_height']
    polys_gt_inds = np.where((roidb['gt_classes'] > 0) &
                             (roidb['is_crowd'] == 0))[0]
    polys_gt = [roidb['segms'][i] for i in polys_gt_inds]
    boxes_from_polys = segm_utils.polys_to_boxes(polys_gt)
    fg_inds = np.where(blobs['labels_int32'] > 0)[0]
    # NOTE(review): every fg roi is flagged here even when fg_inds is
    # sub-sampled below -- confirm that consumers expect this.
    roi_has_mask = blobs['labels_int32'].copy()
    roi_has_mask[roi_has_mask > 0] = 1
    mask_fg_rois_per_this_image = cfg.MRCNN.MAX_ROIS_PER_IM

    if fg_inds.shape[0] > 0:
        if fg_inds.size > mask_fg_rois_per_this_image:
            fg_inds = np.random.choice(fg_inds,
                                       size=mask_fg_rois_per_this_image,
                                       replace=False)
        # Class labels for the foreground rois.
        mask_class_labels = blobs['labels_int32'][fg_inds]
        masks = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True)
        all_person_masks = np.zeros(
            (int(input_h / im_scale), int(input_w / im_scale)),
            dtype=np.float32)

        # Overlap between all foreground rois and the bounding boxes
        # enclosing each segmentation.
        rois_fg = sampled_boxes[fg_inds]
        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_polys.astype(np.float32, copy=False))
        # Map from each fg roi to the index of the mask with the highest
        # overlap (measured by bbox overlap).
        fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1)

        # Add fg targets.
        for i in range(rois_fg.shape[0]):
            poly_gt = polys_gt[fg_polys_inds[i]]
            roi_fg = rois_fg[i]
            # Rasterize the portion of the polygon mask within the given fg
            # roi to an M x M binary image.
            mask = segm_utils.polys_to_mask_wrt_box(poly_gt, roi_fg, M)
            mask = np.array(mask > 0, dtype=np.int32)  # Ensure it's binary
            masks[i, :] = np.reshape(mask, M**2)

            # Paste the mask rendered with respect to the roi onto the
            # canvas, anchored at the roi's top-left corner.
            mask_wrt_bbox = segm_utils.convert_polys_to_mask_wrt_box(
                poly_gt, roi_fg)
            start_y, start_x = int(roi_fg[1]), int(roi_fg[0])
            end_y = start_y + mask_wrt_bbox.shape[0]
            end_x = start_x + mask_wrt_bbox.shape[1]
            all_person_masks[start_y:end_y, start_x:end_x] = mask_wrt_bbox

        # Crop the canvas at every roi (at least 1 x 1) and resize to M x M.
        proposal_all_mask = blob_utils.zeros((fg_inds.shape[0], M, M),
                                             int32=True)
        for i in range(rois_fg.shape[0]):
            roi_fg = rois_fg[i]
            w = int(np.maximum(roi_fg[2] - roi_fg[0], 1))
            h = int(np.maximum(roi_fg[3] - roi_fg[1], 1))
            top, left = int(roi_fg[1]), int(roi_fg[0])
            proposal_mask = all_person_masks[top:top + h, left:left + w]
            proposal_mask = cv2.resize(proposal_mask, (M, M))
            proposal_mask = (proposal_mask > 0.5).astype(np.int32)
            proposal_all_mask[i] = proposal_mask
    else:
        # If there are no fg masks (it does happen): take the first bg roi,
        # give it an all -1's mask (ignore label), and label it with class
        # zero (bg).
        bg_inds = np.where(blobs['labels_int32'] == 0)[0]
        # Copy: the row is scaled in place below and must not alias
        # sampled_boxes.
        rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1)).copy()
        masks = -blob_utils.ones((1, M**2), int32=True)
        mask_class_labels = blob_utils.zeros((1, ))
        # Mark that the first roi has a mask.
        roi_has_mask[0] = 1
        proposal_all_mask = -blob_utils.ones((1, M, M), int32=True)

    if cfg.MRCNN.CLS_SPECIFIC_MASK:
        masks = _expand_to_class_specific_mask_targets(masks,
                                                       mask_class_labels)

    # Scale rois_fg and format as (batch_idx, x1, y1, x2, y2).
    rois_fg *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1))
    rois_fg = np.hstack((repeated_batch_idx, rois_fg))

    # Update the blobs dict with the Mask R-CNN blobs.
    blobs['mask_rois'] = rois_fg
    blobs['roi_has_mask_int32'] = roi_has_mask
    blobs['masks_int32'] = masks

    if cfg.MRCNN.USE_CLS_EMBS:
        fg_embs, bg_embs, _, _ = masks_to_embs(
            masks.reshape((-1, cfg.MODEL.NUM_CLASSES, M, M)))
        # NOTE(review): 28 is hard-coded here; presumably it should track
        # M -- confirm before changing.
        fg_norms = np.sum(fg_embs, axis=(1, 2))
        fg_norms[fg_norms != 0] = 28. * 28. / (
            fg_norms[fg_norms != 0] + 1e-6)
        bg_norms = np.sum(bg_embs, axis=(1, 2))
        bg_norms[bg_norms != 0] = 28. * 28. / (
            bg_norms[bg_norms != 0] + 1e-6)

        blobs['fg_mask'] = np.repeat(np.reshape(fg_embs, (-1, 1, M, M)),
                                     2, axis=1)
        blobs['bg_mask'] = np.repeat(np.reshape(bg_embs, (-1, 1, M, M)),
                                     2, axis=1)
        blobs['fg_norm'] = np.repeat(np.reshape(fg_norms, (-1, 1)),
                                     2, axis=1)
        blobs['bg_norm'] = np.repeat(np.reshape(bg_norms, (-1, 1)),
                                     2, axis=1)
        blobs['mask_emb_fg_labels'] = np.ones((fg_embs.shape[0], 1),
                                              dtype=np.int32)
        blobs['mask_emb_bg_labels'] = np.zeros((bg_embs.shape[0], 1),
                                               dtype=np.int32)

    if cfg.MRCNN.BBOX_CASCADE_MASK_ON:
        blobs['inter_masks_int32'] = proposal_all_mask
def forward(self, inputs, outputs):
    """Report the mean IoU of the refined boxes and of their proposals.

    See modeling.detector.AddBBoxAccuracy for inputs/outputs documentation.
    ``outputs[0]`` receives the mean IoU between each kept box after
    regression and its mapped gt box; ``outputs[1]`` receives the mean of
    the overlaps stored with the mapped gt boxes.
    """
    # Predicted bbox deltas; per the original note, shape (R, C * 4).
    deltas = inputs[0].data
    # Proposal rows of 5 values; the first column is dropped to get the
    # prior boxes.
    rois = inputs[1].data
    assert rois.shape[1] == 5
    priors = rois[:, 1:]
    cls_labels = inputs[2].data
    # Mapped gt boxes: 4 coordinates followed by the stored overlap.
    mapped_gt_boxes = inputs[3].data
    gt_boxes = mapped_gt_boxes[:, :4]
    max_overlap = mapped_gt_boxes[:, 4]

    # IoU is only measured for fg boxes that are not gt boxes themselves
    # (a stored overlap of 1.0 marks a gt box).
    keep_inds = np.where((cls_labels > 0) & (max_overlap < 1.0))[0]
    num_boxes = keep_inds.size
    deltas = deltas[keep_inds, :]
    priors = priors[keep_inds, :]
    cls_labels = cls_labels[keep_inds]
    gt_boxes = gt_boxes[keep_inds, :]
    max_overlap = max_overlap[keep_inds]

    if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG or num_boxes == 0:
        deltas = deltas[:, -4:]
    else:
        # Keep, for every box, only the 4 deltas that belong to its own
        # class; the result has shape (num_boxes, 4).
        own_class_deltas = []
        for row in range(num_boxes):
            first = cls_labels[row] * 4
            own_class_deltas.append(deltas[row, first:first + 4])
        deltas = np.vstack(own_class_deltas)

    # Apply the deltas to the proposals to get the predicted boxes.
    pred_boxes = box_utils.bbox_transform(priors, deltas,
                                          self._bbox_reg_weights)

    avg_iou = 0.
    pre_avg_iou = sum(max_overlap)
    for row in range(num_boxes):
        # IoU between the row-th gt box and the row-th predicted box.
        gt_row = gt_boxes[row, :][np.newaxis, :].astype(dtype=np.float32,
                                                        copy=False)
        pred_row = pred_boxes[row, :][np.newaxis, :].astype(dtype=np.float32,
                                                            copy=False)
        pair_iou = box_utils.bbox_overlaps(gt_row, pred_row)
        avg_iou += pair_iou[0]

    if num_boxes > 0:
        avg_iou /= num_boxes
        pre_avg_iou /= num_boxes

    # outputs[0]: average IoU after this stage's regression.
    # outputs[1]: average IoU of the incoming boxes (previous stage).
    for slot, value in ((0, avg_iou), (1, pre_avg_iou)):
        outputs[slot].reshape([1])
        outputs[slot].data[...] = value
def _get_retinanet_blobs(
        foas, all_anchors, gt_boxes, gt_classes, im_width, im_height):
    """Build the per-level classification / regression blobs for one image.

    Anchors matched to a gt box get that box's class as label, anchors whose
    best overlap is below ``cfg.RETINANET.NEGATIVE_OVERLAP`` get 0 and the
    rest stay -1 (ignored).

    An image without ground-truth boxes is handled explicitly: every anchor
    becomes background and all regression targets stay zero (previously this
    case raised ``NameError``).

    Args:
        foas: iterable of field-of-anchors objects; ``field_size`` and
            ``stride`` are read from each one.
        all_anchors: anchor array with one row per anchor.
        gt_boxes: ground-truth boxes, possibly empty.
        gt_classes: class of each ground-truth box.
        im_width: image width, used to crop the label map of each level.
        im_height: image height, used to crop the label map of each level.

    Returns:
        Tuple ``(blobs_out, out_num_fg, out_num_bg)``: a list with one dict
        per field of anchors (keys ``retnet_cls_labels``,
        ``retnet_roi_bbox_targets``, ``retnet_roi_fg_bbox_locs``) and two
        float arrays used as normalizers.
    """
    total_anchors = all_anchors.shape[0]
    logger.debug('Getting mad blobs: im_height {} im_width: {}'.format(
        im_height, im_width))

    inds_inside = np.arange(all_anchors.shape[0])
    anchors = all_anchors
    num_inside = len(inds_inside)

    logger.debug('total_anchors: {}'.format(total_anchors))
    logger.debug('inds_inside: {}'.format(num_inside))
    logger.debug('anchors.shape: {}'.format(anchors.shape))

    # Anchor labels: >= 1 is the gt class, 0 is negative, -1 is don't care.
    labels = np.empty((num_inside, ), dtype=np.float32)
    labels.fill(-1)

    # Without ground truth every anchor has zero overlap, which makes all of
    # them background below.
    anchor_to_gt_max = np.zeros((num_inside, ), dtype=np.float32)
    has_gt = len(gt_boxes) > 0
    if has_gt:
        # Overlaps between the anchors and the gt boxes.
        anchor_by_gt_overlap = box_utils.bbox_overlaps(anchors, gt_boxes)
        # Map from anchor to the gt box that has the highest overlap.
        anchor_to_gt_argmax = anchor_by_gt_overlap.argmax(axis=1)
        # For each anchor, amount of overlap with its best gt box.
        anchor_to_gt_max = anchor_by_gt_overlap[
            np.arange(num_inside), anchor_to_gt_argmax]

        # Map from gt box to an anchor that has the highest overlap.
        gt_to_anchor_argmax = anchor_by_gt_overlap.argmax(axis=0)
        # For each gt box, amount of overlap with its best anchor.
        gt_to_anchor_max = anchor_by_gt_overlap[
            gt_to_anchor_argmax, np.arange(anchor_by_gt_overlap.shape[1])]
        # All anchors that share the max overlap amount (includes ties).
        anchors_with_max_overlap = np.where(
            anchor_by_gt_overlap == gt_to_anchor_max)[0]

        # Fg label: for each gt use the anchors with the highest overlap.
        gt_inds = anchor_to_gt_argmax[anchors_with_max_overlap]
        labels[anchors_with_max_overlap] = gt_classes[gt_inds]
        # Fg label: above threshold IoU.
        inds = anchor_to_gt_max >= cfg.RETINANET.POSITIVE_OVERLAP
        gt_inds = anchor_to_gt_argmax[inds]
        labels[inds] = gt_classes[gt_inds]

    fg_inds = np.where(labels >= 1)[0]
    bg_inds = np.where(anchor_to_gt_max < cfg.RETINANET.NEGATIVE_OVERLAP)[0]
    labels[bg_inds] = 0
    num_fg, num_bg = len(fg_inds), len(bg_inds)

    bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
    if has_gt:
        bbox_targets[fg_inds, :] = data_utils.compute_targets(
            anchors[fg_inds, :], gt_boxes[anchor_to_gt_argmax[fg_inds], :])

    # Map up to the original set of anchors.
    labels = data_utils.unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = data_utils.unmap(bbox_targets, total_anchors,
                                    inds_inside, fill=0)

    # Split the generated labels, etc. into blobs per field of anchors.
    # num_classes does not depend on the level, so it is computed once.
    num_classes = cfg.MODEL.NUM_CLASSES - 1
    blobs_out = []
    start_idx = 0
    for foa in foas:
        H = foa.field_size
        W = foa.field_size
        # H * W entries per level, i.e. one anchor per cell.
        end_idx = start_idx + H * W
        _labels = labels[start_idx:end_idx]
        _bbox_targets = bbox_targets[start_idx:end_idx, :]
        start_idx = end_idx

        # labels output with shape (1, 1, height, width)
        _labels = _labels.reshape((1, 1, H, W))
        # bbox_targets output with shape (1, 4, height, width)
        _bbox_targets = _bbox_targets.reshape(
            (1, H, W, 4)).transpose(0, 3, 1, 2)

        stride = foa.stride
        w = int(im_width / stride)
        h = int(im_height / stride)

        # Data for the select_smooth_l1 loss: one row per fg location.
        # np.where always returns one index array per axis, so no emptiness
        # check is needed; zero fg locations simply yield (0, 4) arrays.
        inds_4d = np.where(_labels > 0)
        im_inds, y, x = inds_4d[0], inds_4d[2], inds_4d[3]
        _roi_bbox_targets = np.zeros((len(im_inds), 4))
        _roi_fg_bbox_locs = np.zeros((len(im_inds), 4))
        lbls = _labels[im_inds, :, y, x]
        for i, lbl in enumerate(lbls):
            cls_idx = lbl[0] - 1
            if not cfg.RETINANET.CLASS_SPECIFIC_BBOX:
                cls_idx = 0
            assert cls_idx >= 0 and cls_idx < num_classes, \
                'label out of the range'
            _roi_bbox_targets[i, :] = _bbox_targets[:, :, y[i], x[i]]
            _roi_fg_bbox_locs[i, :] = np.array([[0, cls_idx, y[i], x[i]]])

        blobs_out.append(
            dict(
                retnet_cls_labels=_labels[:, :, 0:h, 0:w].astype(np.int32),
                retnet_roi_bbox_targets=_roi_bbox_targets.astype(np.float32),
                retnet_roi_fg_bbox_locs=_roi_fg_bbox_locs.astype(np.float32),
            ))

    out_num_fg = np.array([num_fg + 1.0], dtype=np.float32)
    out_num_bg = (
        np.array([num_bg + 1.0]) * (cfg.MODEL.NUM_CLASSES - 1) +
        out_num_fg * (cfg.MODEL.NUM_CLASSES - 2))
    return blobs_out, out_num_fg, out_num_bg
def _get_rpn_blobs(im_height, im_width, foas, all_anchors, gt_boxes):
    """Build the RPN training blobs for one image.

    Every anchor is labelled 1 (foreground), 0 (background) or -1 (ignored)
    from its overlap with the ground-truth boxes, both sets are subsampled
    according to ``cfg.TRAIN.RPN_FG_FRACTION`` and
    ``cfg.TRAIN.RPN_BATCH_SIZE_PER_IM``, regression targets and loss weights
    are computed for the foreground anchors, and the results are split per
    field of anchors.

    An image without ground-truth boxes is handled explicitly: no anchor is
    foreground, every anchor is a background candidate and all regression
    targets stay zero (previously this case raised ``NameError``).

    Args:
        im_height: image height used by the inside-image anchor test.
        im_width: image width used by the inside-image anchor test.
        foas: iterable of field-of-anchors objects; ``field_size`` and
            ``num_cell_anchors`` are read from each one.
        all_anchors: anchor array; columns 0..3 are compared against the
            image bounds as x1, y1, x2, y2.
        gt_boxes: ground-truth boxes, possibly empty.

    Returns:
        A dict with the keys ``rpn_labels_int32_wide``,
        ``rpn_bbox_targets_wide``, ``rpn_bbox_inside_weights_wide`` and
        ``rpn_bbox_outside_weights_wide`` when ``foas`` has exactly one
        element, otherwise a list with one such dict per field of anchors.
    """
    total_anchors = all_anchors.shape[0]
    straddle_thresh = cfg.TRAIN.RPN_STRADDLE_THRESH

    if straddle_thresh >= 0:
        # Only keep anchors inside the image by a margin of straddle_thresh.
        # Set TRAIN.RPN_STRADDLE_THRESH to -1 (or a large value) to keep all
        # anchors.
        inds_inside = np.where(
            (all_anchors[:, 0] >= -straddle_thresh) &
            (all_anchors[:, 1] >= -straddle_thresh) &
            (all_anchors[:, 2] < im_width + straddle_thresh) &
            (all_anchors[:, 3] < im_height + straddle_thresh))[0]
        anchors = all_anchors[inds_inside, :]
    else:
        inds_inside = np.arange(all_anchors.shape[0])
        anchors = all_anchors
    num_inside = len(inds_inside)

    logger.debug('total_anchors: {}'.format(total_anchors))
    logger.debug('inds_inside: {}'.format(num_inside))
    logger.debug('anchors.shape: {}'.format(anchors.shape))

    # Anchor labels: 1 is positive, 0 is negative, -1 is don't care (ignore).
    labels = np.empty((num_inside, ), dtype=np.int32)
    labels.fill(-1)

    # Without ground truth every anchor has zero overlap, which makes all of
    # them background candidates below.
    anchor_to_gt_max = np.zeros((num_inside, ), dtype=np.float32)
    has_gt = len(gt_boxes) > 0
    if has_gt:
        # Overlaps between the anchors and the gt boxes.
        anchor_by_gt_overlap = box_utils.bbox_overlaps(anchors, gt_boxes)
        # Map from anchor to the gt box that has the highest overlap.
        anchor_to_gt_argmax = anchor_by_gt_overlap.argmax(axis=1)
        # For each anchor, amount of overlap with its best gt box.
        anchor_to_gt_max = anchor_by_gt_overlap[np.arange(num_inside),
                                                anchor_to_gt_argmax]

        # Map from gt box to an anchor that has the highest overlap.
        gt_to_anchor_argmax = anchor_by_gt_overlap.argmax(axis=0)
        # For each gt box, amount of overlap with its best anchor.
        gt_to_anchor_max = anchor_by_gt_overlap[
            gt_to_anchor_argmax, np.arange(anchor_by_gt_overlap.shape[1])]
        # All anchors that share the max overlap amount (includes ties).
        anchors_with_max_overlap = np.where(
            anchor_by_gt_overlap == gt_to_anchor_max)[0]

        # Fg label: for each gt use the anchors with the highest overlap.
        labels[anchors_with_max_overlap] = 1
        # Fg label: above threshold IoU.
        labels[anchor_to_gt_max >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    # Subsample positive labels if we have too many.
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE_PER_IM)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds,
                                  size=(len(fg_inds) - num_fg),
                                  replace=False)
        labels[disable_inds] = -1
    fg_inds = np.where(labels == 1)[0]

    # Subsample negative labels if we have too many (sampling is done with
    # replacement; the bg set is large so repeats are rare).
    # NOTE(review): negatives are only enabled when there are more
    # candidates than num_bg -- confirm that this is intended.
    num_bg = cfg.TRAIN.RPN_BATCH_SIZE_PER_IM - np.sum(labels == 1)
    bg_inds = np.where(anchor_to_gt_max < cfg.TRAIN.RPN_NEGATIVE_OVERLAP)[0]
    if len(bg_inds) > num_bg:
        enable_inds = bg_inds[npr.randint(len(bg_inds), size=num_bg)]
        labels[enable_inds] = 0
    bg_inds = np.where(labels == 0)[0]

    bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
    if has_gt:
        bbox_targets[fg_inds, :] = data_utils.compute_targets(
            anchors[fg_inds, :], gt_boxes[anchor_to_gt_argmax[fg_inds], :])

    # Bbox regression loss has the form:
    #   loss(x) = weight_outside * L(weight_inside * x)
    # Inside weights select the elements that contribute to the loss; only
    # positive examples are regressed, so only they get a weight of 1.0.
    bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = (1.0, 1.0, 1.0, 1.0)

    # Outside weights rescale each element so that the loss is averaged over
    # the number of sampled anchors instead of the number of images.
    bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32)
    # With zero sampled anchors both selections below are empty, so the
    # clamp only avoids a division by zero.
    num_examples = max(np.sum(labels >= 0), 1)
    bbox_outside_weights[labels == 1, :] = 1.0 / num_examples
    bbox_outside_weights[labels == 0, :] = 1.0 / num_examples

    # Map up to the original set of anchors.
    labels = data_utils.unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = data_utils.unmap(bbox_targets, total_anchors,
                                    inds_inside, fill=0)
    bbox_inside_weights = data_utils.unmap(bbox_inside_weights,
                                           total_anchors, inds_inside,
                                           fill=0)
    bbox_outside_weights = data_utils.unmap(bbox_outside_weights,
                                            total_anchors, inds_inside,
                                            fill=0)

    # Split the generated labels, etc. into blobs per field of anchors.
    blobs_out = []
    start_idx = 0
    for foa in foas:
        H = foa.field_size
        W = foa.field_size
        A = foa.num_cell_anchors
        end_idx = start_idx + H * W * A
        _labels = labels[start_idx:end_idx]
        _bbox_targets = bbox_targets[start_idx:end_idx, :]
        _bbox_inside_weights = bbox_inside_weights[start_idx:end_idx, :]
        _bbox_outside_weights = bbox_outside_weights[start_idx:end_idx, :]
        start_idx = end_idx

        # labels output with shape (1, A, height, width)
        _labels = _labels.reshape((1, H, W, A)).transpose(0, 3, 1, 2)
        # The three box blobs are output with shape (1, 4 * A, height, width)
        _bbox_targets = _bbox_targets.reshape(
            (1, H, W, A * 4)).transpose(0, 3, 1, 2)
        _bbox_inside_weights = _bbox_inside_weights.reshape(
            (1, H, W, A * 4)).transpose(0, 3, 1, 2)
        _bbox_outside_weights = _bbox_outside_weights.reshape(
            (1, H, W, A * 4)).transpose(0, 3, 1, 2)
        blobs_out.append(
            dict(rpn_labels_int32_wide=_labels,
                 rpn_bbox_targets_wide=_bbox_targets,
                 rpn_bbox_inside_weights_wide=_bbox_inside_weights,
                 rpn_bbox_outside_weights_wide=_bbox_outside_weights))
    return blobs_out[0] if len(blobs_out) == 1 else blobs_out
def add_mask_rcnn_blobs(blobs, sampled_boxes, roidb, im_scale, batch_idx):
    """Add Mask R-CNN specific blobs to the input blob dictionary.

    Writes three entries into ``blobs``: ``mask_rois`` (the selected RoIs
    scaled by ``im_scale`` and prefixed with ``batch_idx``),
    ``roi_has_mask_int32`` and ``masks_int32``.

    Args:
        blobs: dict that already holds ``labels_int32``; updated in place.
        sampled_boxes: RoI boxes indexed consistently with
            ``blobs['labels_int32']``. Never modified by this function.
        roidb: roidb entry providing ``gt_classes``, ``is_crowd`` and
            ``segms``.
        im_scale: factor applied to the RoI coordinates stored in
            ``mask_rois``.
        batch_idx: value stored in the first column of ``mask_rois``.
    """
    # Prepare the mask targets by associating one gt mask to each training roi
    # that has a fg (non-bg) class label.
    M = cfg.MRCNN.RESOLUTION
    polys_gt_inds = np.where(
        (roidb['gt_classes'] > 0) & (roidb['is_crowd'] == 0)
    )[0]
    polys_gt = [roidb['segms'][i] for i in polys_gt_inds]
    boxes_from_polys = segm_utils.polys_to_boxes(polys_gt)
    fg_inds = np.where(blobs['labels_int32'] > 0)[0]
    roi_has_mask = blobs['labels_int32'].copy()
    roi_has_mask[roi_has_mask > 0] = 1

    if fg_inds.shape[0] > 0:
        # Class labels for the foreground rois
        mask_class_labels = blobs['labels_int32'][fg_inds]
        masks = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True)

        # Find overlap between all foreground rois and the bounding boxes
        # enclosing each segmentation. Indexing with an index array yields a
        # copy, so the in-place scaling below cannot touch `sampled_boxes`.
        rois_fg = sampled_boxes[fg_inds]
        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_polys.astype(np.float32, copy=False)
        )
        # Map from each fg rois to the index of the mask with highest overlap
        # (measured by bbox overlap)
        fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1)

        # add fg targets
        for i in range(rois_fg.shape[0]):
            fg_polys_ind = fg_polys_inds[i]
            poly_gt = polys_gt[fg_polys_ind]
            roi_fg = rois_fg[i]
            # Rasterize the portion of the polygon mask within the given fg roi
            # to an M x M binary image
            mask = segm_utils.polys_to_mask_wrt_box(poly_gt, roi_fg, M)
            mask = np.array(mask > 0, dtype=np.int32)  # Ensure it's binary
            masks[i, :] = np.reshape(mask, M**2)
    else:  # If there are no fg masks (it does happen)
        # The network cannot handle empty blobs, so we must provide a mask
        # We simply take the first bg roi, given it an all -1's mask (ignore
        # label), and label it with class zero (bg).
        bg_inds = np.where(blobs['labels_int32'] == 0)[0]
        # rois_fg is actually one background roi, but that's ok because the
        # mask is all ignore labels.
        # Copy explicitly: integer indexing + reshape returns a view, and the
        # in-place scaling below would otherwise rescale the caller's
        # `sampled_boxes` row.
        rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1)).copy()
        # We give it an -1's blob (ignore label)
        masks = -blob_utils.ones((1, M**2), int32=True)
        # We label it with class = 0 (background)
        mask_class_labels = blob_utils.zeros((1, ))
        # Mark that the first roi has a mask
        roi_has_mask[0] = 1

    if cfg.MRCNN.CLS_SPECIFIC_MASK:
        masks = _expand_to_class_specific_mask_targets(masks, mask_class_labels)

    # Scale rois_fg and format as (batch_idx, x1, y1, x2, y2)
    rois_fg *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1))
    rois_fg = np.hstack((repeated_batch_idx, rois_fg))

    # Update blobs dict with Mask R-CNN blobs
    blobs['mask_rois'] = rois_fg
    blobs['roi_has_mask_int32'] = roi_has_mask
    blobs['masks_int32'] = masks
def add_mask_rcnn_blobs(blobs, sampled_boxes, roidb, im_scale, batch_idx):
    """Add Mask R-CNN specific blobs to the input blob dictionary.

    Writes three entries into ``blobs``: ``mask_rois`` (the selected RoIs
    scaled by ``im_scale`` and prefixed with ``batch_idx``),
    ``roi_has_mask_int32`` and ``masks_int32``.

    Args:
        blobs: dict that already holds ``labels_int32``; updated in place.
        sampled_boxes: RoI boxes indexed consistently with
            ``blobs['labels_int32']``. Never modified by this function.
        roidb: roidb entry providing ``gt_classes``, ``is_crowd`` and
            ``segms``.
        im_scale: factor applied to the RoI coordinates stored in
            ``mask_rois``.
        batch_idx: value stored in the first column of ``mask_rois``.
    """
    # Prepare the mask targets by associating one gt mask to each training roi
    # that has a fg (non-bg) class label.
    M = cfg.MRCNN.RESOLUTION
    polys_gt_inds = np.where(
        (roidb['gt_classes'] > 0) & (roidb['is_crowd'] == 0)
    )[0]
    polys_gt = [roidb['segms'][i] for i in polys_gt_inds]
    boxes_from_polys = segm_utils.polys_to_boxes(polys_gt)
    fg_inds = np.where(blobs['labels_int32'] > 0)[0]
    roi_has_mask = blobs['labels_int32'].copy()
    roi_has_mask[roi_has_mask > 0] = 1

    if fg_inds.shape[0] > 0:
        # Class labels for the foreground rois
        mask_class_labels = blobs['labels_int32'][fg_inds]
        masks = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True)

        # Find overlap between all foreground rois and the bounding boxes
        # enclosing each segmentation. Indexing with an index array yields a
        # copy, so the in-place scaling below cannot touch `sampled_boxes`.
        rois_fg = sampled_boxes[fg_inds]
        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_polys.astype(np.float32, copy=False)
        )
        # Map from each fg rois to the index of the mask with highest overlap
        # (measured by bbox overlap)
        fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1)

        # add fg targets
        for i in range(rois_fg.shape[0]):
            fg_polys_ind = fg_polys_inds[i]
            poly_gt = polys_gt[fg_polys_ind]
            roi_fg = rois_fg[i]
            # Rasterize the portion of the polygon mask within the given fg roi
            # to an M x M binary image
            mask = segm_utils.polys_to_mask_wrt_box(poly_gt, roi_fg, M)
            mask = np.array(mask > 0, dtype=np.int32)  # Ensure it's binary
            masks[i, :] = np.reshape(mask, M**2)
    else:  # If there are no fg masks (it does happen)
        # The network cannot handle empty blobs, so we must provide a mask
        # We simply take the first bg roi, given it an all -1's mask (ignore
        # label), and label it with class zero (bg).
        bg_inds = np.where(blobs['labels_int32'] == 0)[0]
        # rois_fg is actually one background roi, but that's ok because the
        # mask is all ignore labels.
        # Copy explicitly: integer indexing + reshape returns a view, and the
        # in-place scaling below would otherwise rescale the caller's
        # `sampled_boxes` row.
        rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1)).copy()
        # We give it an -1's blob (ignore label)
        masks = -blob_utils.ones((1, M**2), int32=True)
        # We label it with class = 0 (background)
        mask_class_labels = blob_utils.zeros((1, ))
        # Mark that the first roi has a mask
        roi_has_mask[0] = 1

    if cfg.MRCNN.CLS_SPECIFIC_MASK:
        masks = _expand_to_class_specific_mask_targets(masks, mask_class_labels)

    # Scale rois_fg and format as (batch_idx, x1, y1, x2, y2)
    rois_fg *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1))
    rois_fg = np.hstack((repeated_batch_idx, rois_fg))

    # Update blobs dict with Mask R-CNN blobs
    blobs['mask_rois'] = rois_fg
    blobs['roi_has_mask_int32'] = roi_has_mask
    blobs['masks_int32'] = masks
def add_mask_rcnn_blobs(blobs, sampled_boxes, roidb, im_scale, batch_idx):
    """Add Mask R-CNN specific blobs to the input blob dictionary.

    Mask targets are cut out of the label image stored at
    ``roidb['ins_seg']``: for every foreground RoI the crop under the RoI is
    binarised against the class of the best-overlapping gt box and resized
    to ``M x M`` with nearest-neighbour interpolation.

    Writes ``mask_rois``, ``roi_has_mask_int32`` and ``masks_int32`` into
    ``blobs``.

    Args:
        blobs: dict that already holds ``labels_int32``; updated in place.
        sampled_boxes: RoI boxes indexed consistently with
            ``blobs['labels_int32']``. Never modified by this function.
        roidb: roidb entry providing ``gt_classes``, ``is_crowd``, ``boxes``,
            ``ins_seg`` (label image path) and ``flipped``.
        im_scale: factor applied to the RoI coordinates stored in
            ``mask_rois``.
        batch_idx: value stored in the first column of ``mask_rois``.

    Raises:
        ValueError: if the entry is flipped and the first training dataset
            name contains neither 'LIP' nor 'ATR', so no class mapping for
            flipped labels is available.
    """
    # Prepare the mask targets by associating one gt mask to each training roi
    # that has a fg (non-bg) class label.
    M = cfg.MRCNN.RESOLUTION
    gt_inds = np.where((roidb['gt_classes'] > 0) & (roidb['is_crowd'] == 0))[0]
    boxes_from_polys = roidb['boxes'][gt_inds, :]
    # Index-array selection returns a copy, so remapping below does not alter
    # roidb['gt_classes'].
    gt_classes = roidb['gt_classes'][gt_inds]
    im_label = cv2.imread(roidb['ins_seg'], 0)
    if roidb['flipped'] == 1:
        # Mirror the label image so it lines up with the flipped boxes.
        im_label = im_label[:, ::-1]
        # Mirroring moves pixels but keeps their class ids, so the gt classes
        # are mapped back to the ids found in the label image.
        # NOTE(review): presumably the ids below are left/right part pairs of
        # the respective dataset -- confirm against the dataset definition.
        dataset_name = cfg.TRAIN.DATASETS[0]
        flipped_2_orig_class = None
        if 'LIP' in dataset_name:
            flipped_2_orig_class = {
                14: 15, 15: 14, 16: 17, 17: 16, 18: 19, 19: 18
            }
        if 'ATR' in dataset_name:
            flipped_2_orig_class = {
                9: 10, 10: 9, 12: 13, 13: 12, 14: 15, 15: 14
            }
        if flipped_2_orig_class is None:
            # Previously this surfaced as an UnboundLocalError further down.
            raise ValueError(
                'No flipped-to-original class mapping for dataset '
                '{}'.format(dataset_name))
        # Match against a snapshot so a swapped pair (a <-> b) is not mapped
        # back again by the second half of the pair.
        flipped_classes = gt_classes.copy()
        for flipped_cls, orig_cls in flipped_2_orig_class.items():
            gt_classes[flipped_classes == flipped_cls] = orig_cls

    fg_inds = np.where(blobs['labels_int32'] > 0)[0]
    roi_has_mask = blobs['labels_int32'].copy()
    roi_has_mask[roi_has_mask > 0] = 1

    if fg_inds.shape[0] > 0:
        # Class labels for the foreground rois
        mask_class_labels = blobs['labels_int32'][fg_inds]
        masks = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True)

        # Find overlap between all foreground rois and the gt boxes
        rois_fg = sampled_boxes[fg_inds]
        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_polys.astype(np.float32, copy=False))
        # Map from each fg rois to the index of the gt box with highest
        # overlap
        fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1)

        height, width = im_label.shape[0], im_label.shape[1]
        # add fg targets
        for i in range(rois_fg.shape[0]):
            fg_polys_ind = fg_polys_inds[i]
            x0, y0, x1, y1 = rois_fg[i]
            # Clamp the crop window to the image on both sides; a negative
            # start would otherwise be read as an index from the far edge.
            x0 = max(0, min(int(x0), width))
            x1 = max(0, min(int(x1 + 1), width))
            y0 = max(0, min(int(y0), height))
            y1 = max(0, min(int(y1 + 1), height))
            mask_ = im_label[y0:y1, x0:x1]
            # Binary mask of the pixels carrying the matched gt class
            mask = np.array(mask_ == gt_classes[fg_polys_ind], dtype=np.int32)
            mask = cv2.resize(mask, (M, M), interpolation=cv2.INTER_NEAREST)
            masks[i, :] = np.reshape(mask, M**2)
        # Drop the reference to the label image as soon as it is not needed.
        im_label = None
    else:  # If there are no fg masks (it does happen)
        # The network cannot handle empty blobs, so we must provide a mask
        # We simply take the first bg roi, given it an all -1's mask (ignore
        # label), and label it with class zero (bg).
        bg_inds = np.where(blobs['labels_int32'] == 0)[0]
        # rois_fg is actually one background roi, but that's ok because the
        # mask is all ignore labels.
        # Copy explicitly: integer indexing + reshape returns a view, and the
        # in-place scaling below would otherwise rescale the caller's
        # `sampled_boxes` row.
        rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1)).copy()
        # We give it an -1's blob (ignore label)
        masks = -blob_utils.ones((1, M**2), int32=True)
        # We label it with class = 0 (background)
        mask_class_labels = blob_utils.zeros((1, ))
        # Mark that the first roi has a mask
        roi_has_mask[0] = 1

    if cfg.MRCNN.CLS_SPECIFIC_MASK:
        masks = _expand_to_class_specific_mask_targets(masks, mask_class_labels)

    # Scale rois_fg and format as (batch_idx, x1, y1, x2, y2)
    rois_fg *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1))
    rois_fg = np.hstack((repeated_batch_idx, rois_fg))

    # Update blobs dict with Mask R-CNN blobs
    blobs['mask_rois'] = rois_fg
    blobs['roi_has_mask_int32'] = roi_has_mask
    blobs['masks_int32'] = masks
def evaluate_box_proposals(json_dataset, roidb, thresholds=None, area='all',
                           limit=None):
    """Evaluate detection proposal recall metrics.

    This function is a much faster alternative to the official COCO API recall
    evaluation code. However, it produces slightly different results.

    Args:
        json_dataset: not used by this function; kept for interface
            compatibility.
        roidb: iterable of entries providing ``gt_classes``, ``is_crowd``,
            ``boxes`` and ``seg_areas``. Rows with ``gt_classes == 0`` are
            treated as proposals.
        thresholds: IoU thresholds to compute recall at. Defaults to
            0.5, 0.55, ..., 0.95.
        area: name of the gt area range to evaluate (see ``areas`` below).
        limit: if given, only the first ``limit`` proposals of each entry are
            used.

    Returns:
        dict with ``ar`` (mean of ``recalls``), ``recalls``, ``thresholds``,
        ``gt_overlaps`` (sorted best IoU recorded per gt box) and
        ``num_pos`` (number of gt boxes in the area range). When ``num_pos``
        is 0 every recall is reported as 0 rather than NaN.
    """
    # Record max overlap value for each gt box
    # Return vector of overlap values
    areas = {
        'all': 0,
        'small': 1,
        'medium': 2,
        'large': 3,
        '96-128': 4,
        '128-256': 5,
        '256-512': 6,
        '512-inf': 7,
    }
    area_ranges = [
        [0**2, 1e5**2],    # all
        [0**2, 32**2],     # small
        [32**2, 96**2],    # medium
        [96**2, 1e5**2],   # large
        [96**2, 128**2],   # 96-128
        [128**2, 256**2],  # 128-256
        [256**2, 512**2],  # 256-512
        [512**2, 1e5**2],  # 512-inf
    ]
    assert area in areas, 'Unknown area range: {}'.format(area)
    area_range = area_ranges[areas[area]]
    # Collected per image and joined once at the end instead of re-copying a
    # growing array on every iteration.
    per_image_overlaps = []
    num_pos = 0
    for entry in roidb:
        gt_inds = np.where(
            (entry['gt_classes'] > 0) & (entry['is_crowd'] == 0))[0]
        gt_boxes = entry['boxes'][gt_inds, :]
        gt_areas = entry['seg_areas'][gt_inds]
        valid_gt_inds = np.where(
            (gt_areas >= area_range[0]) & (gt_areas <= area_range[1]))[0]
        gt_boxes = gt_boxes[valid_gt_inds, :]
        num_pos += len(valid_gt_inds)
        non_gt_inds = np.where(entry['gt_classes'] == 0)[0]
        boxes = entry['boxes'][non_gt_inds, :]
        if boxes.shape[0] == 0:
            # No proposals: the gt boxes still count as positives (misses).
            continue
        if limit is not None and boxes.shape[0] > limit:
            boxes = boxes[:limit, :]
        overlaps = box_utils.bbox_overlaps(
            boxes.astype(dtype=np.float32, copy=False),
            gt_boxes.astype(dtype=np.float32, copy=False))
        _gt_overlaps = np.zeros((gt_boxes.shape[0]))
        # Greedy one-to-one matching: repeatedly take the best remaining
        # (proposal, gt) pair and retire both.
        for j in range(min(boxes.shape[0], gt_boxes.shape[0])):
            # find which proposal box maximally covers each gt box
            argmax_overlaps = overlaps.argmax(axis=0)
            # and get the iou amount of coverage for each gt box
            max_overlaps = overlaps.max(axis=0)
            # find which gt box is 'best' covered (i.e. 'best' = most iou)
            gt_ind = max_overlaps.argmax()
            gt_ovr = max_overlaps.max()
            assert gt_ovr >= 0
            # find the proposal box that covers the best covered gt box
            box_ind = argmax_overlaps[gt_ind]
            # record the iou coverage of this gt box
            _gt_overlaps[j] = overlaps[box_ind, gt_ind]
            assert _gt_overlaps[j] == gt_ovr
            # mark the proposal box and the gt box as used
            overlaps[box_ind, :] = -1
            overlaps[:, gt_ind] = -1
        # append recorded iou coverage level
        per_image_overlaps.append(_gt_overlaps)

    if per_image_overlaps:
        gt_overlaps = np.sort(np.concatenate(per_image_overlaps))
    else:
        gt_overlaps = np.zeros(0)
    if thresholds is None:
        step = 0.05
        thresholds = np.arange(0.5, 0.95 + 1e-5, step)
    recalls = np.zeros_like(thresholds)
    # compute recall for each iou threshold; without any positive the ratio
    # is undefined, so recalls stay at 0 instead of dividing by zero.
    if num_pos > 0:
        for i, t in enumerate(thresholds):
            recalls[i] = (gt_overlaps >= t).sum() / float(num_pos)
    # ar = 2 * np.trapz(recalls, thresholds)
    ar = recalls.mean()
    return {
        'ar': ar,
        'recalls': recalls,
        'thresholds': thresholds,
        'gt_overlaps': gt_overlaps,
        'num_pos': num_pos,
    }
def _merge_proposal_boxes_into_roidb(roidb, box_list):
    """Append the proposal boxes in ``box_list`` to the matching roidb entries.

    Entry ``k`` of ``roidb`` receives the boxes in ``box_list[k]``. Every
    per-box field of the entry (``boxes``, ``gt_classes``, ``seg_areas``,
    ``gt_overlaps``, ``is_crowd``, ``box_to_gt_ind_map``) grows by one row per
    proposal; entries are modified in place and nothing is returned.
    """
    assert len(box_list) == len(roidb)
    for idx, entry in enumerate(roidb):
        proposals = box_list[idx]
        n_new = proposals.shape[0]

        # Rows to be appended; they stay at their defaults (zero overlap,
        # gt index -1) for proposals that touch no gt box.
        new_overlaps = np.zeros(
            (n_new, entry['gt_overlaps'].shape[1]),
            dtype=entry['gt_overlaps'].dtype
        )
        new_gt_map = -np.ones(n_new, dtype=entry['box_to_gt_ind_map'].dtype)

        # Note: unlike in other places, here we intentionally include all gt
        # rois, even ones marked as crowd. Boxes that overlap with crowds will
        # be filtered out later (see: _filter_crowd_proposals).
        gt_rows = np.where(entry['gt_classes'] > 0)[0]
        if len(gt_rows) > 0:
            iou = box_utils.bbox_overlaps(
                proposals.astype(dtype=np.float32, copy=False),
                entry['boxes'][gt_rows, :].astype(dtype=np.float32, copy=False)
            )
            gt_labels = entry['gt_classes'][gt_rows]
            # Best-matching gt box per proposal and the amount of overlap
            # (ties are broken arbitrarily by class order)
            best_gt = iou.argmax(axis=1)
            best_iou = iou.max(axis=1)
            # Only proposals with non-zero overlap get a class / gt index
            hit = np.flatnonzero(best_iou > 0)
            new_overlaps[hit, gt_labels[best_gt[hit]]] = best_iou[hit]
            new_gt_map[hit] = gt_rows[best_gt[hit]]

        entry['boxes'] = np.concatenate(
            (entry['boxes'],
             proposals.astype(entry['boxes'].dtype, copy=False)),
            axis=0
        )
        entry['gt_classes'] = np.append(
            entry['gt_classes'],
            np.zeros(n_new, dtype=entry['gt_classes'].dtype)
        )
        entry['seg_areas'] = np.append(
            entry['seg_areas'],
            np.zeros(n_new, dtype=entry['seg_areas'].dtype)
        )
        # Densify, extend, then convert back to a sparse CSR matrix
        dense_overlaps = np.concatenate(
            (entry['gt_overlaps'].toarray(), new_overlaps), axis=0
        )
        entry['gt_overlaps'] = scipy.sparse.csr_matrix(dense_overlaps)
        entry['is_crowd'] = np.append(
            entry['is_crowd'],
            np.zeros(n_new, dtype=entry['is_crowd'].dtype)
        )
        entry['box_to_gt_ind_map'] = np.append(
            entry['box_to_gt_ind_map'],
            new_gt_map.astype(entry['box_to_gt_ind_map'].dtype, copy=False)
        )
def evaluate_box_proposals(
    json_dataset, roidb, thresholds=None, area='all', limit=None
):
    """Evaluate detection proposal recall metrics.

    This function is a much faster alternative to the official COCO API recall
    evaluation code. However, it produces slightly different results.

    Args:
        json_dataset: not used by this function; kept for interface
            compatibility.
        roidb: iterable of entries providing ``gt_classes``, ``is_crowd``,
            ``boxes`` and ``seg_areas``. Rows with ``gt_classes == 0`` are
            treated as proposals.
        thresholds: IoU thresholds to compute recall at. Defaults to
            0.5, 0.55, ..., 0.95.
        area: name of the gt area range to evaluate (see ``areas`` below).
        limit: if given, only the first ``limit`` proposals of each entry are
            used.

    Returns:
        dict with ``ar`` (mean of ``recalls``), ``recalls``, ``thresholds``,
        ``gt_overlaps`` (sorted best IoU recorded per gt box) and
        ``num_pos`` (number of gt boxes in the area range). When ``num_pos``
        is 0 every recall is reported as 0 rather than NaN.
    """
    # Record max overlap value for each gt box
    # Return vector of overlap values
    areas = {
        'all': 0,
        'small': 1,
        'medium': 2,
        'large': 3,
        '96-128': 4,
        '128-256': 5,
        '256-512': 6,
        '512-inf': 7}
    area_ranges = [
        [0**2, 1e5**2],    # all
        [0**2, 32**2],     # small
        [32**2, 96**2],    # medium
        [96**2, 1e5**2],   # large
        [96**2, 128**2],   # 96-128
        [128**2, 256**2],  # 128-256
        [256**2, 512**2],  # 256-512
        [512**2, 1e5**2]]  # 512-inf
    assert area in areas, 'Unknown area range: {}'.format(area)
    area_range = area_ranges[areas[area]]
    # Collected per image and joined once at the end instead of re-copying a
    # growing array on every iteration.
    per_image_overlaps = []
    num_pos = 0
    for entry in roidb:
        gt_inds = np.where(
            (entry['gt_classes'] > 0) & (entry['is_crowd'] == 0))[0]
        gt_boxes = entry['boxes'][gt_inds, :]
        gt_areas = entry['seg_areas'][gt_inds]
        valid_gt_inds = np.where(
            (gt_areas >= area_range[0]) & (gt_areas <= area_range[1]))[0]
        gt_boxes = gt_boxes[valid_gt_inds, :]
        num_pos += len(valid_gt_inds)
        non_gt_inds = np.where(entry['gt_classes'] == 0)[0]
        boxes = entry['boxes'][non_gt_inds, :]
        if boxes.shape[0] == 0:
            # No proposals: the gt boxes still count as positives (misses).
            continue
        if limit is not None and boxes.shape[0] > limit:
            boxes = boxes[:limit, :]
        overlaps = box_utils.bbox_overlaps(
            boxes.astype(dtype=np.float32, copy=False),
            gt_boxes.astype(dtype=np.float32, copy=False))
        _gt_overlaps = np.zeros((gt_boxes.shape[0]))
        # Greedy one-to-one matching: repeatedly take the best remaining
        # (proposal, gt) pair and retire both.
        for j in range(min(boxes.shape[0], gt_boxes.shape[0])):
            # find which proposal box maximally covers each gt box
            argmax_overlaps = overlaps.argmax(axis=0)
            # and get the iou amount of coverage for each gt box
            max_overlaps = overlaps.max(axis=0)
            # find which gt box is 'best' covered (i.e. 'best' = most iou)
            gt_ind = max_overlaps.argmax()
            gt_ovr = max_overlaps.max()
            assert gt_ovr >= 0
            # find the proposal box that covers the best covered gt box
            box_ind = argmax_overlaps[gt_ind]
            # record the iou coverage of this gt box
            _gt_overlaps[j] = overlaps[box_ind, gt_ind]
            assert _gt_overlaps[j] == gt_ovr
            # mark the proposal box and the gt box as used
            overlaps[box_ind, :] = -1
            overlaps[:, gt_ind] = -1
        # append recorded iou coverage level
        per_image_overlaps.append(_gt_overlaps)

    if per_image_overlaps:
        gt_overlaps = np.sort(np.concatenate(per_image_overlaps))
    else:
        gt_overlaps = np.zeros(0)
    if thresholds is None:
        step = 0.05
        thresholds = np.arange(0.5, 0.95 + 1e-5, step)
    recalls = np.zeros_like(thresholds)
    # compute recall for each iou threshold; without any positive the ratio
    # is undefined, so recalls stay at 0 instead of dividing by zero.
    if num_pos > 0:
        for i, t in enumerate(thresholds):
            recalls[i] = (gt_overlaps >= t).sum() / float(num_pos)
    # ar = 2 * np.trapz(recalls, thresholds)
    ar = recalls.mean()
    return {'ar': ar, 'recalls': recalls, 'thresholds': thresholds,
            'gt_overlaps': gt_overlaps, 'num_pos': num_pos}
def add_body_uv_rcnn_blobs(blobs, sampled_boxes, roidb, im_scale, batch_idx):
    """Add DensePose specific blobs to the given inputs blobs dictionary.

    Foreground RoIs (label > 0) whose best IoU with a gt box carrying body UV
    annotations exceeds 0.7 receive dense part masks and point-wise I/U/V
    targets taken from that gt box. If no RoI qualifies, a single placeholder
    RoI with all-zero targets is emitted.

    Args:
        blobs: dict that already holds ``labels_int32``; the ``body_uv_*``
            and ``roi_has_body_uv_int32`` blobs are added in place.
        sampled_boxes: RoI boxes indexed consistently with
            ``blobs['labels_int32']``. Never modified by this function.
        roidb: roidb entry providing ``ignore_UV_body``, ``boxes``,
            ``flipped`` and the ``dp_*`` annotation lists.
        im_scale: factor applied to the RoI coordinates stored in
            ``body_uv_rois``.
        batch_idx: value stored in the first column of ``body_uv_rois``.
    """
    M = cfg.BODY_UV_RCNN.HEATMAP_SIZE
    # Number of point slots reserved per RoI in the point blobs.
    num_point_slots = 196
    # Prepare the body UV targets by associating one gt box which contains
    # body UV annotations to each training roi that has a fg class label.
    polys_gt_inds = np.where(roidb['ignore_UV_body'] == 0)[0]
    boxes_from_polys = roidb['boxes'][polys_gt_inds]
    # Select foreground RoIs
    fg_inds = np.where(blobs['labels_int32'] > 0)[0]
    roi_has_body_uv = np.zeros_like(blobs['labels_int32'], dtype=np.int32)

    if boxes_from_polys.shape[0] > 0 and fg_inds.shape[0] > 0:
        # Find overlap between all foreground RoIs and the gt bounding boxes
        # containing each body UV annotation.
        rois_fg = sampled_boxes[fg_inds]
        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_polys.astype(np.float32, copy=False))
        # Select foreground RoIs as those with > 0.7 overlap.
        # Row r of the overlap matrix belongs to fg_inds[r], so the same mask
        # filters both; indexing the matrix with the fg_inds values is only
        # right when the foreground RoIs occupy the leading rows.
        keep = np.max(overlaps_bbfg_bbpolys, axis=1) > 0.7
        fg_inds = fg_inds[keep]
        overlaps_bbfg_bbpolys = overlaps_bbfg_bbpolys[keep]

    if boxes_from_polys.shape[0] > 0 and fg_inds.shape[0] > 0:
        roi_has_body_uv[fg_inds] = 1
        num_fg = fg_inds.shape[0]
        # Create body UV blobs
        # Dense masks, each mask for a given fg roi is of size M x M.
        part_inds = blob_utils.zeros((num_fg, M, M), int32=True)
        # Weights assigned to each target in `part_inds`. By default, all 1's.
        part_inds_weights = blob_utils.ones((num_fg, M, M), int32=False)
        # 2D spatial coordinates, in (x, y) order, for every point slot.
        coords_xy = blob_utils.zeros((num_fg, num_point_slots, 2), int32=False)
        # Patch index of every point (0 is treated as background below)
        I_points = blob_utils.zeros((num_fg, num_point_slots), int32=True)
        # UV coordinates in each patch
        U_points = blob_utils.zeros((num_fg, num_point_slots), int32=False)
        V_points = blob_utils.zeros((num_fg, num_point_slots), int32=False)

        rois_fg = sampled_boxes[fg_inds]
        # Map from each fg roi to the index of the gt box with highest overlap
        fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1)

        # Add body UV targets for each fg roi
        for i in range(rois_fg.shape[0]):
            fg_polys_ind = fg_polys_inds[i]
            polys_gt_ind = polys_gt_inds[fg_polys_ind]
            # RLE encoded dense masks which are of size 256 x 256.
            # Map all part masks to 14 labels (i.e., indices of semantic body
            # parts).
            dp_masks = dp_utils.GetDensePoseMask(
                roidb['dp_masks'][polys_gt_ind],
                cfg.BODY_UV_RCNN.NUM_SEMANTIC_PARTS)
            # Surface patch indices of collected points
            dp_I = np.array(roidb['dp_I'][polys_gt_ind], dtype=np.int32)
            # UV coordinates of collected points
            dp_U = np.array(roidb['dp_U'][polys_gt_ind], dtype=np.float32)
            dp_V = np.array(roidb['dp_V'][polys_gt_ind], dtype=np.float32)
            # Spatial coordinates on the image which are scaled such that the
            # bbox size is 256 x 256.
            dp_x = np.array(roidb['dp_x'][polys_gt_ind], dtype=np.float32)
            dp_y = np.array(roidb['dp_y'][polys_gt_ind], dtype=np.float32)
            # Do the flipping of the densepose annotation
            if roidb['flipped']:
                dp_I, dp_U, dp_V, dp_x, dp_y, dp_masks = \
                    DP.get_symmetric_densepose(
                        dp_I, dp_U, dp_V, dp_x, dp_y, dp_masks)

            roi_fg = rois_fg[i]
            gt_box = boxes_from_polys[fg_polys_ind]
            fg_x1, fg_y1, fg_x2, fg_y2 = roi_fg[0:4]
            gt_x1, gt_y1, gt_x2, gt_y2 = gt_box[0:4]
            fg_width = fg_x2 - fg_x1
            fg_height = fg_y2 - fg_y1
            gt_width = gt_x2 - gt_x1
            gt_height = gt_y2 - gt_y1
            fg_scale_w = float(M) / fg_width
            fg_scale_h = float(M) / fg_height
            gt_scale_w = 256. / gt_width
            gt_scale_h = 256. / gt_height
            # Sample M points evenly within the fg roi and scale the relative
            # coordinates (to associated gt box) such that the bounding box
            # size is 256 x 256.
            x_targets = (
                np.arange(fg_x1, fg_x2, fg_width / M) - gt_x1) * gt_scale_w
            y_targets = (
                np.arange(fg_y1, fg_y2, fg_height / M) - gt_y1) * gt_scale_h
            # Construct 2D coordinate matrices; [:M] guards against an extra
            # sample produced by floating-point stepping in np.arange.
            x_targets, y_targets = np.meshgrid(x_targets[:M], y_targets[:M])
            # Map dense masks of size 256 x 256 to target heatmap of size
            # M x M.
            part_inds[i] = cv2.remap(
                dp_masks,
                x_targets.astype(np.float32),
                y_targets.astype(np.float32),
                interpolation=cv2.INTER_NEAREST,
                borderMode=cv2.BORDER_CONSTANT,
                borderValue=(0))

            # Scale annotated spatial coordinates from bbox of size 256 x 256
            # to target heatmap of size M x M.
            dp_x = (dp_x / gt_scale_w + gt_x1 - fg_x1) * fg_scale_w
            dp_y = (dp_y / gt_scale_h + gt_y1 - fg_y1) * fg_scale_h
            # Set patch index of points outside the heatmap as 0 (background).
            dp_I[dp_x < 0] = 0
            dp_I[dp_x > (M - 1)] = 0
            dp_I[dp_y < 0] = 0
            dp_I[dp_y > (M - 1)] = 0
            # Get body UV annotations of points inside the heatmap.
            points_inside = dp_I > 0
            dp_x = dp_x[points_inside]
            dp_y = dp_y[points_inside]
            dp_I = dp_I[points_inside]
            dp_U = dp_U[points_inside]
            dp_V = dp_V[points_inside]

            # Update body UV blobs
            num_dp_points = len(dp_I)
            coords_xy[i, 0:num_dp_points, 0] = dp_x
            coords_xy[i, 0:num_dp_points, 1] = dp_y
            I_points[i, 0:num_dp_points] = dp_I.astype(np.int32)
            U_points[i, 0:num_dp_points] = dp_U
            V_points[i, 0:num_dp_points] = dp_V
    else:  # If there are no fg rois
        # The network cannot handle empty blobs, so we must provide a blob.
        # We simply take the first bg roi, give it an all 0's body UV
        # annotations and label it with class zero (bg).
        bg_inds = np.where(blobs['labels_int32'] == 0)[0]
        # `rois_fg` is actually one background roi (or simply the first roi
        # when there is no background roi); its targets are all zero.
        placeholder_ind = 0 if len(bg_inds) == 0 else bg_inds[0]
        # Copy explicitly: integer indexing + reshape returns a view, and the
        # in-place scaling below would otherwise rescale the caller's
        # `sampled_boxes` row.
        rois_fg = sampled_boxes[placeholder_ind].reshape((1, -1)).copy()
        # Mark that the first roi has body UV annotation
        roi_has_body_uv[0] = 1
        # We give it all 0's blobs
        part_inds = blob_utils.zeros((1, M, M), int32=True)
        part_inds_weights = blob_utils.zeros((1, M, M), int32=False)
        coords_xy = blob_utils.zeros((1, num_point_slots, 2), int32=False)
        I_points = blob_utils.zeros((1, num_point_slots), int32=True)
        U_points = blob_utils.zeros((1, num_point_slots), int32=False)
        V_points = blob_utils.zeros((1, num_point_slots), int32=False)

    # Scale rois_fg and format as (batch_idx, x1, y1, x2, y2)
    rois_fg *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1))
    rois_fg = np.hstack((repeated_batch_idx, rois_fg))

    # Create body UV blobs for all patches (including background)
    K = cfg.BODY_UV_RCNN.NUM_PATCHES + 1
    # Construct U/V_points blobs for all patches by repeating it K times.
    # Shape: (#rois, 196, K)
    U_points = np.repeat(U_points[:, :, np.newaxis], K, axis=-1)
    V_points = np.repeat(V_points[:, :, np.newaxis], K, axis=-1)
    uv_point_weights = np.zeros_like(U_points)
    # Set binary weights for UV targets in each patch; channel 0 stays zero.
    for i in range(1, K):
        uv_point_weights[:, :, i] = (I_points == i).astype(np.float32)

    # Update blobs dict with body UV blobs
    blobs['body_uv_rois'] = rois_fg
    blobs['roi_has_body_uv_int32'] = roi_has_body_uv  # shape: (#rois,)
    blobs['body_uv_parts'] = part_inds  # shape: (#rois, M, M)
    blobs['body_uv_parts_weights'] = part_inds_weights
    # shape: (#rois * 196, 2)
    blobs['body_uv_coords_xy'] = coords_xy.reshape(-1, 2)
    # shape: (#rois * 196, 1)
    blobs['body_uv_I_points'] = I_points.reshape(-1, 1)
    blobs['body_uv_U_points'] = U_points  # shape: (#rois, 196, K)
    blobs['body_uv_V_points'] = V_points
    blobs['body_uv_point_weights'] = uv_point_weights