def add_rpn_blobs(blobs, im_scales, roidb):
    """Fill ``blobs`` with RPN training inputs for every image in ``roidb``.

    Used for RPN-only and end-to-end Faster R-CNN training.  Anchors are
    generated once (one field per FPN level when ``cfg.FPN.FPN_ON`` and
    ``cfg.FPN.MULTILEVEL_RPN`` are both set, otherwise a single field from
    the ``cfg.RPN`` settings) and reused for every image.

    Args:
        blobs: dict of blob name -> list.  Per-image arrays are appended to
            the lists and every non-empty list is concatenated into a single
            array before returning.  ``blobs['roidb']`` is set to the
            serialized, trimmed-down roidb.
        im_scales: per-image scale factors, indexed like ``roidb``.
        roidb: list of roidb entries; each entry is read for ``height``,
            ``width``, ``gt_classes``, ``is_crowd`` and ``boxes``.

    Returns:
        Always ``True``.
    """
    use_fpn_rpn = cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN

    if use_fpn_rpn:
        # RPN applied to many feature levels, as in the FPN paper
        k_max = cfg.FPN.RPN_MAX_LEVEL
        k_min = cfg.FPN.RPN_MIN_LEVEL
        fpn_levels = list(range(k_min, k_max + 1))
        foas = [
            data_utils.get_field_of_anchors(
                2.**lvl,
                (cfg.FPN.RPN_ANCHOR_START_SIZE * 2.**(lvl - k_min), ),
                cfg.FPN.RPN_ASPECT_RATIOS
            )
            for lvl in fpn_levels
        ]
        all_anchors = np.concatenate([f.field_of_anchors for f in foas])
    else:
        # Classical RPN: one field of anchors for the single feature level
        foas = [
            data_utils.get_field_of_anchors(
                cfg.RPN.STRIDE, cfg.RPN.SIZES, cfg.RPN.ASPECT_RATIOS
            )
        ]
        all_anchors = foas[0].field_of_anchors

    for im_i, entry in enumerate(roidb):
        scale = im_scales[im_i]
        im_height = np.round(entry['height'] * scale)
        im_width = np.round(entry['width'] * scale)

        # Ground truth = labelled (class > 0), non-crowd boxes, rescaled
        is_gt = (entry['gt_classes'] > 0) & (entry['is_crowd'] == 0)
        gt_inds = np.where(is_gt)[0]
        gt_rois = entry['boxes'][gt_inds, :] * scale

        blobs['im_info'].append(
            np.array([[im_height, im_width, scale]], dtype=np.float32)
        )

        # Add RPN targets
        rpn_blobs = _get_rpn_blobs(
            im_height, im_width, foas, all_anchors, gt_rois
        )
        if use_fpn_rpn:
            # One dict of blobs per level; suffix each name with its level
            for i, lvl in enumerate(fpn_levels):
                suffix = '_fpn' + str(lvl)
                for name, value in rpn_blobs[i].items():
                    blobs[name + suffix].append(value)
        else:
            for name, value in rpn_blobs.items():
                blobs[name].append(value)

    # Collapse every non-empty per-image list into a single array.
    for name, value in blobs.items():
        if isinstance(value, list) and value:
            blobs[name] = np.concatenate(value)

    # Keep only the roidb fields listed below in the serialized copy.
    valid_keys = [
        'has_visible_keypoints', 'boxes', 'segms', 'seg_areas', 'gt_classes',
        'gt_overlaps', 'is_crowd', 'box_to_gt_ind_map', 'gt_keypoints'
    ]
    minimal_roidb = [
        {key: entry[key] for key in valid_keys if key in entry}
        for entry in roidb
    ]
    blobs['roidb'] = blob_utils.serialize(minimal_roidb)

    # Always return valid=True, since RPN minibatches are valid by design
    return True
def add_retinanet_blobs(blobs, im_tr_matrix, roidb, image_width, image_height, im_scales):
    """Add RetinaNet training blobs for images warped by a per-image matrix.

    Anchors are generated for every FPN level in
    ``[cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL]``, every octave scale
    and every aspect ratio.  For each image the ground-truth boxes are
    resized by the image scale and re-centred with the image's
    transformation matrix before the RetinaNet targets are computed.

    Args:
        blobs: dict of blob name -> list.  Per-image arrays are appended and
            all non-empty lists are concatenated into arrays at the end.
            ``retnet_fg_num`` / ``retnet_bg_num`` are accumulated over images.
        im_tr_matrix: per-image transformation matrices, indexed like
            ``roidb``.  NOTE(review): the code multiplies each matrix by a
            3x1 homogeneous column and reads rows 0 and 1 of the result, so
            a 2x3 or 3x3 matrix is presumably expected -- confirm with caller.
        roidb: list of roidb entries (reads ``gt_classes``, ``is_crowd`` and
            ``boxes``).
        image_width: width passed to ``_get_retinanet_blobs`` and stored in
            ``im_info``.
        image_height: height passed to ``_get_retinanet_blobs`` and stored
            in ``im_info``.
        im_scales: per-image scale factors, indexed like ``roidb``.

    Returns:
        Always ``True``.

    Raises:
        AssertionError: if an image has no non-crowd ground-truth box with
            class > 0.
    """
    # RetinaNet is applied to many feature levels, as in the FPN paper
    k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
    scales_per_octave = cfg.RETINANET.SCALES_PER_OCTAVE
    aspect_ratios = cfg.RETINANET.ASPECT_RATIOS
    num_aspect_ratios = len(aspect_ratios)
    anchor_scale = cfg.RETINANET.ANCHOR_SCALE

    # get anchors from all levels for all scales/aspect ratios
    foas = []
    for lvl in range(k_min, k_max + 1):
        stride = 2.**lvl
        for octave in range(scales_per_octave):
            octave_scale = 2**(octave / float(scales_per_octave))
            for idx in range(num_aspect_ratios):
                anchor_sizes = (stride * octave_scale * anchor_scale, )
                anchor_aspect_ratios = (aspect_ratios[idx], )
                foa = data_utils.get_field_of_anchors(
                    stride, anchor_sizes, anchor_aspect_ratios, octave, idx
                )
                foas.append(foa)
    all_anchors = np.concatenate([f.field_of_anchors for f in foas])

    blobs['retnet_fg_num'], blobs['retnet_bg_num'] = 0.0, 0.0
    for im_i, entry in enumerate(roidb):
        transformation_matrix = im_tr_matrix[im_i]
        scale = im_scales[im_i]
        gt_inds = np.where(
            (entry['gt_classes'] > 0) & (entry['is_crowd'] == 0)
        )[0]
        assert len(gt_inds) > 0, \
            'Empty ground truth empty for image is not allowed. Please check.'
        gt_classes = entry['gt_classes'][gt_inds]

        # Only the boxes selected by gt_inds are transformed, so that
        # gt_rois stays aligned one-to-one with gt_classes.  Iterating over
        # every box of the entry would pair crowd / class-0 boxes with the
        # wrong labels.
        gt_rois = []
        for gt_roi in entry['boxes'][gt_inds, :]:
            w, h = gt_roi[2] - gt_roi[0], gt_roi[3] - gt_roi[1]
            # The box size is scaled; the box centre is moved by the matrix.
            nw, nh = int(w * scale), int(h * scale)
            center_x, center_y = gt_roi[0] + w / 2, gt_roi[1] + h / 2
            new_center = np.dot(
                transformation_matrix, [[center_x], [center_y], [1.0]]
            ).astype('int')
            new_center_x = int(new_center[0][0])
            new_center_y = int(new_center[1][0])
            nbx = int(new_center_x - nw / 2)
            nby = int(new_center_y - nh / 2)
            nbx2 = int(nbx + nw)
            nby2 = int(nby + nh)
            gt_rois.append([nbx, nby, nbx2, nby2])

        im_info = np.array([[image_height, image_width, scale]], dtype=np.float32)
        matrix = np.array(transformation_matrix, dtype=np.float32)
        blobs['im_info'].append(im_info)
        blobs['im_tr_matrix'].append(matrix)
        gt_rois = np.asarray(gt_rois, dtype=np.float32)

        retinanet_blobs, fg_num, bg_num = _get_retinanet_blobs(
            foas, all_anchors, gt_rois, gt_classes, image_width, image_height
        )
        for i, foa in enumerate(foas):
            for k, v in retinanet_blobs[i].items():
                # the way it stacks is:
                # [[anchors for image1] + [anchors for images 2]]
                level = int(np.log2(foa.stride))
                key = '{}_fpn{}'.format(k, level)
                if k == 'retnet_roi_fg_bbox_locs':
                    v[:, 0] = im_i
                    # loc_stride: 80 * 4 if cls_specific else 4
                    loc_stride = 4  # 4 coordinates per bbox prediction
                    if cfg.RETINANET.CLASS_SPECIFIC_BBOX:
                        loc_stride *= (cfg.MODEL.NUM_CLASSES - 1)
                    anchor_ind = foa.octave * num_aspect_ratios + foa.aspect
                    # v[:, 1] is the class label [range 0-80] if we do
                    # class-specific bbox, otherwise it is 0.  In the
                    # class-specific case the location of the current anchor
                    # is class_label * 4, and then the anchor index is taken
                    # into account.
                    v[:, 1] *= 4
                    v[:, 1] += loc_stride * anchor_ind
                blobs[key].append(v)
        blobs['retnet_fg_num'] += fg_num
        blobs['retnet_bg_num'] += bg_num

    blobs['retnet_fg_num'] = blobs['retnet_fg_num'].astype(np.float32)
    blobs['retnet_bg_num'] = blobs['retnet_bg_num'].astype(np.float32)

    N = len(roidb)
    for k, v in blobs.items():
        if isinstance(v, list) and len(v) > 0:
            # number of list elements contributed by each image
            A = len(v) // N
            if 'retnet_cls_labels' in k:
                # For the cls branch labels [per fpn level] the list holds
                # N x A elements (N = num_images, A = num_anchors), ordered
                # [[a0, ..., aA-1], [a0, ..., aA-1], ...].  The elements of
                # each image are concatenated along axis 1 first, then the
                # images are concatenated along axis 0.
                # concat anchors within an image
                tmp = [
                    np.concatenate(v[i:i + A], axis=1)
                    for i in range(0, len(v), A)
                ]
                # concat images
                blobs[k] = np.concatenate(tmp, axis=0)
            else:
                # For the bbox branch elements [per FPN level] (targets and
                # fg box locations) the per-anchor arrays are simply stacked
                # along axis 0.
                blobs[k] = np.concatenate(v, axis=0)
    return True
def add_rpn_blobs(blobs, im_scales, roidb):
    """Add blobs needed training RPN-only and end-to-end Faster R-CNN models.

    This variant also emits domain-adaptation blobs: every image appends a
    one-element boolean ``is_source`` array, and when
    ``cfg.TRAIN.DOMAIN_ADAPTATION`` is set it additionally appends a
    ``da_label`` map filled with 0 for source images and 1 for the others.

    Args:
        blobs: dict of blob name -> list.  Per-image arrays are appended and
            every non-empty list is concatenated into one array at the end.
            ``blobs['roidb']`` is set to the serialized, trimmed roidb.
        im_scales: per-image scale factors, indexed like ``roidb``.
        roidb: list of roidb entries; each entry is read for ``height``,
            ``width``, ``gt_classes``, ``is_crowd``, ``boxes`` and, with
            domain adaptation enabled, ``is_source``.

    Returns:
        Always ``True``.
    """
    if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
        # RPN applied to many feature levels, as in the FPN paper
        k_max = cfg.FPN.RPN_MAX_LEVEL
        k_min = cfg.FPN.RPN_MIN_LEVEL
        foas = []
        for lvl in range(k_min, k_max + 1):
            field_stride = 2.**lvl
            anchor_sizes = (cfg.FPN.RPN_ANCHOR_START_SIZE * 2.**(lvl - k_min), )
            anchor_aspect_ratios = cfg.FPN.RPN_ASPECT_RATIOS
            foa = data_utils.get_field_of_anchors(
                field_stride, anchor_sizes, anchor_aspect_ratios
            )
            foas.append(foa)
        all_anchors = np.concatenate([f.field_of_anchors for f in foas])
    else:
        foa = data_utils.get_field_of_anchors(
            cfg.RPN.STRIDE, cfg.RPN.SIZES, cfg.RPN.ASPECT_RATIOS
        )
        all_anchors = foa.field_of_anchors

    for im_i, entry in enumerate(roidb):
        scale = im_scales[im_i]
        im_height = np.round(entry['height'] * scale)
        im_width = np.round(entry['width'] * scale)
        gt_inds = np.where(
            (entry['gt_classes'] > 0) & (entry['is_crowd'] == 0)
        )[0]
        gt_rois = entry['boxes'][gt_inds, :] * scale
        im_info = np.array([[im_height, im_width, scale]], dtype=np.float32)
        blobs['im_info'].append(im_info)

        # Add RPN targets
        if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
            # RPN applied to many feature levels, as in the FPN paper
            rpn_blobs = _get_rpn_blobs(
                im_height, im_width, foas, all_anchors, gt_rois
            )
            for i, lvl in enumerate(range(k_min, k_max + 1)):
                for k, v in rpn_blobs[i].items():
                    blobs[k + '_fpn' + str(lvl)].append(v)
        else:
            # Classical RPN, applied to a single feature level.  With domain
            # adaptation the source flag is forwarded to the target builder.
            if cfg.TRAIN.DOMAIN_ADAPTATION:
                rpn_blobs = _get_rpn_blobs(
                    im_height, im_width, [foa], all_anchors, gt_rois,
                    entry['is_source']
                )
            else:
                rpn_blobs = _get_rpn_blobs(
                    im_height, im_width, [foa], all_anchors, gt_rois
                )
            for k, v in rpn_blobs.items():
                blobs[k].append(v)

        if cfg.TRAIN.DOMAIN_ADAPTATION:
            is_source = bool(entry['is_source'])
            blobs['is_source'].append(
                np.full((1, ), is_source, dtype=np.bool_)
            )
            # Domain label map: all zeros for source images, all ones
            # otherwise.
            # NOTE(review): the (36, 67) spatial size is hard-coded here;
            # presumably it matches the feature map of the domain
            # classifier -- confirm against the model definition.
            blobs['da_label'].append(
                np.full((1, 36, 67), 0 if is_source else 1, dtype=np.int32)
            )
        else:
            # Without domain adaptation every image counts as source.
            blobs['is_source'].append(np.full((1, ), True, dtype=np.bool_))

    for k, v in blobs.items():
        if isinstance(v, list) and len(v) > 0:
            blobs[k] = np.concatenate(v)

    valid_keys = [
        'has_visible_keypoints', 'boxes', 'segms', 'seg_areas', 'gt_classes',
        'gt_overlaps', 'is_crowd', 'box_to_gt_ind_map', 'gt_keypoints'
    ]
    if cfg.TRAIN.DOMAIN_ADAPTATION:
        valid_keys += ['is_source', 'da_label']
    else:
        valid_keys += ['is_source']

    minimal_roidb = [{} for _ in range(len(roidb))]
    for i, e in enumerate(roidb):
        for k in valid_keys:
            if k in e:
                minimal_roidb[i][k] = e[k]
    blobs['roidb'] = blob_utils.serialize(minimal_roidb)

    # Always return valid=True, since RPN minibatches are valid by design
    return True
def add_rpn_blobs(blobs, im_scales, roidb):
    """Add blobs needed training RPN-only and end-to-end Faster R-CNN models.

    Args:
        blobs: dict of blob name -> list.  Per-image arrays are appended and
            every non-empty list is concatenated into one array at the end.
            ``blobs['roidb']`` is set to the serialized, trimmed roidb.
        im_scales: per-image scale factors, indexed like ``roidb``.
        roidb: list of roidb entries; each entry is read for ``height``,
            ``width``, ``gt_classes``, ``is_crowd`` and ``boxes``.

    Returns:
        Always ``True``.

    Raises:
        IndexError: if ``im_scales`` has no entry for an image in ``roidb``.
    """
    if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
        # RPN applied to many feature levels, as in the FPN paper
        k_max = cfg.FPN.RPN_MAX_LEVEL
        k_min = cfg.FPN.RPN_MIN_LEVEL
        foas = []
        for lvl in range(k_min, k_max + 1):
            field_stride = 2.**lvl
            anchor_sizes = (cfg.FPN.RPN_ANCHOR_START_SIZE * 2.**(lvl - k_min), )
            anchor_aspect_ratios = cfg.FPN.RPN_ASPECT_RATIOS
            foa = data_utils.get_field_of_anchors(
                field_stride, anchor_sizes, anchor_aspect_ratios
            )
            foas.append(foa)
        all_anchors = np.concatenate([f.field_of_anchors for f in foas])
    else:
        foa = data_utils.get_field_of_anchors(
            cfg.RPN.STRIDE, cfg.RPN.SIZES, cfg.RPN.ASPECT_RATIOS
        )
        all_anchors = foa.field_of_anchors

    for im_i, entry in enumerate(roidb):
        # Look the scale up directly: a missing entry must fail here rather
        # than let the loop continue with an undefined or stale scale.
        scale = im_scales[im_i]
        im_height = np.round(entry['height'] * scale)
        im_width = np.round(entry['width'] * scale)
        gt_inds = np.where(
            (entry['gt_classes'] > 0) & (entry['is_crowd'] == 0)
        )[0]
        gt_rois = entry['boxes'][gt_inds, :] * scale
        im_info = np.array([[im_height, im_width, scale]], dtype=np.float32)
        blobs['im_info'].append(im_info)

        # Add RPN targets
        if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
            # RPN applied to many feature levels, as in the FPN paper
            rpn_blobs = _get_rpn_blobs(
                im_height, im_width, foas, all_anchors, gt_rois
            )
            for i, lvl in enumerate(range(k_min, k_max + 1)):
                for k, v in rpn_blobs[i].items():
                    blobs[k + '_fpn' + str(lvl)].append(v)
        else:
            # Classical RPN, applied to a single feature level
            rpn_blobs = _get_rpn_blobs(
                im_height, im_width, [foa], all_anchors, gt_rois
            )
            for k, v in rpn_blobs.items():
                blobs[k].append(v)

    for k, v in blobs.items():
        if isinstance(v, list) and len(v) > 0:
            blobs[k] = np.concatenate(v)

    valid_keys = [
        'has_visible_keypoints', 'boxes', 'segms', 'seg_areas', 'gt_classes',
        'gt_overlaps', 'is_crowd', 'box_to_gt_ind_map', 'gt_keypoints'
    ]
    minimal_roidb = [{} for _ in range(len(roidb))]
    for i, e in enumerate(roidb):
        for k in valid_keys:
            if k in e:
                minimal_roidb[i][k] = e[k]
    blobs['roidb'] = blob_utils.serialize(minimal_roidb)

    # Always return valid=True, since RPN minibatches are valid by design
    return True
def add_rpn_blobs(blobs, im_scales, roidb):
    """Add blobs needed training RPN-only and end-to-end Faster R-CNN models.

    Args:
        blobs: dict of blob name -> list.  Per-image arrays are appended and
            every non-empty list is concatenated into one array at the end.
            ``blobs['roidb']`` is set to the serialized, trimmed roidb.
        im_scales: per-image scale factors, indexed like ``roidb``.
        roidb: list of roidb entries; each entry is read for ``height``,
            ``width``, ``gt_classes``, ``is_crowd`` and ``boxes``.

    Returns:
        Always ``True``.
    """
    if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
        # RPN applied to many feature levels, as in the FPN paper
        k_max = cfg.FPN.RPN_MAX_LEVEL
        k_min = cfg.FPN.RPN_MIN_LEVEL
        foas = []
        for lvl in range(k_min, k_max + 1):
            # The stride doubles with each level, e.g. lvl = 2 gives 4.0
            field_stride = 2.**lvl
            # A single anchor size per level, doubling from the start size
            anchor_sizes = (cfg.FPN.RPN_ANCHOR_START_SIZE * 2.**(lvl - k_min), )
            anchor_aspect_ratios = cfg.FPN.RPN_ASPECT_RATIOS
            # For this feature map, get the anchors belonging to each cell.
            # NOTE(review): the original author notes these anchors are
            # expressed at the scale of the network input -- confirm in
            # data_utils.get_field_of_anchors.
            foa = data_utils.get_field_of_anchors(
                field_stride, anchor_sizes, anchor_aspect_ratios
            )
            foas.append(foa)
        all_anchors = np.concatenate([f.field_of_anchors for f in foas])
    else:
        foa = data_utils.get_field_of_anchors(
            cfg.RPN.STRIDE, cfg.RPN.SIZES, cfg.RPN.ASPECT_RATIOS
        )
        all_anchors = foa.field_of_anchors

    for im_i, entry in enumerate(roidb):
        scale = im_scales[im_i]
        # Multiplying by scale gives sizes relative to the network input
        im_height = np.round(entry['height'] * scale)
        im_width = np.round(entry['width'] * scale)
        gt_inds = np.where(
            (entry['gt_classes'] > 0) & (entry['is_crowd'] == 0)
        )[0]
        # Ground-truth boxes, rescaled like the image
        gt_rois = entry['boxes'][gt_inds, :] * scale
        # Record the image info in the blob
        im_info = np.array([[im_height, im_width, scale]], dtype=np.float32)
        blobs['im_info'].append(im_info)

        # Add RPN targets
        if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
            # RPN applied to many feature levels, as in the FPN paper
            rpn_blobs = _get_rpn_blobs(
                im_height, im_width, foas, all_anchors, gt_rois
            )
            for i, lvl in enumerate(range(k_min, k_max + 1)):
                for k, v in rpn_blobs[i].items():
                    blobs[k + '_fpn' + str(lvl)].append(v)
        else:
            # Classical RPN, applied to a single feature level
            rpn_blobs = _get_rpn_blobs(
                im_height, im_width, [foa], all_anchors, gt_rois
            )
            for k, v in rpn_blobs.items():
                blobs[k].append(v)

    for k, v in blobs.items():
        if isinstance(v, list) and len(v) > 0:
            blobs[k] = np.concatenate(v)

    valid_keys = [
        'has_visible_keypoints', 'boxes', 'segms', 'seg_areas', 'gt_classes',
        'gt_overlaps', 'is_crowd', 'box_to_gt_ind_map', 'gt_keypoints'
    ]
    minimal_roidb = [{} for _ in range(len(roidb))]
    for i, e in enumerate(roidb):
        for k in valid_keys:
            if k in e:
                minimal_roidb[i][k] = e[k]
    blobs['roidb'] = blob_utils.serialize(minimal_roidb)

    # Always return valid=True, since RPN minibatches are valid by design
    return True
def add_rpn_blobs(blobs, im_scales, roidb):
    """Add blobs needed training RPN-only and end-to-end Faster R-CNN models.

    In the multi-level (FPN) case the anchor strides are derived from the
    backbone's ``spatial_scales`` (looked up on the ``FPN`` module through
    ``cfg.MODEL.BACKBONE_NAME``) instead of being assumed to be ``2**lvl``.

    Args:
        blobs: dict of blob name -> list.  Per-image arrays are appended and
            every non-empty list is concatenated into one array at the end.
            ``blobs['roidb']`` is set to the serialized, trimmed roidb.
        im_scales: per-image scale factors, indexed like ``roidb``.
        roidb: list of roidb entries; each entry is read for ``height``,
            ``width``, ``gt_classes``, ``is_crowd`` and ``boxes``.

    Returns:
        Always ``True``.
    """
    if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
        # RPN applied to many feature levels, as in the FPN paper
        k_max = cfg.FPN.RPN_MAX_LEVEL
        k_min = cfg.FPN.RPN_MIN_LEVEL
        foas = []
        # Fetch the spatial scales of the backbone's FPN levels.  The
        # indexing below treats entry 0 as level k_max - 1, i.e. the scales
        # are assumed to be ordered from the coarsest level to the finest.
        fpn_spatial_scales = getattr(
            FPN, 'fpn_level_info_' + cfg.MODEL.BACKBONE_NAME + '_conv5'
        )().spatial_scales
        for lvl in range(k_min, k_max):
            field_stride = 1. / fpn_spatial_scales[k_max - lvl - 1]
            anchor_sizes = (cfg.FPN.RPN_ANCHOR_START_SIZE * 2.**(lvl - k_min), )
            anchor_aspect_ratios = cfg.FPN.RPN_ASPECT_RATIOS
            foa = data_utils.get_field_of_anchors(
                field_stride, anchor_sizes, anchor_aspect_ratios
            )
            foas.append(foa)
        # For P6 the stride is twice the stride of the coarsest level.
        # NOTE(review): when k_max != 6 no field is added for level k_max,
        # yet the per-image loop below still indexes one blob dict per level
        # in [k_min, k_max] -- presumably this path is only used with
        # k_max == 6; confirm before using another configuration.
        if k_max == 6:
            field_stride = 2 * (1. / fpn_spatial_scales[0])
            anchor_sizes = (cfg.FPN.RPN_ANCHOR_START_SIZE * 2.**(k_max - k_min), )
            anchor_aspect_ratios = cfg.FPN.RPN_ASPECT_RATIOS
            foa = data_utils.get_field_of_anchors(
                field_stride, anchor_sizes, anchor_aspect_ratios
            )
            foas.append(foa)
        all_anchors = np.concatenate([f.field_of_anchors for f in foas])
    else:
        foa = data_utils.get_field_of_anchors(
            cfg.RPN.STRIDE, cfg.RPN.SIZES, cfg.RPN.ASPECT_RATIOS
        )
        all_anchors = foa.field_of_anchors

    for im_i, entry in enumerate(roidb):
        scale = im_scales[im_i]
        im_height = np.round(entry['height'] * scale)
        im_width = np.round(entry['width'] * scale)
        gt_inds = np.where(
            (entry['gt_classes'] > 0) & (entry['is_crowd'] == 0)
        )[0]
        gt_rois = entry['boxes'][gt_inds, :] * scale
        im_info = np.array([[im_height, im_width, scale]], dtype=np.float32)
        blobs['im_info'].append(im_info)

        # Add RPN targets
        if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
            # RPN applied to many feature levels, as in the FPN paper
            rpn_blobs = _get_rpn_blobs(
                im_height, im_width, foas, all_anchors, gt_rois
            )
            for i, lvl in enumerate(range(k_min, k_max + 1)):
                for k, v in rpn_blobs[i].items():
                    blobs[k + '_fpn' + str(lvl)].append(v)
        else:
            # Classical RPN, applied to a single feature level
            rpn_blobs = _get_rpn_blobs(
                im_height, im_width, [foa], all_anchors, gt_rois
            )
            for k, v in rpn_blobs.items():
                blobs[k].append(v)

    for k, v in blobs.items():
        if isinstance(v, list) and len(v) > 0:
            blobs[k] = np.concatenate(v)

    valid_keys = [
        'has_visible_keypoints', 'boxes', 'segms', 'seg_areas', 'gt_classes',
        'gt_overlaps', 'is_crowd', 'box_to_gt_ind_map', 'gt_keypoints'
    ]
    minimal_roidb = [{} for _ in range(len(roidb))]
    for i, e in enumerate(roidb):
        for k in valid_keys:
            if k in e:
                minimal_roidb[i][k] = e[k]
    blobs['roidb'] = blob_utils.serialize(minimal_roidb)

    # Always return valid=True, since RPN minibatches are valid by design
    return True
def add_rpn_blobs(blobs, im_scales, roidb):
    """Build the RPN / Faster R-CNN training blobs for a minibatch.

    Unlike the usual ``gt_classes > 0`` filter, this variant keeps every
    non-crowd box whose class is ``>= 0`` as ground truth.

    Args:
        blobs: dict of blob name -> list.  Per-image arrays are appended and
            every non-empty list is concatenated into one array at the end.
            ``blobs['roidb']`` is set to the serialized, trimmed roidb.
        im_scales: per-image scale factors, indexed like ``roidb``.
        roidb: list of roidb entries; each entry is read for ``height``,
            ``width``, ``gt_classes``, ``is_crowd`` and ``boxes``.

    Returns:
        Always ``True``.
    """
    multilevel_rpn = cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN

    if multilevel_rpn:
        # RPN applied to many feature levels, as in the FPN paper
        k_max = cfg.FPN.RPN_MAX_LEVEL
        k_min = cfg.FPN.RPN_MIN_LEVEL
        foas = []
        for lvl in range(k_min, k_max + 1):
            stride = 2.**lvl
            sizes = (cfg.FPN.RPN_ANCHOR_START_SIZE * 2.**(lvl - k_min), )
            ratios = cfg.FPN.RPN_ASPECT_RATIOS
            foas.append(data_utils.get_field_of_anchors(stride, sizes, ratios))
        all_anchors = np.concatenate([f.field_of_anchors for f in foas])
        anchor_fields = foas
    else:
        foa = data_utils.get_field_of_anchors(
            cfg.RPN.STRIDE, cfg.RPN.SIZES, cfg.RPN.ASPECT_RATIOS
        )
        all_anchors = foa.field_of_anchors
        anchor_fields = [foa]

    for im_i, entry in enumerate(roidb):
        scale = im_scales[im_i]
        im_height = np.round(entry['height'] * scale)
        im_width = np.round(entry['width'] * scale)

        # NOTE(review): ">= 0" also admits class-0 boxes as ground truth;
        # presumably intentional for this dataset -- confirm.
        not_crowd = entry['is_crowd'] == 0
        gt_inds = np.where((entry['gt_classes'] >= 0) & not_crowd)[0]
        gt_rois = entry['boxes'][gt_inds, :] * scale

        blobs['im_info'].append(
            np.array([[im_height, im_width, scale]], dtype=np.float32)
        )

        # Add RPN targets
        rpn_blobs = _get_rpn_blobs(
            im_height, im_width, anchor_fields, all_anchors, gt_rois
        )
        if multilevel_rpn:
            # One dict of blobs per level; tag each name with its level
            for i, lvl in enumerate(range(k_min, k_max + 1)):
                for name, value in rpn_blobs[i].items():
                    blobs[name + '_fpn' + str(lvl)].append(value)
        else:
            # Classical RPN, applied to a single feature level
            for name, value in rpn_blobs.items():
                blobs[name].append(value)

    # Merge each non-empty per-image list into one array.
    for name, value in blobs.items():
        if isinstance(value, list) and value:
            blobs[name] = np.concatenate(value)

    valid_keys = [
        'has_visible_keypoints', 'boxes', 'segms', 'seg_areas', 'gt_classes',
        'gt_overlaps', 'is_crowd', 'box_to_gt_ind_map', 'gt_keypoints'
    ]
    minimal_roidb = []
    for entry in roidb:
        minimal_roidb.append(
            {key: entry[key] for key in valid_keys if key in entry}
        )
    blobs['roidb'] = blob_utils.serialize(minimal_roidb)

    # Always return valid=True, since RPN minibatches are valid by design
    return True
def add_retinanet_blobs(blobs, im_scales, roidb, image_width, image_height):
    """Populate ``blobs`` with the RetinaNet training targets of a minibatch.

    Anchors are generated for every FPN level in
    ``[cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL]``, every octave scale
    and every aspect ratio; the targets of each image are then computed by
    ``_get_retinanet_blobs`` and stored per level.

    Args:
        blobs: dict of blob name -> list.  Per-image arrays are appended and
            all non-empty lists are concatenated into arrays at the end.
            ``retnet_fg_num`` / ``retnet_bg_num`` are accumulated over images.
        im_scales: per-image scale factors, indexed like ``roidb``.
        roidb: list of roidb entries (reads ``height``, ``width``,
            ``gt_classes``, ``is_crowd`` and ``boxes``).
        image_width: width forwarded to ``_get_retinanet_blobs``.
        image_height: height forwarded to ``_get_retinanet_blobs``.

    Returns:
        Always ``True``.

    Raises:
        AssertionError: if an image has no non-crowd ground-truth box with
            class > 0.
    """
    # RetinaNet is applied to many feature levels, as in the FPN paper
    k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
    scales_per_octave = cfg.RETINANET.SCALES_PER_OCTAVE
    num_aspect_ratios = len(cfg.RETINANET.ASPECT_RATIOS)
    aspect_ratios = cfg.RETINANET.ASPECT_RATIOS
    anchor_scale = cfg.RETINANET.ANCHOR_SCALE

    # One field of anchors per (level, octave, aspect ratio) combination
    foas = []
    for lvl in range(k_min, k_max + 1):
        stride = 2. ** lvl
        for octave in range(scales_per_octave):
            octave_scale = 2 ** (octave / float(scales_per_octave))
            for idx, ratio in enumerate(aspect_ratios):
                sizes = (stride * octave_scale * anchor_scale, )
                foas.append(
                    data_utils.get_field_of_anchors(
                        stride, sizes, (ratio, ), octave, idx
                    )
                )
    all_anchors = np.concatenate([f.field_of_anchors for f in foas])

    blobs['retnet_fg_num'], blobs['retnet_bg_num'] = 0.0, 0.0
    for im_i, entry in enumerate(roidb):
        scale = im_scales[im_i]
        im_height = np.round(entry['height'] * scale)
        im_width = np.round(entry['width'] * scale)

        is_gt = (entry['gt_classes'] > 0) & (entry['is_crowd'] == 0)
        gt_inds = np.where(is_gt)[0]
        assert len(gt_inds) > 0, \
            'Empty ground truth empty for image is not allowed. Please check.'
        gt_rois = entry['boxes'][gt_inds, :] * scale
        gt_classes = entry['gt_classes'][gt_inds]

        blobs['im_info'].append(
            np.array([[im_height, im_width, scale]], dtype=np.float32)
        )

        retinanet_blobs, fg_num, bg_num = _get_retinanet_blobs(
            foas, all_anchors, gt_rois, gt_classes, image_width, image_height
        )
        for i, foa in enumerate(foas):
            for name, value in retinanet_blobs[i].items():
                # Blobs stack as
                # [[anchors for image 1] + [anchors for image 2]]
                level = int(np.log2(foa.stride))
                if name == 'retnet_roi_fg_bbox_locs':
                    value[:, 0] = im_i
                    # loc_stride: 4 coordinates per box, times the number of
                    # foreground classes when boxes are class specific
                    # (e.g. 80 * 4).
                    loc_stride = 4
                    if cfg.RETINANET.CLASS_SPECIFIC_BBOX:
                        loc_stride *= (cfg.MODEL.NUM_CLASSES - 1)
                    anchor_ind = foa.octave * num_aspect_ratios + foa.aspect
                    # value[:, 1] holds the class label for class-specific
                    # boxes and 0 otherwise.  It becomes class_label * 4,
                    # offset by the position of this anchor.
                    value[:, 1] *= 4
                    value[:, 1] += loc_stride * anchor_ind
                blobs[name + '_fpn' + str(level)].append(value)
        blobs['retnet_fg_num'] += fg_num
        blobs['retnet_bg_num'] += bg_num

    blobs['retnet_fg_num'] = blobs['retnet_fg_num'].astype(np.float32)
    blobs['retnet_bg_num'] = blobs['retnet_bg_num'].astype(np.float32)

    N = len(roidb)
    for name, value in blobs.items():
        if not (isinstance(value, list) and value):
            continue
        # number of list elements contributed by each image
        A = len(value) // N
        if 'retnet_cls_labels' in name:
            # For the cls branch labels [per fpn level] the list holds N x A
            # elements (N = num_images, A = num_anchors), ordered
            # [[a0, ..., aA-1], [a0, ..., aA-1], ...].  Each image's
            # elements are joined along axis 1, then the images are joined
            # along axis 0.
            per_image = []
            for start in range(0, len(value), A):
                per_image.append(
                    np.concatenate(value[start:start + A], axis=1)
                )
            blobs[name] = np.concatenate(per_image, axis=0)
        else:
            # For the bbox branch elements [per FPN level] (targets and fg
            # box locations) the arrays are simply stacked along axis 0.
            blobs[name] = np.concatenate(value, axis=0)
    return True