# Module-level imports assumed by the function variants below (Detectron-style
# layout). Fork-specific helpers referenced later (e.g. snip_valid,
# vis2d_utils, ForkedPdb) are expected to be provided by the corresponding fork.
import math
import os
import os.path as osp

import cv2
import numpy as np

from detectron.core.config import cfg
import detectron.roi_data.data_utils as data_utils
import detectron.utils.blob as blob_utils


def add_rpn_blobs(blobs, im_scales, roidb):
    """Add blobs needed for training RPN-only and end-to-end Faster R-CNN
    models."""
    # blobs = {'data': [], 'im_info': [], 'roidb': []}
    if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
        # RPN applied to many feature levels, as in the FPN paper
        k_max = cfg.FPN.RPN_MAX_LEVEL
        k_min = cfg.FPN.RPN_MIN_LEVEL
        foas = []
        for lvl in range(k_min, k_max + 1):
            field_stride = 2.**lvl
            anchor_sizes = (cfg.FPN.RPN_ANCHOR_START_SIZE * 2.**(lvl - k_min), )
            anchor_aspect_ratios = cfg.FPN.RPN_ASPECT_RATIOS
            foa = data_utils.get_field_of_anchors(
                field_stride, anchor_sizes, anchor_aspect_ratios
            )
            foas.append(foa)
        all_anchors = np.concatenate([f.field_of_anchors for f in foas])
    else:
        foa = data_utils.get_field_of_anchors(
            cfg.RPN.STRIDE, cfg.RPN.SIZES, cfg.RPN.ASPECT_RATIOS
        )
        all_anchors = foa.field_of_anchors

    # dataset_names = []
    for im_i, entry in enumerate(roidb['frames_info']):
        scale = im_scales[im_i]
        im_height = np.round(entry['height'] * scale)
        im_width = np.round(entry['width'] * scale)
        im_info = np.array([[im_height, im_width, scale]], dtype=np.float32)
        blobs['im_info'].append(im_info)
        # dataset_names.append(entry['dataset'])
        # if 'training_16frames' in entry['file_name']:
        #     blobs['dataset_name'].append('training')
        # elif 'validation_16frames' in entry['file_name']:
        #     blobs['dataset_name'].append('validation')
        # elif 'test_16frames' in entry['file_name']:
        #     blobs['dataset_name'].append('test')
        # else:
        #     error('rpn.py: add dataset_name, not in train, val, or test')

    for k, v in blobs.items():
        if isinstance(v, list) and len(v) > 0:
            blobs[k] = np.concatenate(v)

    minimal_roidb = [{} for _ in range(len(roidb['frames_info']))]
    for i, e in enumerate(roidb['frames_info']):
        minimal_roidb[i] = e
    # blobs['roidb'] = blob_utils.serialize(minimal_roidb)
    blobs['roidb'] = minimal_roidb
    # blobs['dataset_name'] = dataset_names

    # Always return valid=True, since RPN minibatches are valid by design
    return True
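
# A minimal sketch of what the im_info handling above produces, using plain
# numpy and made-up frame sizes/scales (the real entries come from
# roidb['frames_info']):
def _sketch_im_info_blob():
    frames_info = [{'height': 480, 'width': 640},
                   {'height': 720, 'width': 1280}]  # hypothetical frames
    im_scales = [1.5, 1.0]
    im_info_list = []
    for entry, s in zip(frames_info, im_scales):
        im_info_list.append(np.array(
            [[np.round(entry['height'] * s),
              np.round(entry['width'] * s), s]], dtype=np.float32))
    im_info = np.concatenate(im_info_list)  # shape (2, 3): one row per frame
    print(im_info)
    # [[ 720.  960.    1.5]
    #  [ 720. 1280.    1. ]]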
def _create_anchors():
    blobs = {}
    if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
        # RPN applied to many feature levels, as in the FPN paper
        k_max = cfg.FPN.RPN_MAX_LEVEL
        k_min = cfg.FPN.RPN_MIN_LEVEL
        for lvl in range(k_min, k_max + 1):
            field_stride = 2.**lvl
            anchor_sizes = (cfg.FPN.RPN_ANCHOR_START_SIZE * 2.**(lvl - k_min), )
            anchor_aspect_ratios = cfg.FPN.RPN_ASPECT_RATIOS
            foa = data_utils.get_field_of_anchors(
                field_stride, anchor_sizes, anchor_aspect_ratios
            )
            blobs['anchors_%d' % lvl] = foa.cell_anchors
    else:
        foa = data_utils.get_field_of_anchors(
            cfg.RPN.STRIDE, cfg.RPN.SIZES, cfg.RPN.ASPECT_RATIOS
        )
        blobs['anchors'] = foa.cell_anchors
    return blobs
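
# For orientation, a quick check of the stride/anchor-size schedule the FPN
# branch above sets up, assuming the common defaults RPN_MIN_LEVEL=2,
# RPN_MAX_LEVEL=6, RPN_ANCHOR_START_SIZE=32 (the exact values depend on the
# config in use):
def _sketch_fpn_anchor_schedule(k_min=2, k_max=6, start_size=32):
    for lvl in range(k_min, k_max + 1):
        stride = 2. ** lvl
        size = start_size * 2. ** (lvl - k_min)
        print('anchors_%d: stride=%g, anchor size=%g' % (lvl, stride, size))
    # anchors_2: stride=4,  anchor size=32
    # anchors_3: stride=8,  anchor size=64
    # anchors_4: stride=16, anchor size=128
    # anchors_5: stride=32, anchor size=256
    # anchors_6: stride=64, anchor size=512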
def generate_all_anchors():
    k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
    scales_per_octave = cfg.RETINANET.SCALES_PER_OCTAVE
    num_aspect_ratios = len(cfg.RETINANET.ASPECT_RATIOS)
    aspect_ratios = cfg.RETINANET.ASPECT_RATIOS
    anchor_scale = cfg.RETINANET.ANCHOR_SCALE
    foas = []
    for lvl in range(k_min, k_max + 1):
        stride = 2. ** lvl
        for octave in range(scales_per_octave):
            octave_scale = 2 ** (octave / float(scales_per_octave))
            for idx in range(num_aspect_ratios):
                anchor_sizes = (stride * octave_scale * anchor_scale, )
                anchor_aspect_ratios = (aspect_ratios[idx], )
                foa = data_utils.get_field_of_anchors(
                    stride, anchor_sizes, anchor_aspect_ratios, octave, idx)
                foas.append(foa)
    return foas
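
# The triple loop yields one field of anchors per (level, octave, aspect)
# combination. A back-of-the-envelope count, assuming the usual RetinaNet
# defaults (levels 3..7, 3 scales per octave, 3 aspect ratios):
def _sketch_foa_count(k_min=3, k_max=7, scales_per_octave=3,
                      num_aspect_ratios=3):
    num_levels = k_max - k_min + 1
    print(num_levels * scales_per_octave * num_aspect_ratios)  # 45 FoAs total
    # i.e. 9 anchor shapes per level; the intermediate octave scales are
    print([round(2 ** (o / float(scales_per_octave)), 3)
           for o in range(scales_per_octave)])  # [1.0, 1.26, 1.587]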
def add_rpn_blobs(blobs, im_scales, roidb):
    """Add blobs needed for training RPN-only and end-to-end Faster R-CNN
    models."""
    if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
        # RPN applied to many feature levels, as in the FPN paper
        k_max = cfg.FPN.RPN_MAX_LEVEL
        k_min = cfg.FPN.RPN_MIN_LEVEL
        foas = []
        for lvl in range(k_min, k_max + 1):
            # field_stride = 2.**lvl
            anchor_sizes = (cfg.FPN.RPN_ANCHOR_START_SIZE * 2.**(lvl - k_min), )
            # Clamp the anchor field stride at 16 pixels for the upper levels
            field_stride = min(16., 2.**lvl)
            # anchor_sizes = (min(128., cfg.FPN.RPN_ANCHOR_START_SIZE * 2.**(lvl - k_min)), )
            anchor_aspect_ratios = cfg.FPN.RPN_ASPECT_RATIOS
            foa = data_utils.get_field_of_anchors(
                field_stride, anchor_sizes, anchor_aspect_ratios
            )
            foas.append(foa)
        all_anchors = np.concatenate([f.field_of_anchors for f in foas])
    else:
        foa = data_utils.get_field_of_anchors(
            cfg.RPN.STRIDE, cfg.RPN.SIZES, cfg.RPN.ASPECT_RATIOS
        )
        all_anchors = foa.field_of_anchors

    for im_i, entry in enumerate(roidb):
        scale = im_scales[im_i]
        im_height = np.round(entry['height'] * scale)
        im_width = np.round(entry['width'] * scale)
        gt_inds = np.where(
            (entry['gt_classes'] > 0) & (entry['is_crowd'] == 0)
        )[0]
        gt_rois = entry['boxes'][gt_inds, :] * scale
        # TODO(rbg): gt_boxes is poorly named;
        # should be something like 'gt_rois_info'
        gt_boxes = blob_utils.zeros((len(gt_inds), 6))
        gt_boxes[:, 0] = im_i  # batch inds
        gt_boxes[:, 1:5] = gt_rois
        gt_boxes[:, 5] = entry['gt_classes'][gt_inds]
        im_info = np.array([[im_height, im_width, scale]], dtype=np.float32)
        blobs['im_info'].append(im_info)

        # Add RPN targets
        if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
            # RPN applied to many feature levels, as in the FPN paper
            rpn_blobs = _get_rpn_blobs(
                im_height, im_width, foas, all_anchors, gt_rois
            )
            for i, lvl in enumerate(range(k_min, k_max + 1)):
                for k, v in rpn_blobs[i].items():
                    blobs[k + '_fpn' + str(lvl)].append(v)
        else:
            # Classical RPN, applied to a single feature level
            rpn_blobs = _get_rpn_blobs(
                im_height, im_width, [foa], all_anchors, gt_rois
            )
            for k, v in rpn_blobs.items():
                blobs[k].append(v)

    for k, v in blobs.items():
        if isinstance(v, list) and len(v) > 0:
            blobs[k] = np.concatenate(v)

    valid_keys = [
        'has_visible_keypoints', 'boxes', 'segms', 'seg_areas', 'gt_classes',
        'gt_overlaps', 'is_crowd', 'box_to_gt_ind_map', 'gt_keypoints'
    ]
    minimal_roidb = [{} for _ in range(len(roidb))]
    for i, e in enumerate(roidb):
        for k in valid_keys:
            if k in e:
                minimal_roidb[i][k] = e[k]
    blobs['roidb'] = blob_utils.serialize(minimal_roidb)

    # Always return valid=True, since RPN minibatches are valid by design
    return True
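
# The only deviation from the stock version in this variant is the clamped
# field stride. A quick check of what min(16., 2.**lvl) does for levels 2..6:
# anchors at levels 5 and 6 keep their large sizes but are laid out on the
# denser 16-pixel grid instead of the 32/64-pixel grids.
def _sketch_stride_clamp(k_min=2, k_max=6):
    for lvl in range(k_min, k_max + 1):
        print(lvl, min(16., 2. ** lvl))
    # 2 4.0 / 3 8.0 / 4 16.0 / 5 16.0 / 6 16.0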
def add_retinanet_blobs(blobs, im_scales, roidb, image_width, image_height):
    # Called once per training iteration.
    """Add RetinaNet blobs."""
    # RetinaNet is applied to many feature levels, as in the FPN paper
    k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
    scales_per_octave = cfg.RETINANET.SCALES_PER_OCTAVE
    num_aspect_ratios = len(cfg.RETINANET.ASPECT_RATIOS)
    aspect_ratios = cfg.RETINANET.ASPECT_RATIOS
    anchor_scale = cfg.RETINANET.ANCHOR_SCALE

    # get anchors from all levels for all scales/aspect ratios
    foas = []
    for lvl in range(k_min, k_max + 1):
        stride = 2.**lvl
        for octave in range(scales_per_octave):
            octave_scale = 2**(octave / float(scales_per_octave))
            for idx in range(num_aspect_ratios):
                anchor_sizes = (stride * octave_scale * anchor_scale, )
                anchor_aspect_ratios = (aspect_ratios[idx], )
                foa = data_utils.get_field_of_anchors(
                    stride, anchor_sizes, anchor_aspect_ratios, octave, idx)
                foas.append(foa)
    all_anchors = np.concatenate([f.field_of_anchors for f in foas])

    blobs['retnet_fg_num'], blobs['retnet_bg_num'] = 0.0, 0.0
    for im_i, entry in enumerate(roidb):
        scale = im_scales[im_i]
        im_height = np.round(entry['height'] * scale)
        im_width = np.round(entry['width'] * scale)
        gt_inds = np.where(
            (entry['gt_classes'] > 0) & (entry['is_crowd'] == 0))[0]
        assert len(gt_inds) > 0, \
            'Empty ground truth for image is not allowed. Please check.'
        gt_rois = entry['boxes'][gt_inds, :] * scale
        gt_classes = entry['gt_classes'][gt_inds]
        im_info = np.array([[im_height, im_width, scale]], dtype=np.float32)
        blobs['im_info'].append(im_info)

        retinanet_blobs, fg_num, bg_num = _get_retinanet_blobs(
            foas, all_anchors, gt_rois, gt_classes, image_width, image_height)
        for i, foa in enumerate(foas):
            for k, v in retinanet_blobs[i].items():
                # the way it stacks is:
                # [[anchors for image1] + [anchors for image2]]
                level = int(np.log2(foa.stride))
                key = '{}_fpn{}'.format(k, level)
                if k == 'retnet_roi_fg_bbox_locs':
                    v[:, 0] = im_i
                    # loc_stride: 80 * 4 if cls_specific else 4
                    loc_stride = 4  # 4 coordinates per bbox prediction
                    if cfg.RETINANET.CLASS_SPECIFIC_BBOX:
                        loc_stride *= (cfg.MODEL.NUM_CLASSES - 1)
                    anchor_ind = foa.octave * num_aspect_ratios + foa.aspect
                    # v[:, 1] is the class label [range 0-80] if we do
                    # class-specific bbox, otherwise it is 0. In the
                    # class-specific case the location of the current anchor's
                    # prediction is class_label * 4, and we then offset by
                    # loc_stride * anchor_ind to select the right anchor.
                    v[:, 1] *= 4
                    v[:, 1] += loc_stride * anchor_ind
                blobs[key].append(v)
        blobs['retnet_fg_num'] += fg_num
        blobs['retnet_bg_num'] += bg_num

    blobs['retnet_fg_num'] = blobs['retnet_fg_num'].astype(np.float32)
    blobs['retnet_bg_num'] = blobs['retnet_bg_num'].astype(np.float32)

    N = len(roidb)
    for k, v in blobs.items():
        if isinstance(v, list) and len(v) > 0:
            # compute number of anchors
            A = int(len(v) / N)
            # For the cls branch labels [per FPN level],
            # blobs['retnet_cls_labels_fpn{}'] is, up to this step, a list of
            # length N x A (N = num_images, A = num_anchors; e.g. N = 2,
            # A = 9). Each element has shape 1 x 1 x H x W, where H, W are the
            # spatial dimensions of the current FPN level. With a{i} denoting
            # the element for anchor i, the list is ordered
            # [[a0, ..., a9], [a0, ..., a9]], while the network predicts
            # 2 x (9 * 80) x H x W; so first concatenate the elements of each
            # image into one array, then concatenate the images to get
            # 2 x 9 x H x W.
            if k.find('retnet_cls_labels') >= 0:
                tmp = []
                # concat anchors within an image
                for i in range(0, len(v), A):
                    tmp.append(np.concatenate(v[i:i + A], axis=1))
                # concat images
                blobs[k] = np.concatenate(tmp, axis=0)
            else:
                # Bbox branch elements [per FPN level]: the targets and fg box
                # locations have shape M x 4, where M is the number of fg
                # locations in a given image at the current FPN level, ordered
                # [[a0, ..., a9], [a0, ..., a9]]. Concatenate them to form
                # M x 4.
                blobs[k] = np.concatenate(v, axis=0)
    return True
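
# The cls-label stacking at the end of add_retinanet_blobs is easy to get
# wrong, so here it is in isolation with toy shapes (N=2 images, A=3 anchor
# label maps per image, each 1 x 1 x H x W), mirroring the loop above:
def _sketch_cls_label_stacking():
    N, A, H, W = 2, 3, 4, 5
    v = [np.full((1, 1, H, W), i, dtype=np.int32) for i in range(N * A)]
    tmp = []
    for i in range(0, len(v), A):       # concat anchors within an image
        tmp.append(np.concatenate(v[i:i + A], axis=1))
    out = np.concatenate(tmp, axis=0)   # concat images
    print(out.shape)  # (2, 3, 4, 5) == N x A x H x W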
def add_rpn_blobs(blobs, im_scales, roidb):
    """Add blobs needed for training RPN-only and end-to-end Faster R-CNN
    models."""
    if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
        # RPN applied to many feature levels, as in the FPN paper
        k_max = cfg.FPN.RPN_MAX_LEVEL
        k_min = cfg.FPN.RPN_MIN_LEVEL
        foas = []
        for lvl in range(k_min, k_max + 1):
            field_stride = 2.**lvl
            anchor_sizes = (cfg.FPN.RPN_ANCHOR_START_SIZE * 2.**(lvl - k_min), )
            anchor_aspect_ratios = cfg.FPN.RPN_ASPECT_RATIOS
            foa = data_utils.get_field_of_anchors(
                field_stride, anchor_sizes, anchor_aspect_ratios)
            blobs['anchors_%d' % lvl] = foa.cell_anchors
            foas.append(foa)
        all_anchors = np.concatenate([f.field_of_anchors for f in foas])
    else:
        foa = data_utils.get_field_of_anchors(
            cfg.RPN.STRIDE, cfg.RPN.SIZES, cfg.RPN.ASPECT_RATIOS)
        all_anchors = foa.field_of_anchors
        blobs['anchors'] = foa.cell_anchors

    # this is to add some fpn targets
    for im_i, entry in enumerate(roidb):
        scale = im_scales[im_i]
        im_height = np.round(entry['height'] * scale)
        im_width = np.round(entry['width'] * scale)
        gt_inds = np.where(
            (entry['gt_classes'] > 0) & (entry['is_crowd'] == 0))[0]
        gt_rois = entry['boxes'][gt_inds, :] * scale
        im_info = np.array([[im_height, im_width, scale]], dtype=np.float32)
        blobs['im_info'].append(im_info)

        # Add RPN targets
        if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
            # RPN applied to many feature levels, as in the FPN paper
            rpn_blobs = _get_rpn_blobs(
                im_height, im_width, foas, all_anchors, gt_rois)
            for i, lvl in enumerate(range(k_min, k_max + 1)):
                for k, v in rpn_blobs[i].items():
                    blobs[k + '_fpn' + str(lvl)].append(v)
        else:
            # Classical RPN, applied to a single feature level
            rpn_blobs = _get_rpn_blobs(
                im_height, im_width, [foa], all_anchors, gt_rois)
            for k, v in rpn_blobs.items():
                blobs[k].append(v)

    for k, v in blobs.items():
        if isinstance(v, list) and len(v) > 0:
            blobs[k] = np.concatenate(v)

    if cfg.TRAIN.CPP_RPN == 'all':
        for im_i, entry in enumerate(roidb):
            scale = im_scales[im_i]
            gt_inds = np.where(entry['gt_classes'] > 0)[0]
            # gt_inds = np.where(
            #     (entry['gt_classes'] > 0) & (entry['is_crowd'] == 0))[0]
            # blobs['gt_boxes_%02d' % im_i] = entry['boxes'][gt_inds, :] * scale
            gt_boxes = entry['boxes'][gt_inds, :] * scale
            gt_areas = (gt_boxes[:, 2] - gt_boxes[:, 0] + 1.) * \
                (gt_boxes[:, 3] - gt_boxes[:, 1] + 1.)
            blobs['gt_boxes_%02d' % im_i] = np.hstack(
                (gt_boxes, gt_areas[:, np.newaxis]))
            # blobs['gt_boxes_%02d' % im_i] = entry['boxes'][gt_inds, :]
            blobs['gt_classes_%02d' % im_i] = entry['gt_classes'][gt_inds]
    else:
        valid_keys = [
            'has_visible_keypoints', 'boxes', 'segms', 'seg_areas',
            'gt_classes', 'gt_attributes', 'gt_overlaps', 'is_crowd',
            'box_to_gt_ind_map', 'gt_keypoints'
        ]
        minimal_roidb = [{} for _ in range(len(roidb))]
        for i, e in enumerate(roidb):
            for k in valid_keys:
                if k in e:
                    minimal_roidb[i][k] = e[k]
        blobs['roidb'] = blob_utils.serialize(minimal_roidb)

    # Always return valid=True, since RPN minibatches are valid by design
    return True
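
# The CPP_RPN branch stores per-image gt boxes with a fifth "area" column
# (using the +1 pixel convention). A small worked example of that layout,
# with a hypothetical box:
def _sketch_gt_boxes_with_area():
    gt_boxes = np.array([[10., 10., 49., 29.]])  # (x1, y1, x2, y2)
    gt_areas = (gt_boxes[:, 2] - gt_boxes[:, 0] + 1.) * \
               (gt_boxes[:, 3] - gt_boxes[:, 1] + 1.)
    print(np.hstack((gt_boxes, gt_areas[:, np.newaxis])))
    # [[ 10.  10.  49.  29. 800.]] -> the 'gt_boxes_%02d' blob format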
def add_retinanet_blobs(blobs, im_scales, roidb, image_width, image_height):
    """Add RetinaNet blobs."""
    # RetinaNet is applied to many feature levels, as in the FPN paper
    k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
    scales_per_octave = cfg.RETINANET.SCALES_PER_OCTAVE
    num_aspect_ratios = len(cfg.RETINANET.ASPECT_RATIOS)
    aspect_ratios = cfg.RETINANET.ASPECT_RATIOS
    anchor_scale = cfg.RETINANET.ANCHOR_SCALE

    # get anchors from all levels for all scales/aspect ratios
    foas = []
    for lvl in range(k_min, k_max + 1):
        stride = 2.**lvl
        for octave in range(scales_per_octave):
            octave_scale = 2**(octave / float(scales_per_octave))
            for idx in range(num_aspect_ratios):
                anchor_sizes = (stride * octave_scale * anchor_scale, )
                anchor_aspect_ratios = (aspect_ratios[idx], )
                foa = data_utils.get_field_of_anchors(
                    stride, anchor_sizes, anchor_aspect_ratios, octave, idx)
                foas.append(foa)
    all_anchors = np.concatenate([f.field_of_anchors for f in foas])

    blobs['retnet_fg_num'], blobs['retnet_bg_num'] = 0.0, 0.0
    for im_i, entry in enumerate(roidb):
        scale = im_scales[im_i]
        im_height = np.round(entry['height'] * scale)
        im_width = np.round(entry['width'] * scale)
        gt_inds = np.where(
            (entry['gt_classes'] > 0) & (entry['is_crowd'] == 0))[0]
        assert len(gt_inds) > 0, \
            'Empty ground truth for image is not allowed. Please check.'
        gt_rois = entry['boxes'][gt_inds, :] * scale
        gt_classes = entry['gt_classes'][gt_inds]
        im_info = np.array([[im_height, im_width, scale]], dtype=np.float32)
        blobs['im_info'].append(im_info)

        retinanet_blobs, fg_num, bg_num = _get_retinanet_blobs(
            foas, all_anchors, gt_rois, gt_classes, image_width, image_height)
        for i, foa in enumerate(foas):
            for k, v in retinanet_blobs[i].items():
                level = int(np.log2(foa.stride))
                key = '{}_fpn{}'.format(k, level)
                blobs[key].append(v)
        blobs['retnet_fg_num'] += fg_num
        blobs['retnet_bg_num'] += bg_num

    blobs['retnet_fg_num'] = blobs['retnet_fg_num'].astype(np.float32)
    blobs['retnet_bg_num'] = blobs['retnet_bg_num'].astype(np.float32)

    N = len(roidb)
    for k, v in blobs.items():
        if isinstance(v, list) and len(v) > 0:
            # compute number of anchors
            A = int(len(v) / N)
            # For the cls branch labels [per FPN level],
            # blobs['retnet_cls_labels_fpn{}'] is, up to this step, a list of
            # length N x A (N = num_images, A = num_anchors; e.g. N = 2,
            # A = 9). Each element has shape 1 x 1 x H x W, where H, W are the
            # spatial dimensions of the current FPN level. With a{i} denoting
            # the element for anchor i, the list is ordered
            # [[a0, ..., a9], [a0, ..., a9]], while the network predicts
            # 2 x (9 * 80) x H x W; so first concatenate the elements of each
            # image into one array, then concatenate the images to get
            # 2 x 9 x H x W.
            if k.find('retnet_cls_labels') >= 0 \
                    or k.find('retnet_roi_bbox_targets') >= 0:
                tmp = []
                # concat anchors within an image
                for i in range(0, len(v), A):
                    tmp.append(np.concatenate(v[i:i + A], axis=1))
                # concat images
                blobs[k] = np.concatenate(tmp, axis=0)
            else:
                # Bbox branch elements [per FPN level]: targets and fg box
                # locations of shape M x 4, where M is the number of fg
                # locations in a given image at this level, ordered
                # [[a0, ..., a9], [a0, ..., a9]]. Concatenate them to form
                # M x 4, then add a leading batch axis.
                blobs[k] = np.expand_dims(np.concatenate(v, axis=0), axis=0)

    valid_keys = [
        'has_visible_keypoints', 'boxes', 'segms', 'seg_areas', 'gt_classes',
        'gt_overlaps', 'is_crowd', 'box_to_gt_ind_map', 'gt_keypoints'
    ]
    minimal_roidb = [{} for _ in range(len(roidb))]
    for i, e in enumerate(roidb):
        for k in valid_keys:
            if k in e:
                minimal_roidb[i][k] = e[k]
    # blobs['roidb'] = blob_utils.serialize(minimal_roidb)
    blobs['roidb'] = minimal_roidb
    return True
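
# The expand_dims in the bbox branch of this variant adds a leading singleton
# batch axis to the concatenated per-image targets; with toy shapes:
def _sketch_bbox_branch_expand_dims():
    v = [np.zeros((3, 4)), np.zeros((2, 4))]  # per-image M_i x 4 fg targets
    out = np.expand_dims(np.concatenate(v, axis=0), axis=0)
    print(out.shape)  # (1, 5, 4)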
def add_rpn_blobs(blobs, im_scales, roidb):  # LJY: called by minibatch.py
    """Add blobs needed for training RPN-only and end-to-end Faster R-CNN
    models."""
    if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
        # RPN applied to many feature levels, as in the FPN paper
        k_max = cfg.FPN.RPN_MAX_LEVEL
        k_min = cfg.FPN.RPN_MIN_LEVEL
        foas = []
        for lvl in range(k_min, k_max + 1):
            field_stride = 2.**lvl
            anchor_sizes = (cfg.FPN.RPN_ANCHOR_START_SIZE * 2.**(lvl - k_min), )
            anchor_aspect_ratios = cfg.FPN.RPN_ASPECT_RATIOS
            foa = data_utils.get_field_of_anchors(  # foa: field of anchors
                field_stride, anchor_sizes, anchor_aspect_ratios)
            foas.append(foa)
        all_anchors = np.concatenate([f.field_of_anchors for f in foas])
    else:
        foa = data_utils.get_field_of_anchors(
            cfg.RPN.STRIDE, cfg.RPN.SIZES, cfg.RPN.ASPECT_RATIOS)
        all_anchors = foa.field_of_anchors  # LJY: 105840 x 4 array

    for im_i, entry in enumerate(roidb):
        scale = im_scales[im_i]
        im_height = np.round(entry['height'] * scale)
        im_width = np.round(entry['width'] * scale)
        gt_inds = np.where(
            (entry['gt_classes'] > 0) & (entry['is_crowd'] == 0))[0]
        gt_rois = entry['boxes'][gt_inds, :] * scale  # LJY: scale them
        # filter gt_rois based on SNIP
        ind = snip_valid(gt_rois)
        gt_rois = gt_rois[ind]
        # TODO(rbg): gt_boxes is poorly named;
        # should be something like 'gt_rois_info'
        # gt_boxes = blob_utils.zeros((len(gt_inds), 6))
        # gt_boxes[:, 0] = im_i  # batch inds
        # gt_boxes[:, 1:5] = gt_rois
        # gt_boxes[:, 5] = entry['gt_classes'][gt_inds]
        im_info = np.array([[im_height, im_width, scale]], dtype=np.float32)
        blobs['im_info'].append(im_info)

        # Add RPN targets
        if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
            # RPN applied to many feature levels, as in the FPN paper
            rpn_blobs = _get_rpn_blobs(
                im_height, im_width, foas, all_anchors, gt_rois)
            for i, lvl in enumerate(range(k_min, k_max + 1)):
                for k, v in rpn_blobs[i].items():
                    blobs[k + '_fpn' + str(lvl)].append(v)
        else:
            # Classical RPN, applied to a single feature level
            rpn_blobs = _get_rpn_blobs(  # LJY: call the function below
                im_height, im_width, [foa], all_anchors, gt_rois)
            # rpn_blobs: 'rpn_labels_int32_wide', 'rpn_bbox_targets_wide',
            # 'rpn_bbox_inside_weights_wide', 'rpn_bbox_outside_weights_wide'
            for k, v in rpn_blobs.items():
                blobs[k].append(v)

    for k, v in blobs.items():
        # if k == 'has_mask':
        #     ForkedPdb().set_trace()
        if isinstance(v, list) and len(v) > 0:
            blobs[k] = np.concatenate(v)

    valid_keys = [
        'has_visible_keypoints', 'boxes', 'segms', 'seg_areas', 'gt_classes',
        'gt_overlaps', 'is_crowd', 'box_to_gt_ind_map', 'gt_keypoints',
        'has_mask', 'image', 'parts_list', 'part_boxes'  # LJ: image/parts keys
    ]
    minimal_roidb = [{} for _ in range(len(roidb))]
    for i, e in enumerate(roidb):
        for k in valid_keys:
            if k in e:
                minimal_roidb[i][k] = e[k]
    # blobs['roidb'] = blob_utils.serialize(minimal_roidb)
    blobs['roidb'] = minimal_roidb

    # Always return valid=True, since RPN minibatches are valid by design
    return True
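
# snip_valid is not defined in this file; it comes from the fork this variant
# belongs to. As a rough, hypothetical sketch of a SNIP-style filter (SNIP
# trains each image scale only on objects within a valid resolution range);
# the thresholds here are illustrative, not the fork's actual values:
def snip_valid_sketch(gt_rois, lo=32.0, hi=320.0):
    w = gt_rois[:, 2] - gt_rois[:, 0] + 1.
    h = gt_rois[:, 3] - gt_rois[:, 1] + 1.
    s = np.sqrt(w * h)  # object scale as sqrt of box area
    return np.where((s >= lo) & (s <= hi))[0]  # indices of in-range boxes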
def add_rpn_blobs(blobs, im_scales, roidb):
    """Add blobs needed for training RPN-only and end-to-end Faster R-CNN
    models."""
    if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
        # RPN applied to many feature levels, as in the FPN paper
        k_max = cfg.FPN.RPN_MAX_LEVEL
        k_min = cfg.FPN.RPN_MIN_LEVEL
        foas = []
        for lvl in range(k_min, k_max + 1):
            field_stride = 2.**lvl
            anchor_sizes = (cfg.FPN.RPN_ANCHOR_START_SIZE * 2.**(lvl - k_min), )
            anchor_aspect_ratios = cfg.FPN.RPN_ASPECT_RATIOS
            foa = data_utils.get_field_of_anchors(
                field_stride, anchor_sizes, anchor_aspect_ratios
            )
            foas.append(foa)
        all_anchors = np.concatenate([f.field_of_anchors for f in foas])
    else:
        foa = data_utils.get_field_of_anchors(
            cfg.RPN.STRIDE, cfg.RPN.SIZES, cfg.RPN.ASPECT_RATIOS
        )
        all_anchors = foa.field_of_anchors

    for im_i, entry in enumerate(roidb):
        scale = im_scales[im_i]
        im_height = np.round(entry['height'] * scale)
        im_width = np.round(entry['width'] * scale)
        gt_inds = np.where(
            (entry['gt_classes'] > 0) & (entry['is_crowd'] == 0)
        )[0]
        gt_rois = entry['boxes'][gt_inds, :] * scale
        # TODO(rbg): gt_boxes is poorly named;
        # should be something like 'gt_rois_info'
        gt_boxes = blob_utils.zeros((len(gt_inds), 6))
        gt_boxes[:, 0] = im_i  # batch inds
        gt_boxes[:, 1:5] = gt_rois
        gt_boxes[:, 5] = entry['gt_classes'][gt_inds]
        im_info = np.array([[im_height, im_width, scale]], dtype=np.float32)
        blobs['im_info'].append(im_info)

        # Add RPN targets
        if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
            # RPN applied to many feature levels, as in the FPN paper
            rpn_blobs = _get_rpn_blobs(
                im_height, im_width, foas, all_anchors, gt_rois
            )
            for i, lvl in enumerate(range(k_min, k_max + 1)):
                for k, v in rpn_blobs[i].items():
                    blobs[k + '_fpn' + str(lvl)].append(v)
        else:
            # Classical RPN, applied to a single feature level
            rpn_blobs = _get_rpn_blobs(
                im_height, im_width, [foa], all_anchors, gt_rois
            )
            for k, v in rpn_blobs.items():
                blobs[k].append(v)

    for k, v in blobs.items():
        if isinstance(v, list) and len(v) > 0:
            blobs[k] = np.concatenate(v)

    valid_keys = [
        'has_visible_keypoints', 'boxes', 'segms', 'seg_areas', 'gt_classes',
        'gt_overlaps', 'is_crowd', 'box_to_gt_ind_map', 'gt_keypoints'
    ]
    minimal_roidb = [{} for _ in range(len(roidb))]
    for i, e in enumerate(roidb):
        for k in valid_keys:
            if k in e:
                minimal_roidb[i][k] = e[k]
    blobs['roidb'] = blob_utils.serialize(minimal_roidb)

    # Always return valid=True, since RPN minibatches are valid by design
    return True
def add_rpn_blobs(blobs, im_scales, roidb):
    """Add blobs needed for training RPN-only and end-to-end Faster R-CNN
    models."""
    if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
        # RPN applied to many feature levels, as in the FPN paper
        k_max = cfg.FPN.RPN_MAX_LEVEL
        k_min = cfg.FPN.RPN_MIN_LEVEL
        foas = []
        for lvl in range(k_min, k_max + 1):
            field_stride = 2.**lvl
            anchor_sizes = (cfg.FPN.RPN_ANCHOR_START_SIZE * 2.**(lvl - k_min), )
            anchor_aspect_ratios = cfg.FPN.RPN_ASPECT_RATIOS
            foa = data_utils.get_field_of_anchors(
                field_stride, anchor_sizes, anchor_aspect_ratios)
            foas.append(foa)
        all_anchors = np.concatenate([f.field_of_anchors for f in foas])
    else:
        foa = data_utils.get_field_of_anchors(
            cfg.RPN.STRIDE, cfg.RPN.SIZES, cfg.RPN.ASPECT_RATIOS)
        all_anchors = foa.field_of_anchors

    for im_i, entry in enumerate(roidb):
        scale = im_scales[im_i]
        im_height = np.round(entry['height'] * scale)
        im_width = np.round(entry['width'] * scale)
        gt_inds = np.where(
            (entry['gt_classes'] > 0) & (entry['is_crowd'] == 0))[0]
        # Added to ignore anchors that overlap a crowd area. Note that, as
        # written, gt_inds already excludes crowd boxes, so this index set is
        # always empty; for the filter to take effect, crowd indices would
        # have to be computed on the full entry before the is_crowd == 0 mask.
        ignore_inds = np.where(entry['is_crowd'][gt_inds] == 1)[0]
        if len(ignore_inds) == 0:
            ignore_inds = None
        gt_rois = entry['boxes'][gt_inds, :] * scale
        # TODO(rbg): gt_boxes is poorly named;
        # should be something like 'gt_rois_info'
        gt_boxes = blob_utils.zeros((len(gt_inds), 6))
        gt_boxes[:, 0] = im_i  # batch inds
        gt_boxes[:, 1:5] = gt_rois
        gt_boxes[:, 5] = entry['gt_classes'][gt_inds]
        im_info = np.array([[im_height, im_width, scale]], dtype=np.float32)
        blobs['im_info'].append(im_info)

        # Add RPN targets
        if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
            # RPN applied to many feature levels, as in the FPN paper
            rpn_blobs, vis_labels, vis_anchors = _get_rpn_blobs(
                im_height, im_width, foas, all_anchors, gt_rois, ignore_inds)
            for i, lvl in enumerate(range(k_min, k_max + 1)):
                for k, v in rpn_blobs[i].items():
                    blobs[k + '_fpn' + str(lvl)].append(v)
        else:
            # Classical RPN, applied to a single feature level
            rpn_blobs, vis_labels, vis_anchors = _get_rpn_blobs(
                im_height, im_width, [foa], all_anchors, gt_rois, ignore_inds)
            for k, v in rpn_blobs.items():
                blobs[k].append(v)

        if cfg.TRAIN.VIS_ANCHOR:
            im = blobs['data'][0, :, :, :].squeeze() + np.array(
                cfg.PIXEL_MEANS).transpose((2, 0, 1))
            idx = np.where(vis_labels == 1)[0]
            anchor_bboxes = vis_anchors[idx, :]
            if not osp.exists(cfg.TRAIN.VIS_ANCHOR_DIR):
                os.makedirs(cfg.TRAIN.VIS_ANCHOR_DIR)
            save_path = osp.join(
                cfg.TRAIN.VIS_ANCHOR_DIR,
                osp.splitext(os.path.basename(entry['image']))[0])
            vis2d_utils.draw_pred_and_gt_tensor(
                im, gt_rois, save_path, anchor_bboxes)

    for k, v in blobs.items():
        if isinstance(v, list) and len(v) > 0:
            blobs[k] = np.concatenate(v)

    if (cfg.LESION.USE_POSITION or cfg.LESION.POSITION_RCNN or
            cfg.LESION.SHALLOW_POSITION or cfg.LESION.MM_POS):
        valid_keys = [
            'has_visible_keypoints', 'boxes', 'segms', 'seg_areas',
            'gt_classes', 'gt_overlaps', 'is_crowd', 'box_to_gt_ind_map',
            'gt_keypoints', 'z_position'
        ]
    else:
        valid_keys = [
            'has_visible_keypoints', 'boxes', 'segms', 'seg_areas',
            'gt_classes', 'gt_overlaps', 'is_crowd', 'box_to_gt_ind_map',
            'gt_keypoints'
        ]
    minimal_roidb = [{} for _ in range(len(roidb))]
    for i, e in enumerate(roidb):
        for k in valid_keys:
            if k in e:
                minimal_roidb[i][k] = e[k]
    # blobs['roidb'] = blob_utils.serialize(minimal_roidb)
    blobs['roidb'] = minimal_roidb

    # Always return valid=True, since RPN minibatches are valid by design
    return True
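
# The visualization path adds cfg.PIXEL_MEANS back onto the CHW data blob
# before drawing. A standalone check of that broadcast, assuming Detectron's
# 1 x 1 x 3 BGR pixel-mean layout (the values below are Detectron's defaults,
# but treat them as an assumption):
def _sketch_unnormalize_for_vis():
    pixel_means = np.array([[[102.9801, 115.9465, 122.7717]]])  # 1 x 1 x 3
    data = np.zeros((3, 8, 8), dtype=np.float32)                # CHW blob
    im = data + pixel_means.transpose((2, 0, 1))  # 3 x 1 x 1, broadcasts per channel
    print(im.shape)  # (3, 8, 8)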
def add_retinanet_blobs(blobs, im_scales, roidb, image_width, image_height):
    """Add RetinaNet blobs."""
    # RetinaNet is applied to many feature levels, as in the FPN paper
    k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
    scales_per_octave = cfg.RETINANET.SCALES_PER_OCTAVE
    num_aspect_ratios = len(cfg.RETINANET.ASPECT_RATIOS)
    aspect_ratios = cfg.RETINANET.ASPECT_RATIOS
    anchor_scale = cfg.RETINANET.ANCHOR_SCALE

    # get anchors from all levels for all scales/aspect ratios
    foas = []
    for lvl in range(k_min, k_max + 1):
        stride = 2. ** lvl
        for octave in range(scales_per_octave):
            octave_scale = 2 ** (octave / float(scales_per_octave))
            for idx in range(num_aspect_ratios):
                anchor_sizes = (stride * octave_scale * anchor_scale, )
                anchor_aspect_ratios = (aspect_ratios[idx], )
                foa = data_utils.get_field_of_anchors(
                    stride, anchor_sizes, anchor_aspect_ratios, octave, idx)
                foas.append(foa)
    all_anchors = np.concatenate([f.field_of_anchors for f in foas])

    blobs['retnet_fg_num'], blobs['retnet_bg_num'] = 0.0, 0.0
    for im_i, entry in enumerate(roidb):
        scale = im_scales[im_i]
        im_height = np.round(entry['height'] * scale)
        im_width = np.round(entry['width'] * scale)
        gt_inds = np.where(
            (entry['gt_classes'] > 0) & (entry['is_crowd'] == 0))[0]
        assert len(gt_inds) > 0, \
            'Empty ground truth for image is not allowed. Please check.'

        # --------------------------- vis image -------------------
        # print("image:", entry['image'])
        img = cv2.imread(entry['image'])
        bb, gg, rr = cv2.split(img)
        img = cv2.merge([rr, gg, bb])  # BGR -> RGB
        if entry['flipped']:
            img = cv2.flip(img, 1)

        # --------------------- segms --------------------
        rects = []
        coss = []
        rois = []
        for seg_index in range(len(entry['segms'])):
            seg = entry['segms'][seg_index]
            seg = [y for s in seg for y in s]
            try:
                seg = np.array(seg, dtype=np.float32) * scale
                cnt = np.reshape(seg, (-1, 1, 2))
                rect = cv2.minAreaRect(cnt)
            except Exception:
                # Degenerate polygon: fall back to the axis-aligned gt box
                w = entry['boxes'][seg_index][2] - entry['boxes'][seg_index][0] + 1
                h = entry['boxes'][seg_index][3] - entry['boxes'][seg_index][1] + 1
                x = entry['boxes'][seg_index][0] + 0.5 * w
                y = entry['boxes'][seg_index][1] + 0.5 * h
                rect = ((x * scale, y * scale), (w * scale, h * scale), 0.0)
            # ---------- restrict theta ---------------------
            # if rect[1][0] == rect[1][1]:
            #     rect = ((rect[0][0], rect[0][1]), (rect[1][0], rect[1][1]), 0.0)
            # if rect[2] < -45:
            #     rect = ((rect[0][0], rect[0][1]),
            #             (rect[1][1], rect[1][0]), rect[2] + 90.0)
            # --------- restrict a > b & theta -------------------
            if rect[1][0] < rect[1][1]:
                rect = ((rect[0][0], rect[0][1]),
                        (rect[1][1], rect[1][0]), rect[2] + 90.0)
            # -------------- vis rect -----------------------
            # (debug drawing of gt boxes and fitted ellipses, kept for reference)
            # w = entry['boxes'][seg_index][2] - entry['boxes'][seg_index][0] + 1
            # h = entry['boxes'][seg_index][3] - entry['boxes'][seg_index][1] + 1
            # left = entry['boxes'][seg_index][0]
            # top = entry['boxes'][seg_index][1]
            # cv2.rectangle(img, (int(left), int(top)),
            #               (int(left + w), int(top + h)), (0, 255, 0), 2)
            # color = [int(random.random() * 255), int(random.random() * 255),
            #          int(random.random() * 255)]
            # ell = ((rect[0][0] / scale, rect[0][1] / scale),
            #        (rect[1][0] / scale, rect[1][1] / scale), rect[2])
            # cv2.ellipse(img, ell, color, 2)
            # if rect[1][0] < rect[1][1]:
            #     rect = ((rect[0][0], rect[0][1]),
            #             (rect[1][1], rect[1][0]), rect[2] - 90.0)
            # -----------------------
            # Treat the min-area rect as an ellipse with semi-axes a/2, b/2
            # rotated by theta, and compute its axis-aligned bounding box.
            x1 = rect[0][0]
            y1 = rect[0][1]
            a = rect[1][0]
            b = rect[1][1]
            theta = rect[2]
            theta_pi = theta * np.pi / 180
            T = np.array([[np.cos(theta_pi), -np.sin(theta_pi)],
                          [np.sin(theta_pi), np.cos(theta_pi)]])
            C = np.array([[4.0 / (a * a), 0], [0, 4.0 / (b * b)]])
            M = np.dot(np.dot(np.transpose(T), C), T)
            dy = np.sqrt(4 * M[0][0] /
                         (4 * M[0][0] * M[1][1] - (M[0][1] + M[1][0]) ** 2))
            dx = np.sqrt(4 * M[1][1] /
                         (4 * M[0][0] * M[1][1] - (M[0][1] + M[1][0]) ** 2))
            x_min = x1 - dx
            x_max = x1 + dx
            y_min = y1 - dy
            y_max = y1 + dy
            rois.append([x_min, y_min, x_max, y_max])
            rects.append([(x_min + x_max) / 2.0, (y_min + y_max) / 2.0,
                          x_max - x_min, y_max - y_min])
            # Angle encoding: cosines of the doubled angle at theta_segs
            # evenly spaced offsets (here 4 offsets of 90 degrees each).
            cos = []
            theta_segs = 4
            for i in range(theta_segs):
                cos.append(math.cos(
                    (rect[2] * 2.0 - i * (360.0 / theta_segs)) / 180.0 * math.pi))
            coss.append(cos)

        # -------------------- gt rects -------------
        gt_rects = np.array(rects)[gt_inds, :]  # [[x, y, a, b]]
        gt_cos = np.array(coss)[gt_inds, :]     # [[cos(2*theta - i * 90), ...]]
        gt_rois = np.array(rois, dtype=np.float32)[gt_inds, :]
        # gt_rois = entry['boxes'][gt_inds, :] * scale
        gt_classes = entry['gt_classes'][gt_inds]
        im_info = np.array([[im_height, im_width, scale]], dtype=np.float32)
        blobs['im_info'].append(im_info)

        # ------------------ edit func -----------------------------
        retinanet_blobs, fg_num, bg_num = _get_retinanet_blobs(
            foas, all_anchors, gt_rois, gt_classes, gt_rects, gt_cos,
            image_width, image_height)
        for i, foa in enumerate(foas):
            for k, v in retinanet_blobs[i].items():
                # the way it stacks is:
                # [[anchors for image1] + [anchors for image2]]
                level = int(np.log2(foa.stride))
                key = '{}_fpn{}'.format(k, level)
                if k == 'retnet_roi_fg_bbox_locs':
                    v[:, 0] = im_i
                    # loc_stride: 80 * 4 if cls_specific else 4
                    loc_stride = 4  # 4 coordinates per bbox prediction
                    if cfg.RETINANET.CLASS_SPECIFIC_BBOX:
                        loc_stride *= (cfg.MODEL.NUM_CLASSES - 1)
                    anchor_ind = foa.octave * num_aspect_ratios + foa.aspect
                    # v[:, 1] is the class label [range 0-80] if we do
                    # class-specific bbox, otherwise it is 0. In the
                    # class-specific case the location of the current anchor's
                    # prediction is class_label * 4, offset by
                    # loc_stride * anchor_ind to select the right anchor.
                    v[:, 1] *= 4
                    v[:, 1] += loc_stride * anchor_ind
                blobs[key].append(v)
        blobs['retnet_fg_num'] += fg_num
        blobs['retnet_bg_num'] += bg_num

    blobs['retnet_fg_num'] = blobs['retnet_fg_num'].astype(np.float32)
    blobs['retnet_bg_num'] = blobs['retnet_bg_num'].astype(np.float32)

    N = len(roidb)
    for k, v in blobs.items():
        if isinstance(v, list) and len(v) > 0:
            # compute number of anchors
            A = int(len(v) / N)
            # Same stacking as in the variants above: concatenate the A
            # per-anchor 1 x 1 x H x W label maps of each image along axis 1,
            # then concatenate the images along axis 0.
            if k.find('retnet_cls_labels') >= 0:
                tmp = []
                # concat anchors within an image
                for i in range(0, len(v), A):
                    tmp.append(np.concatenate(v[i:i + A], axis=1))
                # concat images
                blobs[k] = np.concatenate(tmp, axis=0)
            else:
                # bbox branch: per-image M x 4 targets and fg box locations,
                # ordered [[a0, ..., a9], [a0, ..., a9]]; concatenate to M x 4
                blobs[k] = np.concatenate(v, axis=0)
    return True
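
# A sanity check of the ellipse bounding-box algebra used above: the min-area
# rect is read as an ellipse x^T M x = 1 with M = T^T C T and
# C = diag(4/a^2, 4/b^2); its axis-aligned half-extents are
# dx = sqrt(4*M11/d), dy = sqrt(4*M00/d), where
# d = 4*M00*M11 - (M01 + M10)^2. At theta = 0 this must reduce to
# (a/2, b/2), and at 90 degrees the axes swap:
def _sketch_ellipse_half_extents(a=10.0, b=4.0):
    def half_extents(theta_deg):
        t = np.deg2rad(theta_deg)
        T = np.array([[np.cos(t), -np.sin(t)], [np.sin(t), np.cos(t)]])
        C = np.array([[4.0 / (a * a), 0.0], [0.0, 4.0 / (b * b)]])
        M = T.T.dot(C).dot(T)
        d = 4 * M[0][0] * M[1][1] - (M[0][1] + M[1][0]) ** 2
        return np.sqrt(4 * M[1][1] / d), np.sqrt(4 * M[0][0] / d)
    print(half_extents(0.0))   # (5.0, 2.0) == (a/2, b/2)
    print(half_extents(90.0))  # (2.0, 5.0)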