def _create_cell_anchors():
    """
    Generate all types of anchors for all fpn levels/scales/aspect ratios.
    This function is called only once at the beginning of inference.
    """
    k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
    scales_per_octave = cfg.RETINANET.SCALES_PER_OCTAVE
    aspect_ratios = cfg.RETINANET.ASPECT_RATIOS
    anchor_scale = cfg.RETINANET.ANCHOR_SCALE
    A = scales_per_octave * len(aspect_ratios)
    anchors = {}
    for lvl in range(k_min, k_max + 1):
        # create cell anchors array
        stride = 2. ** lvl
        cell_anchors = np.zeros((A, 4))
        a = 0
        for octave in range(scales_per_octave):
            octave_scale = 2 ** (octave / float(scales_per_octave))
            for aspect in aspect_ratios:
                anchor_sizes = (stride * octave_scale * anchor_scale, )
                anchor_aspect_ratios = (aspect, )
                cell_anchors[a, :] = generate_anchors(
                    stride=stride, sizes=anchor_sizes,
                    aspect_ratios=anchor_aspect_ratios)
                a += 1
        anchors[lvl] = cell_anchors
    return anchors
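For orientation, here is a minimal, self-contained sketch of the anchor sizes the loop above requests per FPN level. The level range (3-7), scales_per_octave=3 and anchor_scale=4 are illustrative values assumed for this sketch, not values read from cfg:

# Sketch only: per-level anchor sizes requested by _create_cell_anchors,
# assuming levels 3..7, 3 scales per octave and anchor_scale = 4.
k_min, k_max = 3, 7
scales_per_octave = 3
anchor_scale = 4
for lvl in range(k_min, k_max + 1):
    stride = 2. ** lvl
    sizes = [stride * 2 ** (octave / float(scales_per_octave)) * anchor_scale
             for octave in range(scales_per_octave)]
    print(lvl, stride, [round(s, 1) for s in sizes])
# lvl 3 -> stride 8.0,   sizes [32.0, 40.3, 50.8]
# lvl 4 -> stride 16.0,  sizes [64.0, 80.6, 101.6]
# ...
# lvl 7 -> stride 128.0, sizes [512.0, 645.1, 812.7]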
def get_anchors(spatial_scale, anchor_sizes):
    anchors = generate_anchors.generate_anchors(
        stride=1.0 / spatial_scale,
        sizes=anchor_sizes,
        aspect_ratios=cfg.RPN.ASPECT_RATIOS,
    ).astype(np.float32)
    return anchors
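This wrapper only inverts the feature map's spatial scale to recover the image-space stride before delegating to generate_anchors; for example (the 1/16 value below is illustrative, not a cfg default):

# Illustrative only: a feature map at 1/16 of the input resolution
# corresponds to an anchor stride of 16 pixels in the input image.
spatial_scale = 1. / 16
stride = 1. / spatial_scale
print(stride)  # 16.0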
def add_single_scale_rpn_outputs(model, blob_in, dim_in, spatial_scale):
    """Add RPN outputs to a single scale model (i.e., no FPN)."""
    anchors = generate_anchors(
        stride=1. / spatial_scale,
        sizes=cfg.RPN.SIZES,
        aspect_ratios=cfg.RPN.ASPECT_RATIOS)
    num_anchors = anchors.shape[0]
    dim_out = dim_in
    # RPN hidden representation
    model.Conv(
        blob_in, 'conv_rpn', dim_in, dim_out, kernel=3, pad=1, stride=1,
        weight_init=gauss_fill(0.01), bias_init=const_fill(0.0))
    model.Relu('conv_rpn', 'conv_rpn')
    # Proposal classification scores
    model.Conv(
        'conv_rpn', 'rpn_cls_logits', dim_in, num_anchors, kernel=1, pad=0,
        stride=1, weight_init=gauss_fill(0.01), bias_init=const_fill(0.0))
    # Proposal bbox regression deltas
    model.Conv(
        'conv_rpn', 'rpn_bbox_pred', dim_in, 4 * num_anchors, kernel=1,
        pad=0, stride=1, weight_init=gauss_fill(0.01),
        bias_init=const_fill(0.0))

    if not model.train or cfg.MODEL.FASTER_RCNN:
        # Proposals are needed during:
        #  1) inference (== not model.train) for RPN only and Faster R-CNN
        #  OR
        #  2) training for Faster R-CNN
        # Otherwise (== training for RPN only), proposals are not needed
        model.net.Sigmoid('rpn_cls_logits', 'rpn_cls_probs')
        model.GenerateProposals(
            ['rpn_cls_probs', 'rpn_bbox_pred', 'im_info'],
            ['rpn_rois', 'rpn_roi_probs'],
            anchors=anchors,
            spatial_scale=spatial_scale)

    if cfg.MODEL.FASTER_RCNN:
        if model.train:
            # Add op that generates training labels for in-network RPN
            # proposals
            model.GenerateProposalLabels(['rpn_rois', 'roidb', 'im_info'])
        else:
            # Alias rois to rpn_rois for inference
            model.net.Alias('rpn_rois', 'rois')
            # Alias da_rois to rpn_rois for inference
            model.net.Alias('rpn_rois', 'da_rois')
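The two 1x1 conv heads above emit one objectness logit and four box-regression deltas per anchor at every spatial location, so their channel counts follow directly from num_anchors. A small sketch, assuming 3 anchor sizes and 3 aspect ratios (illustrative values, not read from cfg):

# Sketch only: head widths implied by num_anchors, assuming 3 sizes and
# 3 aspect ratios per location (illustrative, not read from cfg).
num_sizes, num_aspect_ratios = 3, 3
num_anchors = num_sizes * num_aspect_ratios
print(num_anchors)      # 9 output channels for rpn_cls_logits
print(4 * num_anchors)  # 36 output channels for rpn_bbox_pred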
def get_field_of_anchors(
    stride, anchor_sizes, anchor_aspect_ratios, octave=None, aspect=None
):
    global _threadlocal_foa
    if not hasattr(_threadlocal_foa, 'cache'):
        _threadlocal_foa.cache = {}

    cache_key = str(stride) + str(anchor_sizes) + str(anchor_aspect_ratios)
    if cache_key in _threadlocal_foa.cache:
        return _threadlocal_foa.cache[cache_key]

    # Anchors at a single feature cell
    cell_anchors = generate_anchors(
        stride=stride, sizes=anchor_sizes, aspect_ratios=anchor_aspect_ratios
    )
    num_cell_anchors = cell_anchors.shape[0]

    # Generate canonical proposals from shifted anchors
    # Enumerate all shifted positions on the (H, W) grid
    fpn_max_size = cfg.FPN.COARSEST_STRIDE * np.ceil(
        cfg.TRAIN.MAX_SIZE / float(cfg.FPN.COARSEST_STRIDE)
    )
    field_size = int(np.ceil(fpn_max_size / float(stride)))
    shifts = np.arange(0, field_size) * stride
    shift_x, shift_y = np.meshgrid(shifts, shifts)
    shift_x = shift_x.ravel()
    shift_y = shift_y.ravel()
    shifts = np.vstack((shift_x, shift_y, shift_x, shift_y)).transpose()

    # Broadcast anchors over shifts to enumerate all anchors at all positions
    # in the (H, W) grid:
    #   - add A cell anchors of shape (1, A, 4) to
    #   - K shifts of shape (K, 1, 4) to get
    #   - all shifted anchors of shape (K, A, 4)
    #   - reshape to (K*A, 4) shifted anchors
    A = num_cell_anchors
    K = shifts.shape[0]
    field_of_anchors = (
        cell_anchors.reshape((1, A, 4)) +
        shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    )
    field_of_anchors = field_of_anchors.reshape((K * A, 4))
    foa = FieldOfAnchors(
        field_of_anchors=field_of_anchors.astype(np.float32),
        num_cell_anchors=num_cell_anchors,
        stride=stride,
        field_size=field_size,
        octave=octave,
        aspect=aspect
    )
    _threadlocal_foa.cache[cache_key] = foa
    return foa
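The shift/broadcast step above can be exercised in isolation. A minimal numpy sketch with made-up cell anchors and a tiny 2x2 grid (the real code derives field_size from cfg):

import numpy as np

# Sketch only: broadcast made-up cell anchors over a tiny 2x2 grid of shifts.
stride = 16
field_size = 2
cell_anchors = np.array([[-8., -8., 8., 8.],      # square anchor
                         [-16., -4., 16., 4.]])   # wide anchor
A = cell_anchors.shape[0]

shifts_1d = np.arange(0, field_size) * stride
shift_x, shift_y = np.meshgrid(shifts_1d, shifts_1d)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                    shift_x.ravel(), shift_y.ravel())).transpose()  # (K, 4)
K = shifts.shape[0]

# (1, A, 4) + (K, 1, 4) -> (K, A, 4) -> (K*A, 4)
field_of_anchors = (
    cell_anchors.reshape((1, A, 4)) +
    shifts.reshape((1, K, 4)).transpose((1, 0, 2))
).reshape((K * A, 4))
print(field_of_anchors.shape)  # (8, 4): 4 grid positions x 2 anchors each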
def add_fpn_rpn_outputs(model, blobs_in, dim_in, spatial_scales):
    """Add RPN on FPN specific outputs."""
    num_anchors = len(cfg.FPN.RPN_ASPECT_RATIOS)
    dim_out = dim_in

    k_max = cfg.FPN.RPN_MAX_LEVEL  # coarsest level of pyramid
    k_min = cfg.FPN.RPN_MIN_LEVEL  # finest level of pyramid
    assert len(blobs_in) == k_max - k_min + 1
    for lvl in range(k_min, k_max + 1):
        bl_in = blobs_in[k_max - lvl]  # blobs_in is in reversed order
        sc = spatial_scales[k_max - lvl]  # in reversed order
        slvl = str(lvl)

        if lvl == k_min:
            # Create conv ops with randomly initialized weights and
            # zeroed biases for the first FPN level; these will be shared by
            # all other FPN levels
            # RPN hidden representation
            conv_rpn_fpn = model.Conv(
                bl_in, 'conv_rpn_fpn' + slvl, dim_in, dim_out, kernel=3,
                pad=1, stride=1, weight_init=gauss_fill(0.01),
                bias_init=const_fill(0.0))
            model.Relu(conv_rpn_fpn, conv_rpn_fpn)
            # Proposal classification scores
            rpn_cls_logits_fpn = model.Conv(
                conv_rpn_fpn, 'rpn_cls_logits_fpn' + slvl, dim_in,
                num_anchors, kernel=1, pad=0, stride=1,
                weight_init=gauss_fill(0.01), bias_init=const_fill(0.0))
            # Proposal bbox regression deltas
            rpn_bbox_pred_fpn = model.Conv(
                conv_rpn_fpn, 'rpn_bbox_pred_fpn' + slvl, dim_in,
                4 * num_anchors, kernel=1, pad=0, stride=1,
                weight_init=gauss_fill(0.01), bias_init=const_fill(0.0))
        else:
            # Share weights and biases
            sk_min = str(k_min)
            # RPN hidden representation
            conv_rpn_fpn = model.ConvShared(
                bl_in, 'conv_rpn_fpn' + slvl, dim_in, dim_out, kernel=3,
                pad=1, stride=1,
                weight='conv_rpn_fpn' + sk_min + '_w',
                bias='conv_rpn_fpn' + sk_min + '_b')
            model.Relu(conv_rpn_fpn, conv_rpn_fpn)
            # Proposal classification scores
            rpn_cls_logits_fpn = model.ConvShared(
                conv_rpn_fpn, 'rpn_cls_logits_fpn' + slvl, dim_in,
                num_anchors, kernel=1, pad=0, stride=1,
                weight='rpn_cls_logits_fpn' + sk_min + '_w',
                bias='rpn_cls_logits_fpn' + sk_min + '_b')
            # Proposal bbox regression deltas
            rpn_bbox_pred_fpn = model.ConvShared(
                conv_rpn_fpn, 'rpn_bbox_pred_fpn' + slvl, dim_in,
                4 * num_anchors, kernel=1, pad=0, stride=1,
                weight='rpn_bbox_pred_fpn' + sk_min + '_w',
                bias='rpn_bbox_pred_fpn' + sk_min + '_b')

        if not model.train or cfg.MODEL.FASTER_RCNN:
            # Proposals are needed during:
            #  1) inference (== not model.train) for RPN only and Faster R-CNN
            #  OR
            #  2) training for Faster R-CNN
            # Otherwise (== training for RPN only), proposals are not needed
            lvl_anchors = generate_anchors(
                stride=2.**lvl,
                sizes=(cfg.FPN.RPN_ANCHOR_START_SIZE * 2.**(lvl - k_min), ),
                aspect_ratios=cfg.FPN.RPN_ASPECT_RATIOS)
            rpn_cls_probs_fpn = model.net.Sigmoid(
                rpn_cls_logits_fpn, 'rpn_cls_probs_fpn' + slvl)
            model.GenerateProposals(
                [rpn_cls_probs_fpn, rpn_bbox_pred_fpn, 'im_info'],
                ['rpn_rois_fpn' + slvl, 'rpn_roi_probs_fpn' + slvl],
                anchors=lvl_anchors, spatial_scale=sc)
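Each FPN level uses a single anchor size that doubles with the level. A small sketch of the stride and size fed to generate_anchors in the loop above, assuming RPN_ANCHOR_START_SIZE = 32 and levels 2-6 (assumed defaults, not read from cfg):

# Sketch only: stride and single anchor size per FPN level, assuming
# RPN_ANCHOR_START_SIZE = 32 and levels 2..6.
start_size, k_min, k_max = 32, 2, 6
for lvl in range(k_min, k_max + 1):
    print(lvl, 2. ** lvl, start_size * 2. ** (lvl - k_min))
# lvl 2 -> stride 4,  size 32
# lvl 3 -> stride 8,  size 64
# lvl 4 -> stride 16, size 128
# lvl 5 -> stride 32, size 256
# lvl 6 -> stride 64, size 512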
def get_anchors(spatial_scale):
    anchors = generate_anchors.generate_anchors(
        stride=1. / spatial_scale,
        sizes=cfg.RPN.SIZES,
        aspect_ratios=cfg.RPN.ASPECT_RATIOS).astype(np.float32)
    return anchors
def add_single_scale_rpn_outputs(model, blob_in, dim_in, spatial_scale):
    """Add RPN outputs to a single scale model (i.e., no FPN)."""
    anchors = generate_anchors(
        stride=1. / spatial_scale,
        sizes=cfg.RPN.SIZES,
        aspect_ratios=cfg.RPN.ASPECT_RATIOS
    )
    num_anchors = anchors.shape[0]
    dim_out = dim_in
    # RPN hidden representation
    model.Conv(
        blob_in, 'conv_rpn', dim_in, dim_out,
        kernel=3, pad=1, stride=1,
        weight_init=gauss_fill(0.01), bias_init=const_fill(0.0)
    )
    model.Relu('conv_rpn', 'conv_rpn')
    # Proposal classification scores
    model.Conv(
        'conv_rpn', 'rpn_cls_logits', dim_in, num_anchors,
        kernel=1, pad=0, stride=1,
        weight_init=gauss_fill(0.01), bias_init=const_fill(0.0)
    )
    # Proposal bbox regression deltas
    model.Conv(
        'conv_rpn', 'rpn_bbox_pred', dim_in, 4 * num_anchors,
        kernel=1, pad=0, stride=1,
        weight_init=gauss_fill(0.01), bias_init=const_fill(0.0)
    )

    if not model.train or cfg.MODEL.FASTER_RCNN:
        # Proposals are needed at inference and for Faster R-CNN training
        model.net.Sigmoid('rpn_cls_logits', 'rpn_cls_probs')
        model.GenerateProposals(
            ['rpn_cls_probs', 'rpn_bbox_pred', 'im_info'],
            ['rpn_rois', 'rpn_roi_probs'],
            anchors=anchors,
            spatial_scale=spatial_scale
        )

    if cfg.MODEL.FASTER_RCNN:
        if model.train:
            # Generate training labels for in-network RPN proposals
            model.GenerateProposalLabels(['rpn_rois', 'roidb', 'im_info'])
        else:
            # Alias rois to rpn_rois for inference
            model.net.Alias('rpn_rois', 'rois')
def add_fpn_rpn_outputs(model, blobs_in, dim_in, spatial_scales):
    """Add RPN on FPN specific outputs."""
    num_anchors = len(cfg.FPN.RPN_ASPECT_RATIOS)
    dim_out = dim_in

    # k_max = 6 and k_min = 2 by default, i.e. five pyramid levels in total
    k_max = cfg.FPN.RPN_MAX_LEVEL  # coarsest level of pyramid
    k_min = cfg.FPN.RPN_MIN_LEVEL  # finest level of pyramid
    assert len(blobs_in) == k_max - k_min + 1
    for lvl in range(k_min, k_max + 1):
        bl_in = blobs_in[k_max - lvl]  # blobs_in is in reversed order
        sc = spatial_scales[k_max - lvl]  # in reversed order
        slvl = str(lvl)

        # The first FPN level builds the ops; the remaining levels share them
        if lvl == k_min:
            # Create conv ops with randomly initialized weights and
            # zeroed biases for the first FPN level; these will be shared by
            # all other FPN levels
            # RPN hidden representation
            # Start with a 3x3 conv layer
            conv_rpn_fpn = model.Conv(
                bl_in, 'conv_rpn_fpn' + slvl, dim_in, dim_out, kernel=3,
                pad=1, stride=1, weight_init=gauss_fill(0.01),
                bias_init=const_fill(0.0))
            model.Relu(conv_rpn_fpn, conv_rpn_fpn)
            # Proposal classification scores
            # Probability that each anchor contains an object
            rpn_cls_logits_fpn = model.Conv(
                conv_rpn_fpn, 'rpn_cls_logits_fpn' + slvl, dim_in,
                num_anchors, kernel=1, pad=0, stride=1,
                weight_init=gauss_fill(0.01), bias_init=const_fill(0.0))
            # Proposal bbox regression deltas
            # Regression deltas for each anchor
            rpn_bbox_pred_fpn = model.Conv(
                conv_rpn_fpn, 'rpn_bbox_pred_fpn' + slvl, dim_in,
                4 * num_anchors, kernel=1, pad=0, stride=1,
                weight_init=gauss_fill(0.01), bias_init=const_fill(0.0))
        else:
            # Share weights and biases
            sk_min = str(k_min)
            # RPN hidden representation
            conv_rpn_fpn = model.ConvShared(
                bl_in, 'conv_rpn_fpn' + slvl, dim_in, dim_out, kernel=3,
                pad=1, stride=1,
                weight='conv_rpn_fpn' + sk_min + '_w',
                bias='conv_rpn_fpn' + sk_min + '_b')
            model.Relu(conv_rpn_fpn, conv_rpn_fpn)
            # Proposal classification scores
            rpn_cls_logits_fpn = model.ConvShared(
                conv_rpn_fpn, 'rpn_cls_logits_fpn' + slvl, dim_in,
                num_anchors, kernel=1, pad=0, stride=1,
                weight='rpn_cls_logits_fpn' + sk_min + '_w',
                bias='rpn_cls_logits_fpn' + sk_min + '_b')
            # Proposal bbox regression deltas
            rpn_bbox_pred_fpn = model.ConvShared(
                conv_rpn_fpn, 'rpn_bbox_pred_fpn' + slvl, dim_in,
                4 * num_anchors, kernel=1, pad=0, stride=1,
                weight='rpn_bbox_pred_fpn' + sk_min + '_w',
                bias='rpn_bbox_pred_fpn' + sk_min + '_b')

        # Provide training samples (proposals) for the downstream object
        # classification and box regression heads
        if not model.train or cfg.MODEL.FASTER_RCNN:
            # Proposals are needed during:
            #  1) inference (== not model.train) for RPN only and Faster R-CNN
            #  OR
            #  2) training for Faster R-CNN
            # Otherwise (== training for RPN only), proposals are not needed
            # During training, region proposals need to be selected here
            # Get the anchors used at this level
            lvl_anchors = generate_anchors(
                stride=2.**lvl,
                sizes=(cfg.FPN.RPN_ANCHOR_START_SIZE * 2.**(lvl - k_min), ),
                aspect_ratios=cfg.FPN.RPN_ASPECT_RATIOS)
            # slvl is the FPN level index in [2, ..., 6]
            # Add a Sigmoid to get per-anchor objectness probabilities
            rpn_cls_probs_fpn = model.net.Sigmoid(
                rpn_cls_logits_fpn, 'rpn_cls_probs_fpn' + slvl)
            # im_info = (network input height, network input width,
            #            scale factor from the original image to the input)
            # Produces (R, 5) rois; each ROI is a rectangle in
            # network-input coordinates
            model.GenerateProposals(
                [rpn_cls_probs_fpn, rpn_bbox_pred_fpn, 'im_info'],  # inputs
                ['rpn_rois_fpn' + slvl, 'rpn_roi_probs_fpn' + slvl],  # outputs
                anchors=lvl_anchors, spatial_scale=sc)
def get_field_of_anchors(
    stride, anchor_sizes, anchor_aspect_ratios, octave=None, aspect=None
):
    global _threadlocal_foa
    if not hasattr(_threadlocal_foa, 'cache'):
        _threadlocal_foa.cache = {}

    cache_key = str(stride) + str(anchor_sizes) + str(anchor_aspect_ratios)
    if cache_key in _threadlocal_foa.cache:
        return _threadlocal_foa.cache[cache_key]

    # Anchors at a single feature cell (a "cell" is one grid position)
    cell_anchors = generate_anchors(
        stride=stride, sizes=anchor_sizes, aspect_ratios=anchor_aspect_ratios
    )
    # e.g. 3 cell anchors (one per aspect ratio)
    num_cell_anchors = cell_anchors.shape[0]

    # Generate canonical proposals from shifted anchors
    # Enumerate all shifted positions on the (H, W) grid
    # TRAIN.MAX_SIZE affects this computation
    # The FPN is built from the coarsest level backwards, so the output size
    # of the last (coarsest) level is computed first
    fpn_max_size = cfg.FPN.COARSEST_STRIDE * np.ceil(
        cfg.TRAIN.MAX_SIZE / float(cfg.FPN.COARSEST_STRIDE)
    )
    # Number of feature cells per side at this level
    field_size = int(np.ceil(fpn_max_size / float(stride)))
    # Given the coordinates of all cell centers, adding the anchor offsets
    # yields the full set of RPN anchors
    # shifts are pixel coordinates in the network input
    shifts = np.arange(0, field_size) * stride
    # the set of anchor center coordinates in the network input
    shift_x, shift_y = np.meshgrid(shifts, shifts)
    shift_x = shift_x.ravel()
    shift_y = shift_y.ravel()
    # shape (K, 4)
    shifts = np.vstack((shift_x, shift_y, shift_x, shift_y)).transpose()

    # Broadcast anchors over shifts to enumerate all anchors at all positions
    # in the (H, W) grid:
    #   - add A cell anchors of shape (1, A, 4) to
    #   - K shifts of shape (K, 1, 4) to get
    #   - all shifted anchors of shape (K, A, 4)
    #   - reshape to (K*A, 4) shifted anchors
    # number of anchors per cell
    A = num_cell_anchors
    # number of grid positions
    K = shifts.shape[0]
    field_of_anchors = (
        cell_anchors.reshape((1, A, 4)) +
        shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    )
    field_of_anchors = field_of_anchors.reshape((K * A, 4))
    # Anchors at every position of the feature map, plus the parameters used
    # to construct them
    foa = FieldOfAnchors(
        field_of_anchors=field_of_anchors.astype(np.float32),
        num_cell_anchors=num_cell_anchors,
        stride=stride,
        field_size=field_size,
        octave=octave,
        aspect=aspect
    )
    _threadlocal_foa.cache[cache_key] = foa
    return foa