def __init__(self, rpn_layers, global_buffers, inference=False, num_rois=128,
             pre_nms_N=12000, post_nms_N=2000, nms_thresh=0.7, min_bbox_size=16,
             num_classes=21, fg_fraction=None, fg_thresh=None, bg_thresh_hi=None,
             bg_thresh_lo=None, deterministic=False, name=None, debug=False):
    """
    Store the proposal hyper-parameters, bind the shared dataset buffers and
    pre-compute the anchor boxes (host and device copies).

    Arguments:
        rpn_layers (list): References to the RPN layers: [RPN_1x1_obj, RPN_1x1_bbox]
        global_buffers (dict): Shared buffers, read under these keys:
            'target_buffers': target buffers for training fast-rcnn
                              (class, bbox regression)
            'img_info': unpacked as (im_shape, im_scale)
            'gt_boxes': unpacked as (gt_boxes, gt_classes, num_gt_boxes)
            'conv_config': unpacked as (conv_size, scale), used to generate anchors
        inference (bool, optional): Whether the network is used for inference;
                                    determines the output shape (default: False)
        num_rois (int, optional): Number of ROIs to sample from proposed (default: 128)
        pre_nms_N (int, optional): Number of ROIs to retain before using NMS
                                   (default: 12000)
        post_nms_N (int, optional): Number of ROIs to retain after using NMS
                                    (default: 2000)
        nms_thresh (float, optional): Threshold for non-maximum suppression
                                      (default: 0.7)
        min_bbox_size (integer, optional): Minimum bbox side length (default: 16)
        num_classes (int, optional): Stored as ``self.num_classes`` (default: 21)
        fg_fraction (float, optional): Falls back to FG_FRACTION when None
        fg_thresh (float, optional): Falls back to FG_THRESH when None
        bg_thresh_hi (float, optional): Falls back to BG_THRESH_HI when None
        bg_thresh_lo (float, optional): Falls back to BG_THRESH_LO when None
        deterministic (bool, optional): Stored as ``self.deterministic``
                                        (default: False)
        name (string, optional): Name of layer (default: None)
        debug (bool, optional): Stored as ``self.debug`` (default: False)
    """
    super(ProposalLayer, self).__init__(name)

    self.rpn_obj, self.rpn_bbox = rpn_layers

    self.num_rois = num_rois
    self.pre_nms_N = pre_nms_N
    self.post_nms_N = post_nms_N
    self.nms_thresh = nms_thresh
    self.min_bbox_size = min_bbox_size
    self.num_classes = num_classes

    # Only fall back to the module defaults when the caller passed nothing.
    # A plain truthiness test would silently discard an explicit 0 / 0.0
    # (e.g. bg_thresh_lo=0.0) and substitute the default instead.
    self.fg_fraction = FG_FRACTION if fg_fraction is None else fg_fraction
    self.fg_thresh = FG_THRESH if fg_thresh is None else fg_thresh
    self.bg_thresh_hi = BG_THRESH_HI if bg_thresh_hi is None else bg_thresh_hi
    self.bg_thresh_lo = BG_THRESH_LO if bg_thresh_lo is None else bg_thresh_lo

    self.deterministic = deterministic
    self.debug = debug

    # the output shape of this layer depends on whether the network
    # will be used for inference. In inference mode, the output shape is
    # (5, post_nms_N). For training, a smaller set of rois are sampled, yielding
    # an output shape of (5, num_rois)
    self.inference = inference

    # set references to dataset object buffers
    self.target_buffers = global_buffers['target_buffers']
    self.im_shape, self.im_scale = global_buffers['img_info']
    self.gt_boxes, self.gt_classes, self.num_gt_boxes = global_buffers['gt_boxes']
    self._conv_size, self._scale = global_buffers['conv_config']

    # generate anchors and load onto device
    # self._anchors has shape (KHW, 4)
    self._anchors = generate_all_anchors(self._conv_size, self._conv_size, self._scale)
    self._dev_anchors = self.be.array(self._anchors)
    self._num_anchors = self._anchors.shape[0]
def add_anchors(self, roi_db):
    """
    Annotate every entry of ``roi_db`` with anchor labels and bbox targets.

    Steps:
      1. for each i in (H,W), generate k=9 anchor boxes centered on i
      2. compute each anchor box against ground truth
      3. assign each anchor to positive (1), negative (0), or ignored (-1)
      4. store the bbox regression targets for the anchors

    Arguments:
        roi_db (iterable of dict): Each entry is read under 'img_shape' and
            'gt_bb' and is updated in place with the keys 'labels',
            'bbox_targets', 'max_classes', 'total_anchors', 'idx_inside',
            'im_width' and 'im_height'.

    Returns:
        The same ``roi_db`` object, after its entries have been updated.
    """
    # 1.
    # generate list of K anchor boxes, where K = # ratios * # scales
    # anchor boxes are coded as [xmin, ymin, xmax, ymax]
    all_anchors = generate_all_anchors(self._conv_size, self._conv_size, self.SCALE)

    # all_anchors are in (CHW) order, matching the CHWN output of the conv layer.
    assert self._total_anchors == all_anchors.shape[0]

    # 2.
    # Iterate through each image, and build list of positive/negative anchors
    for db in roi_db:
        im_scale, im_shape = self.calculate_scale_shape(db['img_shape'])

        # only keep anchors inside image
        idx_inside = inside_im_bounds(all_anchors, im_shape)

        if DEBUG:
            neon_logger.display('im shape', im_shape)
            neon_logger.display('idx inside', len(idx_inside))

        anchors = all_anchors[idx_inside, :]

        # every anchor starts out as "ignored" (-1)
        labels = np.full((len(idx_inside), ), -1, dtype=np.float32)

        # compute bbox overlaps
        # np.float64 is the dtype the removed ``np.float`` alias resolved to.
        overlaps = calculate_bb_overlap(
            np.ascontiguousarray(anchors, dtype=np.float64),
            np.ascontiguousarray(db['gt_bb'] * im_scale, dtype=np.float64))

        # per-anchor best overlap and the gt box that achieves it; computed
        # once and reused for labelling and target generation below
        max_overlaps = overlaps.max(axis=1)
        argmax_overlaps = overlaps.argmax(axis=1)

        # assign bg labels first
        bg_idx = max_overlaps < self.NEGATIVE_OVERLAP
        labels[bg_idx] = 0

        # assign fg labels (these overwrite bg labels set above)
        # 1. for each gt box, anchor with higher overlaps [including ties]
        gt_idx = np.where(overlaps == overlaps.max(axis=0))[0]
        labels[gt_idx] = 1

        # 2. any anchor above the overlap threshold with any gt box
        fg_idx = max_overlaps >= self.POSITIVE_OVERLAP
        labels[fg_idx] = 1

        if DEBUG:
            neon_logger.display('max_overlap: {}'.format(overlaps.max()))
            neon_logger.display('Assigned {} bg labels'.format(bg_idx.sum()))
            neon_logger.display('Assigned {}+{} fg labels'.format(fg_idx.sum(),
                                                                  len(gt_idx)))
            neon_logger.display('Total fg labels: {}'.format(np.sum(labels == 1)))
            neon_logger.display('Total bg labels: {}'.format(np.sum(labels == 0)))

        # For every anchor, compute the regression target compared
        # to the gt box that it has the highest overlap with
        # the indices of labels should match these targets
        bbox_targets = _compute_targets(
            db['gt_bb'][argmax_overlaps, :] * im_scale, anchors)

        # index of the max-overlap gt box for each anchor, kept for use in
        # normalization
        # NOTE(review): this is the gt box index, not a class label, despite the
        # 'max_classes' key -- confirm the consumer maps it to a class.
        gt_max_overlap_classes = argmax_overlaps

        # store results in database
        db['labels'] = labels
        db['bbox_targets'] = bbox_targets
        db['max_classes'] = gt_max_overlap_classes
        db['total_anchors'] = self._total_anchors
        db['idx_inside'] = idx_inside
        db['im_width'] = im_shape[0]
        db['im_height'] = im_shape[1]

    return roi_db
def add_anchors(self, roi_db):
    """
    Annotate every entry of ``roi_db`` with anchor labels and bbox targets.

    Steps:
      1. for each i in (H,W), generate k=9 anchor boxes centered on i
      2. compute each anchor box against ground truth
      3. assign each anchor to positive (1), negative (0), or ignored (-1)
      4. store the bbox regression targets for the anchors

    Arguments:
        roi_db (iterable of dict): Each entry is read under 'img_shape' and
            'gt_bb' and is updated in place with the keys 'labels',
            'bbox_targets', 'max_classes', 'total_anchors', 'idx_inside',
            'im_width' and 'im_height'.

    Returns:
        The same ``roi_db`` object, after its entries have been updated.
    """
    # 1.
    # generate list of K anchor boxes, where K = # ratios * # scales
    # anchor boxes are coded as [xmin, ymin, xmax, ymax]
    all_anchors = generate_all_anchors(self._conv_size, self._conv_size, self.SCALE)

    # all_anchors are in (CHW) order, matching the CHWN output of the conv layer.
    assert self._total_anchors == all_anchors.shape[0]

    # 2.
    # Iterate through each image, and build list of positive/negative anchors
    for db in roi_db:
        im_scale, im_shape = self.calculate_scale_shape(db['img_shape'])

        # only keep anchors inside image
        idx_inside = inside_im_bounds(all_anchors, im_shape)

        if DEBUG:
            neon_logger.display('im shape', im_shape)
            neon_logger.display('idx inside', len(idx_inside))

        anchors = all_anchors[idx_inside, :]

        # every anchor starts out as "ignored" (-1)
        labels = np.full((len(idx_inside), ), -1, dtype=np.float32)

        # compute bbox overlaps
        # np.float64 is the dtype the removed ``np.float`` alias resolved to.
        overlaps = calculate_bb_overlap(
            np.ascontiguousarray(anchors, dtype=np.float64),
            np.ascontiguousarray(db['gt_bb'] * im_scale, dtype=np.float64))

        # per-anchor best overlap and the gt box that achieves it; computed
        # once and reused for labelling and target generation below
        max_overlaps = overlaps.max(axis=1)
        argmax_overlaps = overlaps.argmax(axis=1)

        # assign bg labels first
        bg_idx = max_overlaps < self.NEGATIVE_OVERLAP
        labels[bg_idx] = 0

        # assign fg labels (these overwrite bg labels set above)
        # 1. for each gt box, anchor with higher overlaps [including ties]
        gt_idx = np.where(overlaps == overlaps.max(axis=0))[0]
        labels[gt_idx] = 1

        # 2. any anchor above the overlap threshold with any gt box
        fg_idx = max_overlaps >= self.POSITIVE_OVERLAP
        labels[fg_idx] = 1

        if DEBUG:
            neon_logger.display('max_overlap: {}'.format(overlaps.max()))
            neon_logger.display('Assigned {} bg labels'.format(bg_idx.sum()))
            neon_logger.display('Assigned {}+{} fg labels'.format(fg_idx.sum(),
                                                                  len(gt_idx)))
            neon_logger.display('Total fg labels: {}'.format(np.sum(labels == 1)))
            neon_logger.display('Total bg labels: {}'.format(np.sum(labels == 0)))

        # For every anchor, compute the regression target compared
        # to the gt box that it has the highest overlap with
        # the indices of labels should match these targets
        bbox_targets = _compute_targets(
            db['gt_bb'][argmax_overlaps, :] * im_scale, anchors)

        # index of the max-overlap gt box for each anchor, kept for use in
        # normalization
        # NOTE(review): this is the gt box index, not a class label, despite the
        # 'max_classes' key -- confirm the consumer maps it to a class.
        gt_max_overlap_classes = argmax_overlaps

        # store results in database
        db['labels'] = labels
        db['bbox_targets'] = bbox_targets
        db['max_classes'] = gt_max_overlap_classes
        db['total_anchors'] = self._total_anchors
        db['idx_inside'] = idx_inside
        db['im_width'] = im_shape[0]
        db['im_height'] = im_shape[1]

    return roi_db