def proposal_target_layer(rpn_bbox, rpn_cls_prob, gt_boxes, num_classes):
    """Sample RPN proposals and build per-class labels and box-regression targets.

    The ground-truth boxes are appended to the proposals with a confidence of
    1. Up to ``int(BATCH_SIZE * FG_RATIO)`` proposals with confidence >= 0.5
    are kept as foreground, and proposals with confidence in [0.1, 0.5) fill
    the remainder of the batch as background (fewer if not enough exist).

    :param rpn_bbox: proposal boxes, one row per proposal.
    :param rpn_cls_prob: per-proposal probabilities; column 1 is used as the
        confidence score.
    :param gt_boxes: ground-truth array; the last column is read as the class
        id and the preceding columns as the box.
    :param num_classes: number of object classes; the regression target has
        ``(num_classes + 1) * 4`` columns.
    :return: ``(labels, bbox_reg, rpn_bbox)`` where ``labels`` holds
        ``class id + 1`` for sampled foreground rows followed by 0 for
        background rows, ``bbox_reg`` holds the regression target of each
        foreground row in the 4-column slot of its label, and ``rpn_bbox``
        holds the sampled boxes in the same (foreground, background) order.
    """
    gt_coords = gt_boxes[:, 0:-1]
    confidence_scores = rpn_cls_prob[:, 1]

    # Add ground truth boxes as part of proposals
    rpn_bbox = np.vstack([rpn_bbox, gt_coords])
    # np.concatenate takes a *sequence* of arrays; a second positional
    # argument would be interpreted as the axis.
    confidence_scores = np.concatenate(
        [confidence_scores, np.ones(gt_boxes.shape[0], np.float32)])

    # Sample objects and backgrounds. When there are not more candidates than
    # the quota, all candidates are kept.
    fg_cnt = int(BATCH_SIZE * FG_RATIO)
    pos_inds = np.where(confidence_scores >= 0.5)[0]
    if len(pos_inds) > fg_cnt:
        pos_inds = np.random.choice(pos_inds, size=fg_cnt, replace=False)

    bg_cnt = BATCH_SIZE - len(pos_inds)
    neg_inds = np.where((confidence_scores >= 0.1) &
                        (confidence_scores < 0.5))[0]
    if len(neg_inds) > bg_cnt:
        neg_inds = np.random.choice(neg_inds, size=bg_cnt, replace=False)

    pos_bbox = rpn_bbox[pos_inds]
    neg_bbox = rpn_bbox[neg_inds]

    # Match every foreground proposal to its best-overlapping ground truth.
    overlaps = bbox.bbox_overlaps(pos_bbox, gt_coords)
    argmax_overlaps = np.argmax(overlaps, axis=-1)

    # Label 0 is reserved for background, so class ids are shifted by one.
    # Cast to int because the labels are used as slice bounds below.
    pos_labels = (gt_boxes[:, -1][argmax_overlaps] + 1).astype(np.int32)
    neg_labels = np.zeros(len(neg_inds), np.int32)
    labels = np.concatenate([pos_labels, neg_labels])

    bbox_reg = np.zeros([len(labels), (num_classes + 1) * 4], np.float32)
    # Targets are computed for the sampled foreground boxes only, so the rows
    # line up one-to-one with the matched ground-truth boxes.
    bbox_reg_ = bbox.bbox_transform(pos_bbox, gt_coords[argmax_overlaps])
    for i, cls in enumerate(pos_labels):
        bbox_reg[i, cls * 4:(cls + 1) * 4] = bbox_reg_[i]

    rpn_bbox = np.vstack([pos_bbox, neg_bbox])
    return labels, bbox_reg, rpn_bbox
def anchor_target_layer(gt_boxes, all_anchors, image_shape, feature_map_shape, k):
    """
    Assign RPN training labels and box-regression targets to anchors.

    Anchors lying fully inside the image are labelled 1 (foreground) when they
    are the best match of some ground-truth box or their best overlap exceeds
    ``FG_HIGH_THRES``, 0 (background) when their best overlap is below
    ``BG_LOW_THRES``, and -1 (ignored) otherwise. Foreground and background
    anchors are then randomly subsampled to at most ``SAMPLE_NUMBER`` in
    total, with at most ``int(SAMPLE_NUMBER * FG_RATIO)`` foreground; the
    anchors dropped by sampling are set to -1.

    :param gt_boxes: ground-truth boxes, one row per object; passed unchanged
        to ``bbox.bbox_overlaps`` and ``bbox.bbox_transform``.
    :param all_anchors: anchors, one row per anchor; columns 0-3 are compared
        against the image bounds (presumably x1, y1, x2, y2).
    :param image_shape: image size; index 0 bounds anchor column 3 and index 1
        bounds anchor column 2.
    :param feature_map_shape: unused by this function.
    :param k: unused by this function.
    :return: ``(labels, targets)`` covering every anchor in ``all_anchors``;
        anchors outside the image are filled through ``_unmap`` with -1
        (labels) and 0 (targets). With no ground truth, every label is 0 and
        every target is 0.
    """
    # If there is no object in the image
    if len(gt_boxes) == 0:
        labels = np.zeros((len(all_anchors),), dtype=np.int32)
        targets = np.zeros(all_anchors.shape, dtype=np.float32)
        return labels, targets

    num_total_anchors = all_anchors.shape[0]

    # Keep anchors that are inside the image
    valid_idx = np.where((all_anchors[:, 0] >= 0) &
                         (all_anchors[:, 1] >= 0) &
                         (all_anchors[:, 2] < image_shape[1]) &
                         (all_anchors[:, 3] < image_shape[0]))[0]
    anchors = all_anchors[valid_idx, :]

    labels = np.full((len(valid_idx),), -1, dtype=np.int32)
    targets = np.zeros((0,) + all_anchors.shape[1:], dtype=np.float32)

    # With no anchor inside the image the overlap matrix has zero rows and
    # np.argmax(..., axis=0) raises ValueError, so skip the matching and let
    # every anchor come out of _unmap with its fill value.
    if len(valid_idx) > 0:
        overlaps = bbox.bbox_overlaps(anchors, gt_boxes)

        # Best ground-truth box for every anchor.
        argmax_overlaps = np.argmax(overlaps, axis=1)
        max_overlaps = overlaps[np.arange(0, len(valid_idx), 1),
                                argmax_overlaps]

        # Best anchor(s) for every ground-truth box, ties included.
        gt_argmax_overlaps = np.argmax(overlaps, axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(0, gt_boxes.shape[0], 1)]
        gt_best_anchors = np.where(overlaps == gt_max_overlaps)[0]

        # Background is assigned first so the foreground rules override it.
        labels[np.where(max_overlaps < BG_LOW_THRES)[0]] = 0
        labels[gt_best_anchors] = 1
        labels[np.where(max_overlaps > FG_HIGH_THRES)[0]] = 1

        targets = bbox.bbox_transform(anchors, gt_boxes[argmax_overlaps, :])

    # Sampling positive and negative anchors
    fg_cnt = int(SAMPLE_NUMBER * FG_RATIO)
    fg_idxs = np.where(labels == 1)[0]
    if len(fg_idxs) > fg_cnt:
        disable_inds = np.random.choice(
            fg_idxs, size=(len(fg_idxs) - fg_cnt), replace=False)
        labels[disable_inds] = -1

    bg_cnt = SAMPLE_NUMBER - np.sum(labels == 1)
    bg_idxs = np.where(labels == 0)[0]
    if len(bg_idxs) > bg_cnt:
        disable_inds = np.random.choice(
            bg_idxs, size=(len(bg_idxs) - bg_cnt), replace=False)
        labels[disable_inds] = -1

    # Map back to the full anchor set.
    labels = _unmap(labels, num_total_anchors, valid_idx, -1)
    targets = _unmap(targets, num_total_anchors, valid_idx, 0)
    return labels, targets
def worker(self, data):
    """Build RPN anchor labels and regression targets for one image crop.

    :param data: sequence whose first eight items are ``(im_info, cur_crop,
        im_scale, nids, gtids, gt_boxes, boxes, classes)``; an optional ninth
        item holds the mask polygons.
    :return: list ``[labels, bbox_targets, pids, fgt_boxes]`` where
        ``labels`` is a float16 NDArray of shape ``(1, num_anchors *
        feat_height * feat_width)`` with values 1 / 0 / -1 (foreground /
        background / ignored), ``pids`` are the positions whose regression
        weight is 1, ``bbox_targets`` are the regression targets at those
        positions, and ``fgt_boxes`` is a ``(100, 5)`` array of boxes plus
        class, padded with -1. When mask polygons are supplied, the encoded
        polygons are appended as a fifth element.
    """

    def _unmap(subset, count, inds, fill=0):
        """Scatter ``subset`` into a float32 array of ``count`` rows at ``inds``.

        Rows not listed in ``inds`` are set to ``fill``.
        """
        ret = np.empty((count,) + subset.shape[1:], dtype=np.float32)
        ret.fill(fill)
        ret[inds] = subset
        return ret

    im_info, cur_crop, im_scale, nids, gtids, gt_boxes, boxes, classes = data[0:8]
    has_mask = len(data) > 8

    # Keep anchors that stick out of the image by at most 32 pixels.
    anchors = self.all_anchors.copy()
    inds_inside = np.where((anchors[:, 0] >= -32) &
                           (anchors[:, 1] >= -32) &
                           (anchors[:, 2] < im_info[0] + 32) &
                           (anchors[:, 3] < im_info[1] + 32))[0]
    anchors = anchors[inds_inside, :]
    num_inside = len(inds_inside)
    labels = np.full((num_inside,), -1, dtype=np.float32)
    total_anchors = int(self.K * self.num_anchors)

    # Shift the boxes into crop coordinates. Work on a copy so the array
    # handed in by the caller is not modified in place.
    gt_boxes = gt_boxes.copy()
    gt_boxes[:, 0] -= cur_crop[0]
    gt_boxes[:, 2] -= cur_crop[0]
    gt_boxes[:, 1] -= cur_crop[1]
    gt_boxes[:, 3] -= cur_crop[1]

    # Fancy indexing already yields a fresh array here.
    vgt_boxes = boxes[np.intersect1d(gtids, nids)]
    vgt_boxes[:, 0] -= cur_crop[0]
    vgt_boxes[:, 2] -= cur_crop[0]
    vgt_boxes[:, 1] -= cur_crop[1]
    vgt_boxes[:, 3] -= cur_crop[1]

    gt_boxes = clip_boxes(np.round(gt_boxes * im_scale), im_info[:2])
    vgt_boxes = clip_boxes(np.round(vgt_boxes * im_scale), im_info[:2])

    ids = filter_boxes(gt_boxes, 10)
    if len(ids) == 0:
        gt_boxes = np.zeros((0, 4))
        classes = np.zeros((0, 1))

    if has_mask:
        # Shift and crop the mask polygons
        mask_polys = crop_polys(data[8], cur_crop, im_scale)
        # Create the padded encoded array
        if len(ids) > 0:
            polylen = len(mask_polys)
            # Only boxes that have a polygon can be kept.
            kept = [i for i in ids if i < polylen]
            mask_polys = [mask_polys[i] for i in kept]
            if kept:
                classes = np.array(
                    [classes[i] for i in kept]).reshape(len(kept), 1)
                gt_boxes = np.array([gt_boxes[i] for i in kept])
            else:
                # Nothing survived: keep the 2-D empty shapes that the code
                # below indexes with two subscripts.
                gt_boxes = np.zeros((0, 4))
                classes = np.zeros((0, 1))
            encoded_polys = poly_encoder(mask_polys, classes[:, 0] - 1,
                                         max_poly_len=self.max_poly_len,
                                         max_n_gts=self.max_n_gts)
        else:
            encoded_polys = -np.ones((self.max_n_gts, self.max_poly_len),
                                     dtype=np.float32)
    elif len(ids) > 0:
        gt_boxes = gt_boxes[ids]
        classes = classes[ids]

    # Snapshot of all filtered boxes, returned alongside the labels.
    agt_boxes = gt_boxes.copy()

    ids = filter_boxes(vgt_boxes, 10)
    vgt_boxes = vgt_boxes[ids] if len(ids) > 0 else np.zeros((0, 4))

    # A box counts as valid only if it coincides exactly (overlap == 1) with
    # one of the vgt boxes; all other boxes are treated as invalid.
    if len(vgt_boxes) > 0:
        ov = bbox_overlaps(np.ascontiguousarray(gt_boxes).astype(float),
                           np.ascontiguousarray(vgt_boxes).astype(float))
        mov = np.max(ov, axis=1)
    else:
        mov = np.zeros((len(gt_boxes)))

    invalid_gtids = np.where(mov < 1)[0]
    valid_gtids = np.where(mov == 1)[0]
    invalid_boxes = gt_boxes[invalid_gtids, :]
    gt_boxes = gt_boxes[valid_gtids, :]

    if gt_boxes.size > 0:
        # overlap between the anchors and the gt boxes
        # overlaps (ex, gt)
        # The builtin float replaces np.float, which NumPy 1.24 removed.
        overlaps = bbox_overlaps(anchors.astype(float), gt_boxes.astype(float))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(num_inside), argmax_overlaps]

        # Best anchor(s) for every gt box, ties included.
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        # Background first so the foreground rules override it.
        labels[max_overlaps < self.neg_thresh] = 0
        labels[gt_argmax_overlaps] = 1
        # fg label: above threshold IoU
        labels[max_overlaps >= self.pos_thresh] = 1
    else:
        labels[:] = 0

    # Anchors that overlap an invalid box by more than 0.3 are ignored,
    # whatever label they received above.
    if len(invalid_boxes) > 0:
        overlapsn = bbox_overlaps(anchors.astype(float),
                                  invalid_boxes.astype(float))
        labels[overlapsn.max(axis=1) > 0.3] = -1

    # subsample positive labels if we have too many
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > self.num_fg:
        disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - self.num_fg),
                                  replace=False)
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = self.batch_size - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg),
                                  replace=False)
        labels[disable_inds] = -1

    bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
    if gt_boxes.size > 0:
        bbox_targets[:] = bbox_transform(anchors, gt_boxes[argmax_overlaps, :4])

    # Only foreground anchors contribute to the regression loss.
    bbox_weights = np.zeros((num_inside, 4), dtype=np.float32)
    bbox_weights[labels == 1, :] = 1.0

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_weights = _unmap(bbox_weights, total_anchors, inds_inside, fill=0)

    # Reorder from (height, width, anchor) to anchor-major layout.
    labels = labels.reshape(
        (1, self.feat_height, self.feat_width, self.num_anchors)
    ).transpose(0, 3, 1, 2)
    labels = labels.reshape(
        (1, self.num_anchors * self.feat_height * self.feat_width)
    ).astype(np.float16)
    bbox_targets = bbox_targets.reshape(
        (self.feat_height, self.feat_width, self.num_anchors * 4)
    ).transpose(2, 0, 1)
    bbox_weights = bbox_weights.reshape(
        (self.feat_height, self.feat_width, self.num_anchors * 4)
    ).transpose((2, 0, 1))

    # Keep only the targets at positions with a non-zero weight.
    pids = np.where(bbox_weights == 1)
    bbox_targets = bbox_targets[pids]

    # Fixed-size (boxes + class) buffer padded with -1. Both sides are
    # truncated so more boxes than rows cannot cause a shape mismatch.
    fgt_boxes = -np.ones((100, 5))
    if len(agt_boxes) > 0:
        n_keep = min(len(agt_boxes), fgt_boxes.shape[0])
        fgt_boxes[:n_keep, :] = np.hstack((agt_boxes, classes))[:n_keep]

    rval = [mx.nd.array(labels, dtype='float16'), bbox_targets,
            mx.nd.array(pids), mx.nd.array(fgt_boxes)]
    if has_mask:
        rval.append(mx.nd.array(encoded_polys))
    return rval