def _draw_labeled_boxes(image, boxes, tag, color):
    """Draw every box in *boxes* on *image*, optionally with a text tag.

    Each box is read as ``(x1, y1, x2, y2)``.  Coordinates are converted to
    plain Python ints first because the OpenCV drawing calls expect integer
    pixel positions.  When *tag* is ``None`` only the rectangle is drawn.
    """
    for box in boxes:
        x1, y1, x2, y2 = (int(v) for v in box[:4])
        cv2.rectangle(image, (x1, y1), (x2, y2), color)
        if tag is not None:
            # Keep the label at least 2 px inside the image border.
            cv2.putText(image, tag, (max(2, x1 - 10), max(2, y1 - 10)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2)


def draw(path_img, save_path, preds, gts, nid):
    """Render predictions and ground truths on an image and save it as PNG.

    The image is read from *path_img* and written to
    ``<save_path>/<nid>.png``.  Boxes are tagged as follows:

    * ``preds is None``  -> every ground-truth box is drawn as "fn".
    * ``gts is None``    -> every predicted box is drawn as "fp".
    * both given         -> ground truths are outlined in white; predictions
      whose best overlap is ``>= config.iou_thres`` are "tp", the rest "fp";
      ground truths whose best overlap is ``< config.iou_thres`` are "fn".
    * both ``None``      -> the image is saved unchanged.

    Args:
        path_img: path of the input image.
        save_path: output directory.
        preds: predicted boxes (indexable by an index/boolean array) or None.
        gts: ground-truth boxes (same requirements) or None.
        nid: identifier used as the output file name.
    """
    # Colours as passed to OpenCV drawing calls.
    color_fn = (0, 0, 255)
    color_fp = (255, 0, 0)
    color_tp = (0, 255, 0)
    color_gt = (255, 255, 255)

    image = cv2.imread(path_img, 1)
    out_file = os.path.join(save_path, '{}.png'.format(nid))

    if preds is None and gts is None:
        pass
    elif preds is None:
        _draw_labeled_boxes(image, gts, "fn", color_fn)
    elif gts is None:
        _draw_labeled_boxes(image, preds, "fp", color_fp)
    else:
        _draw_labeled_boxes(image, gts, None, color_gt)
        # Early snapshot with ground truths only; the final write below
        # overwrites it once the remaining boxes have been drawn.
        cv2.imwrite(out_file, image)

        overlap = utils.bbox_overlaps(preds, gts)
        # Best overlap of every prediction with any ground truth.
        confidence = overlap.max(axis=1)

        tp_idx = np.where(confidence >= config.iou_thres)[0]
        _draw_labeled_boxes(image, preds[tp_idx], "tp", color_tp)

        fp_idx = np.where(confidence < config.iou_thres)[0]
        _draw_labeled_boxes(image, preds[fp_idx], "fp", color_fp)

        # Ground truths that no prediction overlaps well enough.
        fn_idx = np.where(overlap.max(axis=0) < config.iou_thres)[0]
        _draw_labeled_boxes(image, gts[fn_idx], "fn", color_fn)

    cv2.imwrite(out_file, image)
def _sample_rois(self, all_rois, gt_boxes, gt_labels, gt_viewpoints,
                 fg_rois_per_image, rois_per_image, num_classes):
    """Generate a random sample of RoIs comprising foreground and background examples.

    Args:
        all_rois: candidate RoIs; columns 1:5 are used as box coordinates.
        gt_boxes: ground-truth boxes; the first four columns are used.
        gt_labels: per-ground-truth labels, indexed by ground-truth index.
        gt_viewpoints: per-ground-truth viewpoints, indexed the same way.
        fg_rois_per_image: upper bound on sampled foreground RoIs.
        rois_per_image: total number of RoIs wanted (foreground + background).
        num_classes: forwarded to ``_get_bbox_regression_labels``.

    Returns:
        Tuple ``(labels, rois, bbox_targets, viewpoints)`` for the sampled
        RoIs, foreground entries first.
    """
    # overlaps: (rois x gt_boxes).  ``np.float64`` is used because the
    # ``np.float`` alias no longer exists in current NumPy releases.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    overlaps = overlaps.numpy()
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_labels[gt_assignment]
    viewpoints = gt_viewpoints[gt_assignment]

    # Select foreground RoIs as those with >= fg_threshold overlap.
    fg_inds = np.where(max_overlaps >= self.fg_threshold)[0]
    # Guard against the case when an image has fewer than fg_rois_per_image
    # foreground RoIs.
    fg_rois_per_this_image = min(fg_rois_per_image, fg_inds.size)
    # Sample foreground regions without replacement.
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds, size=int(fg_rois_per_this_image),
                             replace=False)

    # Select background RoIs as those within [bg_threshold[0], bg_threshold[1]).
    bg_inds = np.where((max_overlaps < self.bg_threshold[1]) &
                       (max_overlaps >= self.bg_threshold[0]))[0]
    # Compute number of background RoIs to take from this image (guarding
    # against there being fewer than desired).
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)
    # Sample background regions without replacement.
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds, size=int(bg_rois_per_this_image),
                             replace=False)

    # The indices that we're selecting (both fg and bg).
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays.
    labels = labels[keep_inds]
    viewpoints = viewpoints[keep_inds]
    # Clamp labels for the background RoIs to 0.
    labels[int(fg_rois_per_this_image):] = 0
    # TODO: should background bbox's viewpoint be 0?
    rois = all_rois[keep_inds]

    bbox_target_data = _compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)
    bbox_targets = _get_bbox_regression_labels(bbox_target_data, num_classes)

    return labels, rois, bbox_targets, viewpoints
def add_labels_temporal(dets_df, hyp_df, gt_df, fr, ov_thresh=0.5):
    """Label hypotheses that recover a ground truth the detector missed.

    A hypothesis gets label 1 when it is matched to a ground-truth box
    (overlap above *ov_thresh*) that no detection was matched to.
    Hypotheses still labelled 0 that overlap a 'DontCare' region by at least
    0.5 are labelled -1 and dropped from the result.

    Args:
        dets_df: detections with ``bb_left/bb_top/bb_right/bb_bottom`` columns.
        hyp_df: hypotheses with the same box columns.  A ``valid`` column is
            written to this frame in place.
        gt_df: ground truth with the box columns and a ``type`` column.
        fr: unused; kept for interface compatibility.
        ov_thresh: overlaps at or below this value are never matched.

    Returns:
        The rows of *hyp_df* whose ``valid`` label is ``>= 0``.
    """
    bbox_cols = ['bb_left', 'bb_top', 'bb_right', 'bb_bottom']
    COST_MAX = 1e9

    dc_df = gt_df[gt_df.type == 'DontCare']
    gt_df = gt_df[gt_df.type != 'DontCare']

    # Use gt to determine missed detections (rows: gt, columns: detections).
    dets_bbox = dets_df[bbox_cols].values
    gt_bbox = gt_df[bbox_cols].values
    overlaps = bbox_overlaps(gt_bbox, dets_bbox)
    C = 1 - overlaps
    C[overlaps <= ov_thresh] += COST_MAX
    row, col = linear_sum_assignment(C)
    inds_valid = C[row, col] < COST_MAX
    detected_gt = row[inds_valid]

    # Use gt to determine valid hypotheses (rows: hypotheses, columns: gt).
    hyp_bbox = hyp_df[bbox_cols].values
    hyp_overlaps = bbox_overlaps(hyp_bbox, gt_bbox)
    C_hyp = 1 - hyp_overlaps
    C_hyp[hyp_overlaps <= ov_thresh] += COST_MAX
    row_h, col_h = linear_sum_assignment(C_hyp)
    inds_valid_hyp = C_hyp[row_h, col_h] < COST_MAX

    labels = np.zeros(len(hyp_df))
    # If associated to ground truth, verify that it was not detected.
    matched_hyp = row_h[inds_valid_hyp]
    matched_gt = col_h[inds_valid_hyp]
    labels[matched_hyp[~np.isin(matched_gt, detected_gt)]] = 1

    if len(dc_df) and len(hyp_df):
        dc_bbox = dc_df[bbox_cols].values
        dc_overlaps = bbox_overlaps(dc_bbox, hyp_bbox, 'b')
        # Work positionally: the DataFrame index labels of hyp_df are not
        # guaranteed to be 0..n-1, while `labels` and the overlap columns are.
        dc_max = np.max(dc_overlaps, axis=0)
        labels[(dc_max >= 0.5) & (labels == 0)] = -1

    hyp_df['valid'] = labels
    hyp_df = hyp_df[hyp_df.valid >= 0]
    return hyp_df
def produce_batch(image_file, true_boxes):
    """Build one batch of sampled RoIs with regression targets and fg/bg scores.

    The image is resized to ``image_size x image_size``, scaled to [0, 1] and
    passed to ``generate_proposals``.  Proposals go through NMS, are split
    into foreground / background by their best overlap, randomly subsampled
    and shuffled.

    Returns:
        ``(rois, targets, roi_scores)`` where ``roi_scores`` is 1 for
        foreground RoIs and 0 for background, and ``targets`` is zero for
        background RoIs.
    """
    resized = Image.open(image_file).resize((image_size, image_size),
                                            Image.NEAREST)
    pixels = asarray(resized) / 255.0
    del resized
    proposals, anchor_probs = generate_proposals(pixels)
    del pixels

    # Non maximal suppression
    nms_input = np.hstack((proposals, anchor_probs))
    kept = py_cpu_nms(nms_input, NSM_THRESHOLD)
    if post_nms_N > 0:
        kept = kept[:post_nms_N]
    proposals = proposals[kept, :]
    anchor_probs = anchor_probs[kept]

    # RCNN proposals
    # NOTE(review): `enlarged_bboxes` is not defined in this function;
    # presumably a module-level name parallel to `true_boxes` -- confirm.
    overlaps = bbox_overlaps(proposals, enlarged_bboxes)
    best_box = overlaps.argmax(axis=1)
    best_overlap = overlaps.max(axis=1)

    # sub sample foreground and background
    fg_inds = np.where(best_overlap >= FG_THRESHOLD_RCNN)[0]
    fg_quota = int(BATCH_SIZE / (1 + BG_FG_FRAC_RCNN))
    fg_count = min(fg_quota, fg_inds.size)
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds, size=fg_count, replace=False)

    in_bg_range = (best_overlap < BG_THRESH_HI) & (best_overlap >= BG_THRESH_LO)
    bg_inds = np.where(in_bg_range)[0]
    bg_count = min(fg_count, bg_inds.size)
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds, size=bg_count, replace=False)

    keep_inds = np.append(fg_inds, bg_inds)
    np.random.shuffle(keep_inds)

    num_proposals = len(proposals)

    # The chosen rois
    rois = proposals[keep_inds]

    # Scores of chosen rois (fg=1, bg=0)
    fg_flags = np.zeros(num_proposals)
    fg_flags[fg_inds] = 1
    roi_scores = fg_flags[keep_inds].reshape(-1, 1)

    # Regression targets: only foreground rows are filled in
    all_targets = np.zeros((num_proposals, 4))
    all_targets[fg_inds] = bbox_transform(proposals[fg_inds],
                                          true_boxes[best_box[fg_inds]])
    targets = all_targets[keep_inds]

    return rois, targets, roi_scores
def assign_valid_anchors(self, bboxes, gt_bboxes, gt_labels):
    """Assign a ground-truth index and a label to every anchor.

    ``assigned_gt_inds`` uses -1 for "ignore", 0 for "negative" and
    ``i + 1`` for "positive, matched to ground truth i".

    Steps:
        1. every anchor starts at -1;
        2. anchors whose best overlap falls in the negative range
           (``self.neg_iou_thr``, float or 2-tuple) become 0;
        3. anchors whose best overlap is ``>= self.pos_iou_thr`` are assigned
           to their best ground truth;
        4. for every ground truth whose best overlap is
           ``>= self.min_pos_iou``, all anchors reaching that best overlap
           are assigned to it.

    Args:
        bboxes: anchors; only the first four columns are used.
        gt_bboxes: ground-truth boxes.
        gt_labels: label of each ground-truth box.

    Returns:
        ``AssignResult(num_gts, assigned_gt_inds, max_overlaps, labels=...)``.
    """
    bboxes = bboxes[:, :4]
    overlaps = bbox_overlaps(gt_bboxes, bboxes)
    num_gts, num_bboxes = overlaps.size(0), overlaps.size(1)

    # 1. assign -1 by default
    assigned_gt_inds = overlaps.new_full(
        (num_bboxes, ), -1, dtype=torch.long)

    if num_gts == 0 or num_bboxes == 0:
        # `overlaps.max` cannot reduce over an empty dimension.  With no
        # ground truth every anchor is background; with no anchors the
        # result is simply empty.
        max_overlaps = overlaps.new_zeros((num_bboxes, ))
        if num_gts == 0:
            assigned_gt_inds[:] = 0
        assigned_labels = assigned_gt_inds.new_zeros((num_bboxes, ))
        return AssignResult(
            num_gts, assigned_gt_inds, max_overlaps, labels=assigned_labels)

    # for each anchor: the max overlap over all gts, and which gt gives it
    max_overlaps, argmax_overlaps = overlaps.max(dim=0)
    # for each gt: the max overlap over all anchors
    gt_max_overlaps, _ = overlaps.max(dim=1)

    # 2. assign negative: below the negative threshold / inside the range
    if isinstance(self.neg_iou_thr, float):
        assigned_gt_inds[(max_overlaps >= 0)
                         & (max_overlaps < self.neg_iou_thr)] = 0
    elif isinstance(self.neg_iou_thr, tuple):
        assert len(self.neg_iou_thr) == 2
        assigned_gt_inds[(max_overlaps >= self.neg_iou_thr[0])
                         & (max_overlaps < self.neg_iou_thr[1])] = 0

    # 3. assign positive: above positive IoU threshold
    pos_inds = max_overlaps >= self.pos_iou_thr
    assigned_gt_inds[pos_inds] = argmax_overlaps[pos_inds] + 1

    # 4. assign fg: for each gt, the anchors with the highest overlap
    for i in range(num_gts):
        if gt_max_overlaps[i] >= self.min_pos_iou:
            max_iou_inds = overlaps[i, :] == gt_max_overlaps[i]
            assigned_gt_inds[max_iou_inds] = i + 1

    # labels: 0 everywhere except positives, which take their gt's label.
    # A boolean mask avoids the 0-dim tensor that `nonzero().squeeze()`
    # produces when exactly one anchor is positive.
    assigned_labels = assigned_gt_inds.new_zeros((num_bboxes, ))
    pos_mask = assigned_gt_inds > 0
    if pos_mask.any():
        assigned_labels[pos_mask] = gt_labels[assigned_gt_inds[pos_mask] - 1]

    return AssignResult(
        num_gts, assigned_gt_inds, max_overlaps, labels=assigned_labels)
def produce_batch(filepath, gt_boxes, w_h):
    """Build one RPN training batch from a feature map stored on disk.

    Args:
        filepath: file readable by ``np.load`` holding the feature map under
            the key ``"fc"``.
        gt_boxes: ground-truth boxes, passed to ``bbox_overlaps`` /
            ``bbox_transform``.
        w_h: ``(image_width, image_height)`` of the source image.

    Returns:
        ``(tiles, label_targets, bbox_targets)``: an array of 3x3 feature-map
        tiles (one per non-ignored anchor), and the per-location label and
        box-regression targets as nested lists.  ``(None, None, None)`` is
        returned when no anchor lies inside the image.
    """
    # Load the feature map.
    feature_map = np.load(filepath)["fc"]

    # Spatial size of the feature map, i.e. the number of anchor locations.
    height = np.shape(feature_map)[1]
    width = np.shape(feature_map)[2]
    num_feature_map = width * height

    # Stride = image size divided by feature-map size.
    img_width = w_h[0]
    img_height = w_h[1]
    w_stride = img_width / width
    h_stride = img_height / height

    # Base anchors for a single tile; `k` (module-level) must equal the
    # number of anchors generated here.
    base_anchors = generate_anchors(16, 16, ratios=[0.5, 1],
                                    scales=np.asarray([1, 2, 8, 16]))

    # Top-left offset of every tile, repeated as (x, y, x, y) so it can be
    # added to both corners of the base anchors.
    shift_x = np.arange(0, width) * w_stride
    shift_y = np.arange(0, height) * h_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()

    # Broadcasting (1, k, 4) + (num_feature_map, 1, 4) gives
    # (num_feature_map, k, 4).
    all_anchors = (base_anchors.reshape((1, k, 4)) +
                   shifts.reshape((1, num_feature_map, 4)).transpose((1, 0, 2)))
    total_anchors = num_feature_map * k
    all_anchors = all_anchors.reshape((total_anchors, 4))

    # Only keep anchors inside the image (+ border).
    border = 0
    inds_inside = np.where(
        (all_anchors[:, 0] >= -border) &
        (all_anchors[:, 1] >= -border) &
        (all_anchors[:, 2] < img_width + border) &   # width
        (all_anchors[:, 3] < img_height + border)    # height
    )[0]
    anchors = all_anchors[inds_inside]
    if len(anchors) == 0:
        return None, None, None

    # Overlap of every anchor with every gt box: [len(anchors) x len(gt_boxes)].
    overlaps = bbox_overlaps(anchors, gt_boxes)
    # For each anchor: the gt box with the largest overlap, and that overlap.
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    # For each gt box: the largest overlap, then every anchor that reaches it.
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    # labels for the inside anchors: 1=fg / 0=bg / -1=ignore
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)
    # Positive: (1) the best anchor(s) of each gt box, (2) anchors whose
    # best overlap is at least 0.7.
    labels[gt_argmax_overlaps] = 1
    labels[max_overlaps >= .7] = 1
    # Negative: best overlap at most 0.3 (applied last, so it wins).
    labels[max_overlaps <= .3] = 0

    fg_inds = np.where(labels == 1)[0]
    # Subsample negatives if there are too many: the surplus is set to ignore.
    num_bg = int(len(fg_inds) * BG_FG_FRAC)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(
            bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1

    # Anchors that take part in the batch, mapped to their feature-map
    # location: anchors are laid out k per location, so integer-divide by k.
    # (Floor division replaces `(x / k).astype(np.int)`; `np.int` no longer
    # exists in current NumPy.)
    batch_inds = inds_inside[labels != -1]
    batch_inds = batch_inds // k

    # Labels for all anchors, grouped per location.
    full_labels = unmap(labels, total_anchors, inds_inside, fill=-1)
    batch_label_targets = full_labels.reshape(-1, 1, 1, k)[batch_inds]

    # Regression targets are computed for foreground anchors only; every
    # other anchor gets zeros.
    pos_mask = labels == 1
    pos_anchors = all_anchors[inds_inside[pos_mask]]
    bbox_targets = bbox_transform(pos_anchors,
                                  gt_boxes[argmax_overlaps, :][pos_mask])
    bbox_targets = unmap(bbox_targets, total_anchors,
                         inds_inside[pos_mask], fill=0)
    batch_bbox_targets = bbox_targets.reshape(-1, 1, 1, 4 * k)[batch_inds]

    # Pad axes 1 and 2 by one cell on each side, then drop size-1 axes so a
    # 3x3 window can be cut around every location.
    padded_fcmap = np.pad(feature_map, ((0, 0), (1, 1), (1, 1), (0, 0)),
                          mode='constant')
    padded_fcmap = np.squeeze(padded_fcmap)
    batch_tiles = []
    for ind in batch_inds:
        x = ind % width
        y = ind // width
        batch_tiles.append(padded_fcmap[y:y + 3, x:x + 3, :])

    return (np.asarray(batch_tiles), batch_label_targets.tolist(),
            batch_bbox_targets.tolist())
def produce_batch(filepath, gt_boxes, scale):
    """Build one RPN training batch from an image file.

    The image is loaded, resized by *scale* and fed through
    ``pretrained_model`` to obtain a feature map; anchors are then labelled
    against *gt_boxes*.

    Args:
        filepath: path of the image to load.
        gt_boxes: ground-truth boxes, passed to ``bbox_overlaps`` /
            ``bbox_transform``.
        scale: ``(height_scale, width_scale)`` applied to the image size.

    Returns:
        ``(tiles, label_targets, bbox_targets)``: an array of 3x3 feature-map
        tiles (one per non-ignored anchor), and the per-location label and
        box-regression targets as nested lists.
    """
    img = load_img(filepath)
    img_width = np.shape(img)[1] * scale[1]
    img_height = np.shape(img)[0] * scale[0]
    img = img.resize((int(img_width), int(img_height)))

    # Feed the image to the pretrained model and get the feature map.
    img = img_to_array(img)
    img = np.expand_dims(img, axis=0)
    feature_map = pretrained_model.predict(img)
    height = np.shape(feature_map)[1]
    width = np.shape(feature_map)[2]
    num_feature_map = width * height

    # Output stride in each direction.
    w_stride = img_width / width
    h_stride = img_height / height

    # Base anchors for a single tile.
    # NOTE(review): the reshape below hard-codes 9 anchors per tile while the
    # batch reshapes use the module-level `k`; presumably k == 9 -- confirm.
    base_anchors = generate_anchors(w_stride, h_stride)

    # Top-left offset of every tile, repeated as (x, y, x, y).
    shift_x = np.arange(0, width) * w_stride
    shift_y = np.arange(0, height) * h_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()

    # Apply base anchors to all tiles: num_feature_map * 9 anchors.
    all_anchors = (base_anchors.reshape((1, 9, 4)) +
                   shifts.reshape((1, num_feature_map, 4)).transpose((1, 0, 2)))
    total_anchors = num_feature_map * 9
    all_anchors = all_anchors.reshape((total_anchors, 4))

    # Only keep anchors inside the image (+ border).
    border = 0
    inds_inside = np.where(
        (all_anchors[:, 0] >= -border) &
        (all_anchors[:, 1] >= -border) &
        (all_anchors[:, 2] < img_width + border) &   # width
        (all_anchors[:, 3] < img_height + border)    # height
    )[0]
    anchors = all_anchors[inds_inside]

    # Overlap of every anchor with every gt box: [len(anchors) x len(gt_boxes)].
    overlaps = bbox_overlaps(anchors, gt_boxes)
    # For each anchor: the gt box with the largest overlap, and that overlap.
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    # For each gt box: the largest overlap, then every anchor that reaches it.
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    # labels: 1=fg / 0=bg / -1=ignore
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)
    # Positive: (i) the anchor/anchors with the highest overlap with a
    # ground-truth box, or (ii) an anchor whose best overlap is >= 0.7.
    labels[gt_argmax_overlaps] = 1
    labels[max_overlaps >= .7] = 1
    # Negative: best overlap at most 0.3 (applied last, so it wins).
    labels[max_overlaps <= .3] = 0

    fg_inds = np.where(labels == 1)[0]
    # Subsample negative labels if we have too many.
    num_bg = int(len(fg_inds) * BG_FG_FRAC)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg),
                                  replace=False)
        labels[disable_inds] = -1

    # Map every non-ignored anchor to its feature-map location.  Floor
    # division replaces `(x / k).astype(np.int)`; `np.int` no longer exists
    # in current NumPy.
    batch_inds = inds_inside[labels != -1]
    batch_inds = batch_inds // k

    full_labels = unmap(labels, total_anchors, inds_inside, fill=-1)
    batch_label_targets = full_labels.reshape(-1, 1, 1, k)[batch_inds]

    # Regression targets are computed for foreground anchors only; every
    # other anchor gets zeros.
    pos_mask = labels == 1
    pos_anchors = all_anchors[inds_inside[pos_mask]]
    bbox_targets = bbox_transform(pos_anchors,
                                  gt_boxes[argmax_overlaps, :][pos_mask])
    bbox_targets = unmap(bbox_targets, total_anchors,
                         inds_inside[pos_mask], fill=0)
    batch_bbox_targets = bbox_targets.reshape(-1, 1, 1, 4 * k)[batch_inds]

    # Pad axes 1 and 2 by one cell on each side so a 3x3 window can be cut
    # around every location.
    padded_fcmap = np.pad(feature_map, ((0, 0), (1, 1), (1, 1), (0, 0)),
                          mode='constant')
    padded_fcmap = np.squeeze(padded_fcmap)
    batch_tiles = []
    for ind in batch_inds:
        x = ind % width
        y = ind // width
        batch_tiles.append(padded_fcmap[y:y + 3, x:x + 3, :])

    return (np.asarray(batch_tiles), batch_label_targets.tolist(),
            batch_bbox_targets.tolist())
def _overlap_conf_stats(ov, conf_df):
    """Summarise the positive entries of *ov* and the matching confidences.

    Returns a 9-element list: the count of entries with ``ov > 0``, then
    mean / median / min / max of those overlaps, then mean / median / min /
    max of ``conf_df['conf']`` for the same rows.  All statistics are 0 when
    no entry is positive.
    """
    mask = ov > 0
    count = np.sum(mask)
    if not np.any(mask):
        return [count, 0, 0, 0, 0, 0, 0, 0, 0]
    sel_ov = ov[mask]
    sel_conf = conf_df.loc[mask, ['conf']].values
    return [
        count,
        np.mean(sel_ov), np.median(sel_ov), np.min(sel_ov), np.max(sel_ov),
        np.mean(sel_conf), np.median(sel_conf),
        np.min(sel_conf), np.max(sel_conf),
    ]


def extract_temporal_hypotheses(dets_df, tracks_df, width, height,
                                ov_thresh=0.5):
    """Return the tracks that are not explained by a detection, with features.

    Detections and tracks are matched with ``linear_sum_assignment`` on
    ``1 - overlap``; pairs with overlap ``<= ov_thresh`` are never accepted.
    Every track left without an accepted match becomes a hypothesis row
    containing its position features, statistics of the detections and of
    the other tracks that overlap it, and its meta information.

    Args:
        dets_df: detections with box columns and a ``conf`` column.
        tracks_df: tracks with box columns plus ``conf``, ``type`` and ``id``.
        width: image width, used for clipping and passed to ``pix_to_xy``.
        height: image height, used likewise.
        ov_thresh: overlap threshold for accepting a match.

    Returns:
        A DataFrame with one row per hypothesis.
    """
    bbox_cols = ['bb_left', 'bb_top', 'bb_right', 'bb_bottom']

    # ensure indices of dataframes are [0,n]
    dets_df = dets_df.set_index(np.arange(len(dets_df)))
    tracks_df = tracks_df.set_index(np.arange(len(tracks_df)))

    # use hungarian to determine inconsistencies
    dets_bbox = dets_df[bbox_cols].values
    # Copy so that clipping cannot write through to `tracks_df`.
    tracks_bbox = tracks_df[bbox_cols].values.copy()

    # clip tracks to image size.  `np.clip` returns a new array, so the
    # result has to be assigned back for the clipping to take effect.
    tracks_bbox[:, [0, 2]] = np.clip(tracks_bbox[:, [0, 2]], 0, width)
    tracks_bbox[:, [1, 3]] = np.clip(tracks_bbox[:, [1, 3]], 0, height)

    overlaps = bbox_overlaps(dets_bbox, tracks_bbox)
    C = 1 - overlaps
    COST_MAX = 1e9
    C[overlaps <= ov_thresh] += COST_MAX
    row, col = linear_sum_assignment(C)
    inds_valid = C[row, col] < COST_MAX
    # Tracks whose assignment was rejected, plus tracks never assigned.
    inds_inconsistencies = np.union1d(
        col[~inds_valid],
        np.setdiff1d(np.arange(tracks_bbox.shape[0]), col[inds_valid]))
    hypotheses = tracks_df.iloc[inds_inconsistencies, :]

    # compute features for each hypothesis
    tracks_overlaps = bbox_overlaps(tracks_bbox, tracks_bbox)
    # A track must not count as overlapping itself.
    np.fill_diagonal(tracks_overlaps, -1)

    hypotheses_df = pd.DataFrame(columns=[
        'x', 'y', 'w', 'h', 'r',
        'det_cnt', 'mean_det_ov', 'med_det_ov', 'min_det_ov', 'max_det_ov',
        'mean_det_cnf', 'med_det_cnf', 'min_det_cnf', 'max_det_cnf',
        'hyp_cnt', 'mean_hyp_ov', 'med_hyp_ov', 'min_hyp_ov', 'max_hyp_ov',
        'mean_hyp_cnf', 'med_hyp_cnf', 'min_hyp_cnf', 'max_hyp_cnf',
        'type', 'bb_left', 'bb_top', 'bb_right', 'bb_bottom',
        'width', 'height', 'id'
    ])

    # `index` is positional as well, because tracks_df was re-indexed above.
    for index, h in hypotheses.iterrows():
        row = pix_to_xy(h[bbox_cols].values, width, height)
        row += [h['conf']]

        # detections overlapping hypothesis
        row += _overlap_conf_stats(overlaps[:, index], dets_df)

        # other shifted detections overlapping current hypothesis
        row += _overlap_conf_stats(tracks_overlaps[index, :], tracks_df)

        # add meta information
        row += [h['type']]
        row += h[bbox_cols].values.tolist()
        row += [width, height]
        row += [h['id']]

        # append to dataframe
        hypotheses_df.loc[len(hypotheses_df)] = row

    return hypotheses_df
def forward(self, pred, target, weight=None, eps=1e-6):
    """Return the negative log of the overlap between *pred* and *target*.

    The overlap from ``bbox_overlaps`` is clamped from below at *eps* before
    the logarithm is taken.  When *weight* is given, the loss is multiplied
    by it in place.
    """
    clamped = bbox_overlaps(pred, target).clamp(min=eps)
    loss = -clamped.log()
    if weight is None:
        return loss
    loss *= weight
    return loss
def __anchor_target_layer(self, rpn_cls_score, gt_boxes, im_info, feat_stride, anchor, A):
    """Produce RPN classification labels and box-regression targets.

    Args:
        rpn_cls_score: score map; only ``shape[1:3]`` (height, width) is used.
        gt_boxes: ground-truth boxes.
        im_info: ``im_info[0]`` is the image height and ``im_info[1]`` the
            width used for the inside-image test.
        feat_stride: unused; kept for interface compatibility.
        anchor: all anchors, one ``(x1, y1, x2, y2)`` row each.
        A: number of anchors per feature-map location.

    Returns:
        ``(labels, bbox_targets, bbox_inside_weights, bbox_outside_weights)``
        with labels shaped ``(1, 1, A * height, width)`` and the other three
        shaped ``(1, height, width, A * 4)``.  Labels are 1=fg, 0=bg,
        -1=ignore.
    """
    allowed_border = 0
    total_anchors = anchor.shape[0]
    height, width = rpn_cls_score.shape[1:3]

    # Only anchors completely inside the image are used.
    inds_inside = np.where(
        (anchor[:, 0] >= allowed_border) &
        (anchor[:, 1] >= allowed_border) &
        (anchor[:, 2] < im_info[1] + allowed_border) &
        (anchor[:, 3] < im_info[0] + allowed_border)
    )[0]
    anchors = anchor[inds_inside, :]

    labels = np.empty((len(inds_inside),), dtype=np.float32)
    labels.fill(-1)

    overlaps = bbox_overlaps(anchors, gt_boxes)
    # Per anchor: best ground truth and its overlap.
    argmax_overlap = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlap]
    # Per ground truth: best overlap, then every anchor that reaches it.
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    # Negatives first so that the positive rules below take precedence.
    labels[max_overlaps < self._threshold_for_label_zero] = 0
    labels[gt_argmax_overlaps] = 1
    labels[max_overlaps > self._threshold_for_label_one] = 1

    fg_index = np.where(labels == 1)[0]
    bg_index = np.where(labels == 0)[0]
    fg_index_len = len(fg_index)
    bg_index_len = len(bg_index)

    # Always keep the same 1:3 foreground:background ratio.
    if 3 * fg_index_len > bg_index_len:
        # Too many foregrounds: keep bg // 3 of them.  The previous size
        # (3 * fg - bg) could exceed the number of foregrounds, which makes
        # `choice(..., replace=False)` raise, and did not yield a 1:3 ratio.
        disable_inds = np.random.choice(
            fg_index, size=fg_index_len - bg_index_len // 3, replace=False)
    else:
        disable_inds = np.random.choice(
            bg_index, size=bg_index_len - 3 * fg_index_len, replace=False)
    labels[disable_inds] = -1

    bbox_targets = bbox_transform(anchors, gt_boxes[argmax_overlap, :])

    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = np.array([1.0, 1.0, 1.0, 1.0])

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    # Uniform weighting over all non-ignored anchors; guard against a
    # division by zero when every anchor ended up ignored.
    num_example = max(np.sum(labels >= 0), 1)
    positive_weight = np.ones((1, 4)) * 1.0 / num_example
    negative_weight = np.ones((1, 4)) * 1.0 / num_example
    bbox_outside_weights[labels == 1, :] = positive_weight
    bbox_outside_weights[labels == 0, :] = negative_weight

    # Map the inside-anchor results back onto the full anchor set.
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors,
                                 inds_inside, fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors,
                                  inds_inside, fill=0)

    # labels
    labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
    labels = labels.reshape((1, 1, A * height, width))
    # bbox_targets
    bbox_targets = bbox_targets.reshape((1, height, width, A * 4))
    # bbox_inside_weights
    bbox_inside_weights = bbox_inside_weights.reshape(
        (1, height, width, A * 4))
    # bbox_outside_weights
    bbox_outside_weights = bbox_outside_weights.reshape(
        (1, height, width, A * 4))

    return labels, bbox_targets, bbox_inside_weights, bbox_outside_weights
def label_generate(img, gta):
    """Generate RPN classification and regression targets for one image.

    Anchors are generated on a fixed 60x40 output grid with 9 anchors per
    cell.  Anchors inside the image are labelled positive / negative /
    neutral by their overlap with the ground-truth boxes *gta*, then
    subsampled to at most ``batchsize`` valid anchors.

    Args:
        img: image array; ``img.shape[:2]`` is read as (height, width).
        gta: ground-truth boxes.

    Returns:
        ``(y_rpn_cls, y_rpn_regr)``: ``y_rpn_cls`` concatenates the validity
        mask and the positive mask along the last axis; ``y_rpn_regr``
        concatenates the positive mask repeated 4 times and the per-anchor
        box values along the last axis.  Both have a leading batch axis of 1.
    """
    # init base matrices
    (output_width, output_height) = (60, 40)
    num_anchors = 9
    num_total = output_height * output_width * num_anchors

    y_rpn_overlap = np.zeros(num_total)
    y_is_box_valid = np.zeros(num_total)
    y_rpn_regr = np.zeros((num_total, 4))

    # generate anchors for every cell of the output grid
    anchor_box = _generate_all_bbox(output_width, output_height)

    # Only anchors inside the image are valid.
    _allowed_border = 0
    im_info = img.shape[:2]
    inds_inside = np.where(
        (anchor_box[:, 0] >= -_allowed_border) &
        (anchor_box[:, 1] >= -_allowed_border) &
        (anchor_box[:, 2] < im_info[1] + _allowed_border) &  # width
        (anchor_box[:, 3] < im_info[0] + _allowed_border)    # height
    )[0]

    # Rows of outside anchors stay all-zero.
    y_rpn_regr[inds_inside] = anchor_box[inds_inside, :]

    # calculate overlaps.  ``np.float64`` is used because the ``np.float``
    # alias no longer exists in current NumPy releases.
    overlaps = utils.bbox_overlaps(
        np.ascontiguousarray(y_rpn_regr, dtype=np.float64),
        np.ascontiguousarray(gta, dtype=np.float64))
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = np.zeros(num_total)
    # `overlaps` has one row per anchor (inside or not), so the rows must be
    # selected with `inds_inside` itself, not with 0..len(inds_inside)-1.
    max_overlaps[inds_inside] = overlaps[inds_inside,
                                         argmax_overlaps[inds_inside]]
    # Per ground truth: best overlap, then every anchor that reaches it.
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    # negative
    y_rpn_overlap[max_overlaps < neg_min_overlaps] = 0
    y_is_box_valid[inds_inside] = 1

    # neutral: strictly between the two thresholds -> not used for training
    neutral = np.logical_and(neg_min_overlaps < max_overlaps,
                             max_overlaps < pos_max_overlaps)
    y_rpn_overlap[neutral] = 0
    y_is_box_valid[neutral] = 0

    # positive
    y_rpn_overlap[gt_argmax_overlaps] = 1
    y_is_box_valid[gt_argmax_overlaps] = 1
    y_rpn_overlap[max_overlaps >= pos_max_overlaps] = 1
    y_is_box_valid[max_overlaps >= pos_max_overlaps] = 1

    # subsample positive labels if we have too many
    num_fg = int(fraction * batchsize)
    fg_inds = np.where(
        np.logical_and(y_rpn_overlap == 1, y_is_box_valid == 1))[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg),
                                  replace=False)
        y_is_box_valid[disable_inds] = 0
        y_rpn_overlap[disable_inds] = 0
    fg_inds = np.where(
        np.logical_and(y_rpn_overlap == 1, y_is_box_valid == 1))[0]

    # subsample negative labels if we have too many
    num_bg = batchsize - np.sum(
        np.logical_and(y_rpn_overlap == 1, y_is_box_valid == 1))
    bg_inds = np.where(
        np.logical_and(y_rpn_overlap == 0, y_is_box_valid == 1))[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg),
                                  replace=False)
        y_is_box_valid[disable_inds] = 0
        y_rpn_overlap[disable_inds] = 0

    # box transform for all valid positive samples; every other row keeps
    # the value it was initialised with above
    y_rpn_regr[fg_inds] = utils._compute_targets(
        y_rpn_regr[fg_inds], gta[argmax_overlaps[fg_inds], :])

    y_rpn_overlap = y_rpn_overlap.reshape(output_height, output_width,
                                          num_anchors)
    y_is_box_valid = y_is_box_valid.reshape(output_height, output_width,
                                            num_anchors)
    y_rpn_regr = y_rpn_regr.reshape(output_height, output_width,
                                    num_anchors * 4)

    # add the batch axis
    y_rpn_regr = np.expand_dims(y_rpn_regr, axis=0)
    y_rpn_overlap = np.expand_dims(y_rpn_overlap, axis=0)
    y_is_box_valid = np.expand_dims(y_is_box_valid, axis=0)

    y_rpn_cls = np.concatenate([y_is_box_valid, y_rpn_overlap], axis=3)
    y_rpn_regr = np.concatenate(
        [np.repeat(y_rpn_overlap, 4, axis=3), y_rpn_regr], axis=3)

    return np.copy(y_rpn_cls), np.copy(y_rpn_regr)
def produce_batch(feature_map, gt_boxes, h_w=None, category=None):
    """Turn a feature map and its ground-truth boxes into one RoI training batch.

    Anchors are tiled over the feature map, refined with the deltas returned
    by ``rpn_model.query_cnn``, then clipped, size-filtered, ranked by score
    and passed through NMS.  The ground-truth boxes are appended to the
    surviving proposals, foreground/background RoIs are sampled, and box
    regression targets plus category indicator rows are produced for them.

    Args:
        feature_map: Array-like whose axes 1 and 2 are read as the feature-map
            height and width; it is also fed to ``rpn_model.query_cnn``.
        gt_boxes: Ground-truth boxes (indexed by row; presumably
            ``(x1, y1, x2, y2)`` per row -- confirm against ``bbox_overlaps``).
        h_w: ``(height, width)`` of the input image.  It is indexed
            unconditionally, so the ``None`` default cannot actually be used.
        category: Index into the 200 category slots that receives the targets
            and the indicator value 1 for every sampled RoI.

    Returns:
        ``(rois, batch_box, batch_categories)`` where ``rois`` are the sampled
        proposals, ``batch_box`` has shape ``(rois_num, 200 * 4)`` and
        ``batch_categories`` has shape ``(rois_num, 200)``.
    """
    fmap_shape = np.shape(feature_map)
    height, width = fmap_shape[1], fmap_shape[2]
    num_cells = width * height
    w_stride = h_w[1] / width
    h_stride = h_w[0] / height

    # Base anchors are expressed relative to the tile
    # (0, 0, w_stride - 1, h_stride - 1).
    base_anchors = generate_anchors(w_stride, h_stride)

    # One (x, y, x, y) offset per feature-map cell.
    grid_x, grid_y = np.meshgrid(np.arange(0, width) * w_stride,
                                 np.arange(0, height) * h_stride)
    xs, ys = grid_x.ravel(), grid_y.ravel()
    shifts = np.stack((xs, ys, xs, ys), axis=1)

    # Broadcast the base anchors over every cell offset.
    all_anchors = (base_anchors.reshape((1, anchors_num, 4))
                   + shifts.reshape((num_cells, 1, 4)))
    all_anchors = all_anchors.reshape((num_cells * anchors_num, 4))

    # Run the trained RPN to obtain scores and deltas.
    rpn_out = rpn_model.query_cnn(feature_map)
    scores = rpn_out[0].reshape(-1, 1)
    deltas = np.reshape(rpn_out[1], (-1, 4))

    # Convert the deltas into concrete boxes and clip them to the image.
    proposals = bbox_transform_inv(all_anchors, deltas)
    proposals = clip_boxes(proposals, (h_w[0], h_w[1]))

    # Drop boxes smaller than the configured threshold.
    big_enough = filter_boxes(proposals, small_box_threshold)
    proposals, scores = proposals[big_enough, :], scores[big_enough]

    # Rank by score (descending) and keep at most the top 6000.
    pre_nms_top_n = 6000
    ranking = scores.ravel().argsort()[::-1]
    if pre_nms_top_n > 0:
        ranking = ranking[:pre_nms_top_n]
    proposals, scores = proposals[ranking, :], scores[ranking]

    # NMS at IoU 0.7, then keep at most the top 300.
    post_nms_top_n = 300
    survivors = py_cpu_nms(np.hstack((proposals, scores)), 0.7)
    if post_nms_top_n > 0:
        survivors = survivors[:post_nms_top_n]
    proposals, scores = proposals[survivors, :], scores[survivors]

    # Add the ground-truth boxes to the proposal set as well.
    proposals = np.vstack((proposals, gt_boxes))

    # Match every proposal with its best-overlapping ground-truth box.
    overlaps = bbox_overlaps(proposals, gt_boxes)
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)

    # Sample foreground RoIs without replacement.
    fg_inds = np.where(max_overlaps >= FG_THRESH)[0]
    num_fg = min(int(BATCH * FG_FRAC), fg_inds.size)
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds, size=num_fg, replace=False)

    # Sample background RoIs without replacement to fill the rest of the batch.
    bg_mask = (max_overlaps < BG_THRESH_HI) & (max_overlaps >= BG_THRESH_LO)
    bg_inds = np.where(bg_mask)[0]
    num_bg = min(BATCH - num_fg, bg_inds.size)
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds, size=num_bg, replace=False)

    # Foreground indices first, then background.
    keep_inds = np.append(fg_inds, bg_inds)
    rois = proposals[keep_inds]
    targets = bbox_transform(rois, gt_boxes[gt_assignment[keep_inds]])

    # Scatter each RoI's targets into the slot of the given category.
    rois_num = targets.shape[0]
    batch_box = np.zeros((rois_num, 200, 4))
    for slot_row, target in zip(batch_box, targets):
        slot_row[category] = target
    batch_box = batch_box.reshape((rois_num, -1))

    # Category indicator rows: 1 in the given category slot.
    batch_categories = np.zeros((rois_num, 200, 1))
    for slot_row in batch_categories:
        slot_row[category] = 1
    batch_categories = batch_categories.reshape((rois_num, -1))

    return rois, batch_box, batch_categories
def produce_batch(image_file, true_boxes):
    """Build one RPN training batch (feature tiles, labels, box targets) for an image.

    The image is resized to ``image_size`` x ``image_size``, scaled to [0, 1]
    and passed through ``pretrained_model``.  Anchors are tiled over the
    resulting feature map, labelled against ``true_boxes`` and subsampled;
    for every kept anchor the ``FILTER_SIZE`` x ``FILTER_SIZE`` feature tile
    around its feature-map cell is cut out together with that cell's labels
    and regression targets.

    Args:
        image_file: Path of the image to open with ``Image.open``.
        true_boxes: Ground-truth boxes, indexed by row and passed to
            ``bbox_overlaps`` (presumably ``(x1, y1, x2, y2)`` rows in resized
            image coordinates -- confirm with the caller).

    Returns:
        ``(batch_tiles, batch_labels, batch_targets)``: an array of feature
        tiles, and two nested lists holding, per tile, the ``anchor_number``
        labels (1 foreground, 0 background, -1 ignored) and the
        ``4 * anchor_number`` regression targets of that cell.

    Note:
        Relies on module-level configuration (``image_size``,
        ``pretrained_model``, ``anchor_number``, ``ANCHOR_RATIOS``,
        ``ANCHOR_SCALES``, ``FG_THRESHOLD``, ``BG_THRESHOLD``, ``BATCH_SIZE``,
        ``BG_FG_FRAC``, ``FILTER_SIZE``) and consumes ``np.random`` state.
    """
    image = Image.open(image_file).resize((image_size, image_size), Image.NEAREST)
    data = asarray(image) / 255.0
    del image
    feature_map = pretrained_model.predict(
        data.reshape(-1, data.shape[0], data.shape[1], data.shape[2]))
    del data

    # The feature map is treated as square: axis 1 gives its side length.
    feature_size = feature_map.shape[1]
    feature_stride = int(image_size / feature_size)
    number_feature_points = feature_size * feature_size

    # One (x, y, x, y) offset per feature-map cell.
    shift = np.arange(0, feature_size) * feature_stride
    shift_x, shift_y = np.meshgrid(shift, shift)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()

    base_anchors = generate_anchors(feature_stride, feature_stride,
                                    ratios=ANCHOR_RATIOS, scales=ANCHOR_SCALES)
    all_anchors = (base_anchors.reshape((1, anchor_number, 4))
                   + shifts.reshape((1, number_feature_points, 4)).transpose((1, 0, 2)))
    total_anchor_number = anchor_number * number_feature_points
    all_anchors = all_anchors.reshape((total_anchor_number, 4))

    # Only keep anchors inside image + border.
    border = 0  # could also be FILTER_SIZE x feature stride
    inds_inside = np.where(
        (all_anchors[:, 0] >= -border)
        & (all_anchors[:, 1] >= -border)
        & (all_anchors[:, 2] < image_size + border)
        & (all_anchors[:, 3] < image_size + border)
    )[0]
    anchors = all_anchors[inds_inside]
    useful_anchor_number = len(inds_inside)

    overlaps = bbox_overlaps(anchors, true_boxes)
    # Which true box overlaps each anchor the most?
    which_box = overlaps.argmax(axis=1)
    anchor_max_overlaps = overlaps[np.arange(overlaps.shape[0]), which_box]
    # Which anchor(s) overlap each true box the most?  Ties are all kept.
    which_anchor = overlaps.argmax(axis=0)
    box_max_overlaps = overlaps[which_anchor, np.arange(overlaps.shape[1])]
    which_anchor_v2 = np.where(overlaps == box_max_overlaps)[0]

    # 1 = foreground, 0 = background, -1 = ignored.
    labels = np.full((useful_anchor_number,), -1, dtype=np.float32)
    labels[which_anchor_v2] = 1
    labels[anchor_max_overlaps >= FG_THRESHOLD] = 1
    # Background is assigned last, so an anchor at or below BG_THRESHOLD ends
    # up background even when it was the best match for some box.
    labels[anchor_max_overlaps <= BG_THRESHOLD] = 0

    fg_inds = np.where(labels == 1)[0]
    bg_inds = np.where(labels == 0)[0]

    # Cap the number of foreground anchors.
    num_fg = int(BATCH_SIZE / (1 + BG_FG_FRAC))
    if len(fg_inds) > num_fg:
        disable_inds = np.random.choice(
            fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        labels[disable_inds] = -1
    fg_inds = np.where(labels == 1)[0]

    # Cap the number of background anchors relative to the kept foreground.
    num_bg = int(len(fg_inds) * BG_FG_FRAC)
    if len(bg_inds) > num_bg:
        disable_inds = np.random.choice(
            bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1

    anchor_batch_inds = inds_inside[labels != -1]
    np.random.shuffle(anchor_batch_inds)
    # Anchor index -> feature-map cell index.  Integer floor division replaces
    # the former ``(a / b).astype(np.int)``: the ``np.int`` alias was removed
    # in NumPy 1.24, and the indices are non-negative so the result is equal.
    feature_batch_inds = (anchor_batch_inds // anchor_number).astype(int)

    # Zero-pad the spatial axes so a full tile exists around border cells.
    pad_size = int((FILTER_SIZE - 1) / 2)
    padded_fcmap = np.pad(
        feature_map,
        ((0, 0), (pad_size, pad_size), (pad_size, pad_size), (0, 0)),
        mode='constant')
    # Drop only the batch axis; a bare squeeze would also remove a size-1
    # spatial or channel axis and break the 3-axis slicing below.
    padded_fcmap = np.squeeze(padded_fcmap, axis=0)

    batch_tiles = []
    for ind in feature_batch_inds:
        # (x, y) is the feature-map cell addressed by this flat index.
        y, x = divmod(int(ind), feature_size)
        batch_tiles.append(padded_fcmap[y:y + FILTER_SIZE, x:x + FILTER_SIZE, :])

    # unmap re-expands to all anchors, marking the out-of-bounds ones with -1.
    full_labels = unmap(labels, total_anchor_number, inds_inside, fill=-1)
    batch_labels = full_labels.reshape(-1, 1, 1, anchor_number)[feature_batch_inds]

    # Regression targets are computed for the positive anchors only and are
    # zero everywhere else after unmapping.
    positive = labels == 1
    pos_anchors = all_anchors[inds_inside[positive]]
    targets = bbox_transform(pos_anchors, true_boxes[which_box, :][positive])
    targets = unmap(targets, total_anchor_number, inds_inside[positive], fill=0)
    batch_targets = targets.reshape(-1, 1, 1, 4 * anchor_number)[feature_batch_inds]

    return np.asarray(batch_tiles), batch_labels.tolist(), batch_targets.tolist()
def rpn_targets(self, all_anchors, im, gt):
    """Assign RPN labels and box-regression targets to a set of anchors.

    Anchors that are not fully inside the image are ignored.  The remaining
    ones are labelled from their overlap with the ground-truth boxes,
    subsampled to at most ``self.batch_size`` labelled anchors, and finally
    mapped back onto the full anchor set.

    Args:
        all_anchors: NumPy array of anchors, one row per anchor; columns 0-3
            are compared against the image bounds.
        im: Object whose ``size()`` ends with ``(height, width)`` (e.g. a
            torch tensor).
        gt: Mapping with a ``'boxes'`` entry; it is handed to
            ``bbox_overlaps`` and converted with ``.numpy()``.

    Returns:
        ``(labels, bbox_targets)`` covering every row of ``all_anchors``
        (labels: 1 positive, 0 negative, -1 don't care), or ``(None, None)``
        when no anchor lies inside the image.
    """
    total_anchors = all_anchors.shape[0]
    gt_boxes = gt['boxes']
    height, width = im.size()[-2:]

    # Only anchors lying completely inside the image are used.
    _allowed_border = 0
    within_image = (
        (all_anchors[:, 0] >= -_allowed_border)
        & (all_anchors[:, 1] >= -_allowed_border)
        & (all_anchors[:, 2] < width + _allowed_border)
        & (all_anchors[:, 3] < height + _allowed_border)
    )
    inds_inside = np.where(within_image)[0]
    anchors = all_anchors[inds_inside, :]
    num_inside = anchors.shape[0]

    if num_inside == 0:
        print('{0}x{1} -> {2}'.format(height, width, total_anchors))
        return None, None

    # Label convention: 1 positive, 0 negative, -1 don't care.
    labels = np.full((num_inside,), -1, dtype=np.float32)

    # Overlap matrix with one row per anchor and one column per gt box.
    overlaps = bbox_overlaps(torch.from_numpy(anchors), gt_boxes).numpy()
    gt_boxes = gt_boxes.numpy()

    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    # Every anchor that attains the best overlap of some gt box (ties kept).
    best_for_gt = np.where(overlaps == overlaps.max(axis=0))[0]

    # Background goes first so that the positive rules below can override it.
    labels[max_overlaps < self.negative_overlap] = 0
    labels[best_for_gt] = 1
    labels[max_overlaps >= self.positive_overlap] = 1

    # Randomly drop surplus positives.
    num_fg = int(self.fg_fraction * self.batch_size)
    fg_inds = np.where(labels == 1)[0]
    surplus_fg = len(fg_inds) - num_fg
    if surplus_fg > 0:
        labels[npr.choice(fg_inds, size=surplus_fg, replace=False)] = -1

    # Randomly drop surplus negatives so the batch size is not exceeded.
    num_bg = self.batch_size - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    surplus_bg = len(bg_inds) - num_bg
    if surplus_bg > 0:
        labels[npr.choice(bg_inds, size=surplus_bg, replace=False)] = -1

    bbox_targets = bbox_transform(anchors, gt_boxes[argmax_overlaps, :])

    # Expand the results back to the original anchor set.
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    return labels, bbox_targets
def __anchor_target_layer(self, rpn_cls_score, gt_boxes, im_info, feat_stride, anchor, A):
    """Produce RPN labels, box targets and loss weights for all anchors.

    Anchors crossing the image boundary are ignored.  An inside anchor is
    labelled 0 when its best overlap is below 0.3 and 1 when it attains the
    best overlap of some ground-truth box; everything else stays -1.  No
    subsampling is performed.

    Args:
        rpn_cls_score: Array whose axes 1 and 2 give the feature-map height
            and width used for the output shapes.
        gt_boxes: Ground-truth boxes, indexed by row.
        im_info: Sequence whose item 0 bounds anchor column 3 and item 1
            bounds anchor column 2 (presumably image height and width).
        feat_stride: Not used by this function.
        anchor: Array of all anchors, one row per anchor.
        A: Number of anchors per feature-map cell.

    Returns:
        ``(labels, bbox_targets, bbox_inside_weights, bbox_outside_weights)``
        with shapes ``(1, 1, A * height, width)`` for the labels and
        ``(1, height, width, A * 4)`` for the other three.
    """
    allowed_border = 0
    total_anchors = anchor.shape[0]
    height, width = rpn_cls_score.shape[1:3]

    inside = (
        (anchor[:, 0] >= allowed_border)
        & (anchor[:, 1] >= allowed_border)
        & (anchor[:, 2] < im_info[1] + allowed_border)
        & (anchor[:, 3] < im_info[0] + allowed_border)
    )
    inds_inside = np.where(inside)[0]
    anchors = anchor[inds_inside, :]
    num_inside = len(inds_inside)

    # 1 = positive, 0 = negative, -1 = ignored.
    labels = np.full((num_inside,), -1, dtype=np.float32)

    overlaps = bbox_overlaps(anchors, gt_boxes)
    argmax_overlap = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(num_inside), argmax_overlap]
    best_anchor_per_gt = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[best_anchor_per_gt, np.arange(overlaps.shape[1])]
    # All anchors tying for the best overlap of a gt box count as positives.
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    # Negatives first, so the best-match positives take precedence.
    labels[max_overlaps < 0.3] = 0
    labels[gt_argmax_overlaps] = 1

    bbox_targets = bbox_transform(anchors, gt_boxes[argmax_overlap, :])

    is_positive = labels == 1
    is_negative = labels == 0

    # Inside weights enable the regression terms for positive anchors only.
    bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32)
    bbox_inside_weights[is_positive, :] = 1.0

    # Outside weights: every labelled anchor gets 1 / (number of labelled anchors).
    num_example = np.sum(labels >= 0)
    uniform_weight = np.ones((1, 4)) * 1.0 / num_example
    bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32)
    bbox_outside_weights[is_positive, :] = uniform_weight
    bbox_outside_weights[is_negative, :] = uniform_weight

    # Map everything back onto the complete anchor set.
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0)

    # Labels: (1, H, W, A) -> (1, A, H, W) -> (1, 1, A * H, W).
    labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
    labels = labels.reshape((1, 1, A * height, width))

    per_cell_shape = (1, height, width, A * 4)
    bbox_targets = bbox_targets.reshape(per_cell_shape)
    bbox_inside_weights = bbox_inside_weights.reshape(per_cell_shape)
    bbox_outside_weights = bbox_outside_weights.reshape(per_cell_shape)

    return labels, bbox_targets, bbox_inside_weights, bbox_outside_weights
def produce_batch(filepath, gt_boxes, h_w, category):
    """Load an image and build one RoI training batch for it.

    The image is rescaled by the module-level ``scale`` factors, pushed
    through ``pretrained_model`` to get a feature map and through
    ``rpn_model`` to get scores and deltas.  The resulting proposals are
    clipped, size-filtered, ranked, passed through NMS and merged with the
    ground-truth boxes; foreground/background RoIs are then sampled and
    converted into regression targets and category indicator rows.

    Args:
        filepath: Path handed to ``load_img``.
        gt_boxes: Ground-truth boxes, indexed by row.
        h_w: ``(height, width)`` used for the anchor strides and for clipping.
        category: Index into the 200 category slots that receives the targets
            and the indicator value 1 for every sampled RoI.

    Returns:
        ``(rois, batch_box, batch_categories)`` where ``batch_box`` has shape
        ``(rois_num, 200 * 4)`` and ``batch_categories`` has shape
        ``(rois_num, 200)``.
    """
    img = load_img(filepath)
    raw_shape = np.shape(img)
    img_width = raw_shape[1] * scale[1]
    img_height = raw_shape[0] * scale[0]
    img = img.resize((int(img_width), int(img_height)))

    # Feed the image to the pretrained model to obtain the feature map.
    model_input = np.expand_dims(img_to_array(img), axis=0)
    feature_map = pretrained_model.predict(model_input)
    fmap_shape = np.shape(feature_map)
    height, width = fmap_shape[1], fmap_shape[2]
    num_feature_map = width * height

    # Output stride along each axis.
    w_stride = h_w[1] / width
    h_stride = h_w[0] / height

    # Base anchors are 9 anchors relative to the tile
    # (0, 0, w_stride - 1, h_stride - 1).
    base_anchors = generate_anchors(w_stride, h_stride)

    # One (x, y, x, y) offset per feature-map cell.
    grid_x, grid_y = np.meshgrid(np.arange(0, width) * w_stride,
                                 np.arange(0, height) * h_stride)
    flat_x, flat_y = grid_x.ravel(), grid_y.ravel()
    shifts = np.stack((flat_x, flat_y, flat_x, flat_y), axis=1)

    # Apply the base anchors to every cell: num_feature_map * 9 anchors.
    all_anchors = (base_anchors.reshape((1, 9, 4))
                   + shifts.reshape((num_feature_map, 1, 4)))
    all_anchors = all_anchors.reshape((num_feature_map * 9, 4))

    # Query the pretrained RPN for proposal scores and box deltas.
    rpn_out = rpn_model.predict(feature_map)
    scores = rpn_out[0].reshape(-1, 1)
    deltas = np.reshape(rpn_out[1], (-1, 4))

    # Turn deltas into boxes and clip them to the image.
    proposals = bbox_transform_inv(all_anchors, deltas)
    proposals = clip_boxes(proposals, (h_w[0], h_w[1]))

    # Remove small boxes (threshold 40).
    large_enough = filter_boxes(proposals, 40)
    proposals, scores = proposals[large_enough, :], scores[large_enough]

    # Sort by score (descending) and keep at most the top 6000.
    pre_nms_top_n = 6000
    by_score = scores.ravel().argsort()[::-1]
    if pre_nms_top_n > 0:
        by_score = by_score[:pre_nms_top_n]
    proposals, scores = proposals[by_score, :], scores[by_score]

    # NMS at IoU 0.7, then keep at most the top 300.
    post_nms_top_n = 300
    kept = py_cpu_nms(np.hstack((proposals, scores)), 0.7)
    if post_nms_top_n > 0:
        kept = kept[:post_nms_top_n]
    proposals, scores = proposals[kept, :], scores[kept]

    # The ground-truth boxes join the proposal set.
    proposals = np.vstack((proposals, gt_boxes))

    # Best-matching ground-truth box for every proposal.
    overlaps = bbox_overlaps(proposals, gt_boxes)
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)

    # Foreground sampling, without replacement.
    fg_inds = np.where(max_overlaps >= FG_THRESH)[0]
    fg_count = min(int(BATCH * FG_FRAC), fg_inds.size)
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds, size=fg_count, replace=False)

    # Background sampling, without replacement, for the remaining slots.
    is_background = (max_overlaps < BG_THRESH_HI) & (max_overlaps >= BG_THRESH_LO)
    bg_inds = np.where(is_background)[0]
    bg_count = min(BATCH - fg_count, bg_inds.size)
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds, size=bg_count, replace=False)

    # Selected indices: foreground first, then background.
    keep_inds = np.append(fg_inds, bg_inds)
    rois = proposals[keep_inds]
    gt_rois = gt_boxes[gt_assignment[keep_inds]]
    targets = bbox_transform(rois, gt_rois)

    # Place each RoI's targets in the slot of the given category.
    rois_num = targets.shape[0]
    batch_box = np.zeros((rois_num, 200, 4))
    for slot_row, target in zip(batch_box, targets):
        slot_row[category] = target
    batch_box = batch_box.reshape((rois_num, -1))

    # Category indicator rows: 1 in the given category slot.
    batch_categories = np.zeros((rois_num, 200, 1))
    for slot_row in batch_categories:
        slot_row[category] = 1
    batch_categories = batch_categories.reshape((rois_num, -1))

    return rois, batch_box, batch_categories