import numpy as np


def apply_nms(all_boxes, thresh):
    """Apply non-maximum suppression to all predicted boxes output by the
    test_net method."""
    num_classes = len(all_boxes)
    num_images = len(all_boxes[0])
    nms_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
    for cls_ind in range(num_classes):
        for im_ind in range(num_images):
            dets = all_boxes[cls_ind][im_ind]
            if len(dets) == 0:
                continue
            x1 = dets[:, 0]
            y1 = dets[:, 1]
            x2 = dets[:, 2]
            y2 = dets[:, 3]
            # Drop degenerate boxes with non-positive width or height.
            inds = np.where((x2 > x1) & (y2 > y1))[0]
            dets = dets[inds, :]
            if len(dets) == 0:
                continue
            keep = nms(dets, thresh)
            if len(keep) == 0:
                continue
            nms_boxes[cls_ind][im_ind] = dets[keep, :].copy()
    return nms_boxes
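# Most snippets here call a module-level `nms(dets, thresh)` that returns the
# indices of the boxes to keep (the CTPN snippet calls it as `nms.nms`, and
# the EAST-style `process` method at the end uses a different, quad-based
# `nms` with another signature). Below is a minimal pure-numpy sketch of the
# greedy IoU-based NMS the box-based call sites assume; the repos typically
# ship a Cython or GPU version with the same interface.
import numpy as np


def nms(dets, thresh):
    """dets: (N, 5) array of (x1, y1, x2, y2, score); returns kept indices."""
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # IoU of the top-scoring box with every remaining box.
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        # Drop boxes that overlap the kept box by more than thresh.
        order = order[np.where(iou <= thresh)[0] + 1]
    return keep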
import sys
import pickle

import numpy as np
from tqdm import tqdm


def tester(fusion, NUM, bbox_vote=False, max_per_image=400):
    # 99999 images; 501 class slots (index 0 is the background class).
    all_boxes = [[[] for _ in range(99999)] for _ in range(501)]
    for i in tqdm(range(99999), file=sys.stdout, leave=False,
                  dynamic_ncols=True):
        for j in range(1, 501):
            # Stack the detections of all NUM models for class j, image i.
            det_boxes = np.vstack([fusion[q][j][i] for q in range(NUM)])
            keep = nms(det_boxes, 0.4)
            det_boxes_after_nms = det_boxes[keep, :]
            if bbox_vote:
                cls_dets_after_vote = bbox_voting(det_boxes_after_nms,
                                                  det_boxes, threshold=0.5)
                all_boxes[j][i] = cls_dets_after_vote
            else:
                all_boxes[j][i] = det_boxes_after_nms
        # Limit to max_per_image detections over all classes.
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in range(1, 501)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, 501):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
    # Step 3: save and eval.
    with open('output/model_all-test-mst-nms0.4-bbox-vote0.5.pkl', 'wb') as f:
        pickle.dump(all_boxes, f, protocol=pickle.HIGHEST_PROTOCOL)
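# `bbox_voting` is not defined in this file. A minimal numpy sketch of the
# usual box-voting scheme (replace each post-NMS box by the score-weighted
# average of all pre-NMS boxes that overlap it by at least `threshold` IoU);
# illustrative only, not necessarily the exact implementation used here.
import numpy as np


def bbox_voting(dets_after_nms, dets_all, threshold=0.5):
    """Both inputs are (N, 5) arrays of (x1, y1, x2, y2, score)."""
    voted = dets_after_nms.copy()
    areas_all = ((dets_all[:, 2] - dets_all[:, 0] + 1) *
                 (dets_all[:, 3] - dets_all[:, 1] + 1))
    for k, det in enumerate(dets_after_nms):
        ix1 = np.maximum(det[0], dets_all[:, 0])
        iy1 = np.maximum(det[1], dets_all[:, 1])
        ix2 = np.minimum(det[2], dets_all[:, 2])
        iy2 = np.minimum(det[3], dets_all[:, 3])
        inter = (np.maximum(0.0, ix2 - ix1 + 1) *
                 np.maximum(0.0, iy2 - iy1 + 1))
        area_det = (det[2] - det[0] + 1) * (det[3] - det[1] + 1)
        iou = inter / (area_det + areas_all - inter)
        neighbours = dets_all[iou >= threshold]
        weights = neighbours[:, 4]
        # Score-weighted average of the neighbours' coordinates; the box
        # itself (IoU 1.0) is always among the neighbours.
        voted[k, :4] = ((weights[:, None] * neighbours[:, :4]).sum(axis=0) /
                        weights.sum())
    return voted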
def forward(self, inputs):
    if self.training:
        img_batch, annotations = inputs
    else:
        img_batch = inputs

    # ResNet backbone.
    x = self.conv1(img_batch)
    x = self.bn1(x)
    x = self.relu(x)
    x = self.maxpool(x)

    x1 = self.layer1(x)
    x2 = self.layer2(x1)
    x3 = self.layer3(x2)
    x4 = self.layer4(x3)

    # FPN over C3-C5, then the shared regression/classification heads
    # applied to every pyramid level.
    features = self.fpn([x2, x3, x4])
    regression = torch.cat(
        [self.regressionModel(feature) for feature in features], dim=1)
    classification = torch.cat(
        [self.classificationModel(feature) for feature in features], dim=1)
    anchors = self.anchors(img_batch)

    if self.training:
        return self.focalLoss(classification, regression, anchors,
                              annotations)
    else:
        transformed_anchors = self.regressBoxes(anchors, regression)
        transformed_anchors = self.clipBoxes(transformed_anchors, img_batch)

        # Keep anchors whose best class score clears the threshold.
        scores = torch.max(classification, dim=2, keepdim=True)[0]
        scores_over_thresh = (scores > 0.05)[0, :, 0]
        if scores_over_thresh.sum() == 0:
            # No boxes to NMS, just return empty results.
            return [torch.zeros(0), torch.zeros(0), torch.zeros(0, 4)]

        classification = classification[:, scores_over_thresh, :]
        transformed_anchors = transformed_anchors[:, scores_over_thresh, :]
        scores = scores[:, scores_over_thresh, :]

        anchors_nms_idx = nms(
            torch.cat([transformed_anchors, scores],
                      dim=2)[0, :, :].cpu().numpy(), 0.5)
        nms_scores, nms_class = classification[0, anchors_nms_idx, :].max(
            dim=1)
        return [nms_scores, nms_class,
                transformed_anchors[0, anchors_nms_idx, :]]
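# Inference usage sketch for the forward pass above. `model` and `img` are
# placeholders, not names from this repo: `model` is an instance of this
# network and `img` a normalized float tensor of shape (3, H, W).
model.eval()
with torch.no_grad():
    # In eval mode, forward() takes only the image batch and returns
    # [scores, class indices, boxes] after NMS.
    scores, labels, boxes = model(img.unsqueeze(0))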
import numpy as np
import tensorflow as tf


def proposal(cls_pre, box_pre_y, box_pre_offset, img_size):
    """Compute proposals from the CTPN model outputs and filter them with NMS.

    Inputs: cls_pre, box_pre_y, box_pre_offset -- the outputs of the CTPN
    model; img_size -- the original (height, width) of the image.
    Outputs: scores, proposals, and the refined left/right side offsets.
    """
    h_feat, w_feat = cls_pre.shape[0:2]
    K = h_feat * w_feat
    base_anchors = gtf.gen_base_anchors()
    A = base_anchors.shape[0]
    base_anchors = base_anchors.reshape(1, A, 4)
    # Anchor offsets in x and y for every point on the feature map.
    shift_x = np.arange(w_feat) * gtf._stripe
    shift_y = np.arange(h_feat) * gtf._stripe
    # Build the 2-D grid of (x, y) offsets, then flatten it.
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shift_x = shift_x.ravel()
    shift_y = shift_y.ravel()
    shift = np.stack([shift_x, shift_y, shift_x, shift_y]).transpose()
    shift = shift.reshape(K, 1, 4)
    all_anchors = base_anchors + shift
    all_anchors = all_anchors.reshape((K * A, 4))

    box_pre_y = np.reshape(box_pre_y, [-1, 2])
    box_pre_offset = np.reshape(box_pre_offset, -1)
    boxes, x_left_fixed, x_right_fixed = target_calc_inv_no_side_labels(
        all_anchors, box_pre_y, box_pre_offset)

    # Keep only the boxes that lie entirely inside the image.
    index_inside = np.where((boxes[:, 0] >= 0) & (boxes[:, 1] >= 0) &
                            (boxes[:, 2] < img_size[1]) &
                            (boxes[:, 3] < img_size[0]))[0]
    proposals = boxes[index_inside, :]
    x_left_fixed = x_left_fixed[index_inside]
    x_right_fixed = x_right_fixed[index_inside]

    # Get the confidence score (text probability) for each anchor.
    cls_pre = np.reshape(cls_pre, [-1, 2])
    cls_softmax = tf.nn.softmax(cls_pre, axis=1)
    scores = cls_softmax.numpy()[:, 1]
    scores = scores[index_inside]

    # Keep the proposals with score > 0.7.
    index_keep = np.where(scores > 0.7)[0]
    proposals = proposals[index_keep]
    x_left_fixed = x_left_fixed[index_keep]
    x_right_fixed = x_right_fixed[index_keep]
    scores = scores[index_keep]

    # NMS filter on the score-sorted proposals.
    order = scores.argsort()[::-1]
    proposals = proposals[order, :]
    scores = scores[order]
    x_left_fixed = x_left_fixed[order]
    x_right_fixed = x_right_fixed[order]
    scores = np.expand_dims(scores, axis=1)
    nms_input = np.hstack((proposals, scores)).astype(np.float32)
    nms_thresh = 0.2
    keep = nms.nms(nms_input, nms_thresh)
    proposals = proposals[keep, :]
    x_left_fixed = x_left_fixed[keep]
    x_right_fixed = x_right_fixed[keep]
    scores = scores[keep]
    return scores, proposals, x_left_fixed, x_right_fixed
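# `gtf.gen_base_anchors` and `gtf._stripe` come from the repo's ground-truth
# helper module. For reference, CTPN base anchors are fixed-width strips
# (width = the 16 px feature stride) with around ten heights; the sketch
# below uses one commonly seen height set, which may differ from this repo's.
import numpy as np


def gen_base_anchors(base_width=16,
                     heights=(11, 16, 23, 33, 48, 68, 97, 139, 198, 283)):
    """Return an (A, 4) array of (x1, y1, x2, y2) base anchors."""
    anchors = np.zeros((len(heights), 4), dtype=np.float32)
    ctr = (base_width - 1) / 2.0  # anchor centre within the first cell
    for i, h in enumerate(heights):
        anchors[i] = [0, ctr - (h - 1) / 2.0,
                      base_width - 1, ctr + (h - 1) / 2.0]
    return anchors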
import os
import pickle

import cv2
import numpy as np


def test_net(sess, net, imdb, weights_filename, max_per_image=100, thresh=0.):
    """Test a Fast R-CNN network on an image database."""
    np.random.seed(cfg.RNG_SEED)
    num_images = len(imdb.image_index)
    # All detections are collected into:
    #   all_boxes[cls][image] = N x 5 array of detections in
    #   (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]

    output_dir = get_output_dir(imdb, weights_filename)
    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}

    for i in range(num_images):
        im = cv2.imread(imdb.image_path_at(i))

        _t['im_detect'].tic()
        scores, boxes = im_detect(sess, net, im)
        _t['im_detect'].toc()

        _t['misc'].tic()
        # Skip j = 0, because it's the background class.
        for j in range(1, imdb.num_classes):
            inds = np.where(scores[:, j] > thresh)[0]
            cls_scores = scores[inds, j]
            cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)
            keep = nms(cls_dets, cfg.TEST.NMS)
            cls_dets = cls_dets[keep, :]
            all_boxes[j][i] = cls_dets

        # Limit to max_per_image detections *over all classes*.
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in range(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
        _t['misc'].toc()

        print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s'
              .format(i + 1, num_images, _t['im_detect'].average_time,
                      _t['misc'].average_time))

    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    imdb.evaluate_detections(all_boxes, output_dir)
def _nms_filter(all_seg_cls_frm_dets, max_per_frame=20):
    # Transpose the nesting from [seg][cls][frame] to [seg][frame][cls]
    # while applying per-frame NMS and a per-frame detection budget.
    all_seg_frm_cls_dets = []
    for seg_idx in range(len(all_seg_cls_frm_dets)):
        seg_dets = all_seg_cls_frm_dets[seg_idx]
        all_seg_frm_cls_dets.append([])
        cls_num = len(seg_dets)
        seg_len = len(seg_dets[0])

        # Init new container.
        for seg_frm_idx in range(seg_len):
            all_seg_frm_cls_dets[seg_idx].append([])
            for j in range(cls_num):
                all_seg_frm_cls_dets[seg_idx][seg_frm_idx].append([])

        # Perform NMS, then drop detections scoring below 0.1.
        for cls_idx, cls_dets in enumerate(seg_dets):
            for seg_frm_idx, frm_dets in enumerate(cls_dets):
                keep = nms(frm_dets, 0.3)
                all_seg_frm_cls_dets[seg_idx][seg_frm_idx][cls_idx] = [
                    frm_dets[i] for i in keep if frm_dets[i][4] >= 0.1
                ]

        # Reserve the top N detections per frame across all classes.
        for seg_frm_idx in range(seg_len):
            frm_dets = []
            for cls_idx in range(cls_num):
                seg_frm_cls_dets = \
                    all_seg_frm_cls_dets[seg_idx][seg_frm_idx][cls_idx]
                for det in seg_frm_cls_dets:
                    frm_dets.append({'det': det,
                                     'scr': det[-1],
                                     'cls': cls_idx})
            sorted_frm_dets = sorted(frm_dets,
                                     key=lambda item: item['scr'],
                                     reverse=True)[:max_per_frame]
            frm_cls_dets = [[] for _ in range(cls_num)]
            for frm_det in sorted_frm_dets:
                frm_cls_dets[frm_det['cls']].append(frm_det['det'])
            for cls_idx in range(cls_num):
                all_seg_frm_cls_dets[seg_idx][seg_frm_idx][cls_idx] = \
                    frm_cls_dets[cls_idx]
    return all_seg_frm_cls_dets
import os

import cv2
import numpy as np


def demo(sess, net, image_dir, image_name):
    im_file = os.path.join(image_dir, image_name)
    im = cv2.imread(im_file)

    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(sess, net, im)
    timer.toc()
    print('Detection took {:.3f}s for {:d} object proposals'.format(
        timer.total_time, boxes.shape[0]))

    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    # Skip the background class (index 0) and run per-class NMS.
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        vis_detections(im, cls, dets, thresh=CONF_THRESH)
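# `vis_detections` is not shown here. A minimal matplotlib-based sketch in
# the spirit of the original py-faster-rcnn demo; names and styling are
# illustrative, not the repo's exact implementation.
import matplotlib.pyplot as plt
import numpy as np


def vis_detections(im, class_name, dets, thresh=0.5):
    """Draw detections with score >= thresh on a BGR (OpenCV) image."""
    inds = np.where(dets[:, -1] >= thresh)[0]
    if len(inds) == 0:
        return
    im_rgb = im[:, :, (2, 1, 0)]  # BGR -> RGB for matplotlib
    fig, ax = plt.subplots(figsize=(12, 12))
    ax.imshow(im_rgb, aspect='equal')
    for i in inds:
        bbox = dets[i, :4]
        score = dets[i, -1]
        ax.add_patch(plt.Rectangle((bbox[0], bbox[1]),
                                   bbox[2] - bbox[0], bbox[3] - bbox[1],
                                   fill=False, edgecolor='red', linewidth=2))
        ax.text(bbox[0], bbox[1] - 2,
                '{:s} {:.3f}'.format(class_name, score),
                color='white', fontsize=12,
                bbox=dict(facecolor='blue', alpha=0.5))
    ax.set_title('{} detections (score >= {:.2f})'.format(class_name, thresh))
    plt.axis('off')
    plt.tight_layout()
    plt.show()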
import numpy as np


def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key,
                   _feat_stride, anchors, num_anchors):
    if type(cfg_key) == bytes:
        cfg_key = cfg_key.decode('utf-8')
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    # Pull out the foreground scores and the bounding-box deltas.
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
    scores = scores.reshape((-1, 1))

    # Decode the predicted deltas relative to the anchors (similar in spirit
    # to YOLOv2 predicting relative to a grid cell) to generate proposals.
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    # Clip the proposals to the image boundary.
    proposals = clip_boxes(proposals, im_info[:2])

    # Rank the proposals: flatten the scores and sort in descending order.
    order = scores.ravel().argsort()[::-1]
    # Keep the top pre_nms_topN (e.g. 12000).
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # Non-maximum suppression.
    keep = nms(np.hstack((proposals, scores)), nms_thresh)

    # Keep the top post_nms_topN after NMS (e.g. 2000).
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Only a single image is supported, so the batch index is always 0.
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return blob, scores
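# `bbox_transform_inv` and `clip_boxes` live in the repo's bbox utilities.
# Minimal numpy sketches of the standard Faster R-CNN decoding, simplified
# to (N, 4) deltas (one box per anchor), for reference only.
import numpy as np


def bbox_transform_inv(anchors, deltas):
    """Apply (dx, dy, dw, dh) deltas to (x1, y1, x2, y2) anchors."""
    widths = anchors[:, 2] - anchors[:, 0] + 1.0
    heights = anchors[:, 3] - anchors[:, 1] + 1.0
    ctr_x = anchors[:, 0] + 0.5 * widths
    ctr_y = anchors[:, 1] + 0.5 * heights

    # Centre offsets are scaled by anchor size; sizes are log-space deltas.
    pred_ctr_x = deltas[:, 0] * widths + ctr_x
    pred_ctr_y = deltas[:, 1] * heights + ctr_y
    pred_w = np.exp(deltas[:, 2]) * widths
    pred_h = np.exp(deltas[:, 3]) * heights

    boxes = np.zeros_like(deltas)
    boxes[:, 0] = pred_ctr_x - 0.5 * pred_w
    boxes[:, 1] = pred_ctr_y - 0.5 * pred_h
    boxes[:, 2] = pred_ctr_x + 0.5 * pred_w
    boxes[:, 3] = pred_ctr_y + 0.5 * pred_h
    return boxes


def clip_boxes(boxes, im_shape):
    """Clip (x1, y1, x2, y2) boxes to a (height, width) image."""
    boxes[:, 0] = np.clip(boxes[:, 0], 0, im_shape[1] - 1)
    boxes[:, 1] = np.clip(boxes[:, 1], 0, im_shape[0] - 1)
    boxes[:, 2] = np.clip(boxes[:, 2], 0, im_shape[1] - 1)
    boxes[:, 3] = np.clip(boxes[:, 3], 0, im_shape[0] - 1)
    return boxes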
import numpy as np


def tracking_by_match(vid_dets, thr=0.6, max_traj_num=50):
    # Init: per-frame NMS, then append a track-id column (-1 = unassigned).
    for cls_idx, cls_dets in enumerate(vid_dets):
        for frm_idx, frm_dets in enumerate(cls_dets):
            if frm_dets is None or len(frm_dets) == 0:
                continue
            keep = nms(frm_dets, 0.3)
            frm_dets = frm_dets[keep]
            frm_dets_new = np.zeros((frm_dets.shape[0],
                                     frm_dets.shape[1] + 1))
            frm_dets_new[:, :frm_dets.shape[1]] = frm_dets
            frm_dets_new[:, -1] = -1
            cls_dets[frm_idx] = frm_dets_new

    # Greedily link detections between consecutive frames by IoU.
    curr_tid = 0
    tid2scr = {}
    tid2cnt = {}
    for cls_idx, cls_dets in enumerate(vid_dets):
        for frm_idx in range(len(cls_dets) - 1):
            curr_frm_dets = cls_dets[frm_idx]
            next_frm_dets = cls_dets[frm_idx + 1]
            if curr_frm_dets is not None:  # guard empty frames
                for det in curr_frm_dets:
                    if det[-1] == -1:
                        # Start a new track at this detection.
                        det[-1] = curr_tid
                        tid2scr[curr_tid] = det[4]
                        tid2cnt[curr_tid] = 1
                        curr_tid += 1
                    if next_frm_dets is None or len(next_frm_dets) == 0:
                        continue
                    curr_x1, curr_y1, curr_x2, curr_y2, scr, tid = det
                    next_x1s = next_frm_dets[:, 0]
                    next_y1s = next_frm_dets[:, 1]
                    next_x2s = next_frm_dets[:, 2]
                    next_y2s = next_frm_dets[:, 3]
                    i_x1s = np.maximum(curr_x1, next_x1s)
                    i_y1s = np.maximum(curr_y1, next_y1s)
                    i_x2s = np.minimum(curr_x2, next_x2s)
                    i_y2s = np.minimum(curr_y2, next_y2s)
                    i_areas = (np.maximum(i_x2s - i_x1s + 1, 0) *
                               np.maximum(i_y2s - i_y1s + 1, 0))
                    u_areas = ((curr_x2 - curr_x1 + 1) *
                               (curr_y2 - curr_y1 + 1) +
                               (next_x2s - next_x1s + 1) *
                               (next_y2s - next_y1s + 1) - i_areas)
                    ious = i_areas / u_areas
                    best_det_id = np.argmax(ious)
                    if ious[best_det_id] > thr:
                        # Extend this track into the next frame.
                        next_frm_dets[best_det_id, -1] = tid
                        tid2scr[tid] += next_frm_dets[best_det_id, 4]
                        tid2cnt[tid] += 1
            if frm_idx == len(cls_dets) - 2 and next_frm_dets is not None:
                # Unmatched detections in the last frame become new tracks.
                for det in next_frm_dets:
                    if det[-1] == -1:
                        det[-1] = curr_tid
                        tid2scr[curr_tid] = det[-2]
                        tid2cnt[curr_tid] = 1
                        curr_tid += 1

    # Trajectory confidence: mean detection score plus a length bonus;
    # keep only the top max_traj_num trajectories.
    tid2conf = {}
    for tid in tid2cnt:
        traj_conf = tid2scr[tid] / tid2cnt[tid]
        if traj_conf >= 0.01:
            tid2conf[tid] = (tid2scr[tid] * 1.0 / tid2cnt[tid] +
                             tid2cnt[tid] * 10.0 / len(vid_dets[0]))
    reserved_tid_conf_list = sorted(tid2conf.items(),
                                    key=lambda item: item[1],
                                    reverse=True)[:max_traj_num]
    reserved_tids = {tid: conf for tid, conf in reserved_tid_conf_list}

    # Collect the reserved trajectories back into per-class per-frame boxes.
    all_boxes = []
    for cls_idx in range(len(vid_dets)):
        cls_boxes = []
        for frm_idx in range(len(vid_dets[0])):
            cls_boxes.append({})
        all_boxes.append(cls_boxes)
    for cls_idx, cls_dets in enumerate(vid_dets):
        for frm_idx, frm_dets in enumerate(cls_dets):
            if frm_dets is None:
                continue
            for det in frm_dets:
                tid = det[-1]
                if tid not in reserved_tids:
                    continue
                det[4] = tid2conf[tid]
                all_boxes[cls_idx][frm_idx][tid] = det.tolist()
    for cls_id in range(len(all_boxes)):
        cls_boxes = all_boxes[cls_id]
        for frm_id in range(len(cls_boxes)):
            # list(...) is required on Python 3, where dict.values() is a
            # view that numpy would wrap as a 0-d object array.
            cls_boxes[frm_id] = np.array(list(cls_boxes[frm_id].values()))
    return all_boxes
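# Toy usage sketch for tracking_by_match, assuming `nms` is in scope: one
# class, two frames, one slightly drifting box -- the two detections get
# linked into a single trajectory. The coordinates are illustrative only.
toy_vid_dets = [[np.array([[10., 10., 50., 50., 0.9]]),
                 np.array([[12., 11., 52., 51., 0.8]])]]
tracks = tracking_by_match(toy_vid_dets, thr=0.6)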
def process(self, img_name):
    txt_path = self.result_dir + img_name[:-4] + '.txt'
    if os.path.exists(txt_path):
        # Already processed; reuse the cached result.
        with open(txt_path, 'r') as f_txt:
            txt_items = f_txt.readlines()
        return len(txt_items), img_name

    img_path = os.path.join(self.img_dir, img_name)
    im = Image.open(img_path).convert('RGB')
    if cfg.predict_cut_text_line:
        im_array = np.array(im, dtype=np.float32)
    d_width, d_height = resize_image(im.size)
    scale_ratio_w = d_width / im.width
    scale_ratio_h = d_height / im.height
    im = im.resize((d_width, d_height), Image.BICUBIC)

    x = transform(im)
    x = x[np.newaxis, :]
    # lock.acquire()
    y = self.model(x.cuda()).cpu().detach().numpy()
    # lock.release()
    y = np.squeeze(y)
    y[:, :, :3] = sigmoid(y[:, :, :3])
    cond = np.greater_equal(y[:, :, 0], cfg.pixel_threshold)
    activation_pixels = np.asarray(np.where(cond), dtype=np.int32)
    quad_scores, quad_after_nms = nms(y, activation_pixels[0],
                                      activation_pixels[1])

    if self.isDraw:
        quad_im = im.copy()
        draw = ImageDraw.Draw(im)
        for i, j in zip(activation_pixels[0], activation_pixels[1]):
            px = (j + 0.5) * cfg.pixel_size
            py = (i + 0.5) * cfg.pixel_size
            line_width, line_color = 1, 'aqua'
            if y[i, j, 1] >= cfg.side_vertex_pixel_threshold:
                if y[i, j, 2] < cfg.trunc_threshold:
                    line_width, line_color = 2, 'yellow'
                elif y[i, j, 2] >= 1 - cfg.trunc_threshold:
                    line_width, line_color = 2, 'green'
            draw.line(
                [(px - 0.5 * cfg.pixel_size, py - 0.5 * cfg.pixel_size),
                 (px + 0.5 * cfg.pixel_size, py - 0.5 * cfg.pixel_size),
                 (px + 0.5 * cfg.pixel_size, py + 0.5 * cfg.pixel_size),
                 (px - 0.5 * cfg.pixel_size, py + 0.5 * cfg.pixel_size),
                 (px - 0.5 * cfg.pixel_size, py - 0.5 * cfg.pixel_size)],
                width=line_width, fill=line_color)
        im.save(self.result_dir + img_name[:-4] + '_act.jpg')
        quad_draw = ImageDraw.Draw(quad_im)

    txt_items = []
    invalid = 0
    for score, geo, s in zip(quad_scores, quad_after_nms,
                             range(len(quad_scores))):
        if np.amin(score) > 0:
            if self.isDraw:
                quad_draw.line([tuple(geo[0]), tuple(geo[1]),
                                tuple(geo[2]), tuple(geo[3]),
                                tuple(geo[0])],
                               width=2, fill='aqua')
            if cfg.predict_cut_text_line:
                self.cut_text_line(geo, scale_ratio_w, scale_ratio_h,
                                   im_array, img_name, s)
            # Rescale the quad back to the original image size.
            rescaled_geo = geo / [scale_ratio_w, scale_ratio_h]
            rescaled_geo_list = np.reshape(rescaled_geo, (8,)).tolist()
            txt_item = ','.join(map(str, rescaled_geo_list))
            txt_items.append(txt_item + '\n')
        else:
            invalid += 1
    if self.isDraw:
        quad_im.save(self.result_dir + img_name[:-4] + '_predict.jpg')
    with open(txt_path, 'w') as f_txt:
        f_txt.writelines(txt_items)
    return len(txt_items), img_name
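# `sigmoid` is used above but not defined in this file; a one-line numpy
# version matching its elementwise use on the raw network output:
import numpy as np


def sigmoid(x):
    """Elementwise logistic function."""
    return 1.0 / (1.0 + np.exp(-x))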