def get_FPS(self, image, test_interval):
    image_shape = np.array(np.shape(image)[0:2])
    #---------------------------------------------------------#
    #   Add gray bars to the image for a distortion-free resize
    #---------------------------------------------------------#
    crop_img = letterbox_image(image, [self.input_shape[0], self.input_shape[1]])
    #----------------------------------------------------------------------------------#
    #   Convert RGB to BGR, because the original centernet_hourglass weights
    #   were trained on BGR images
    #----------------------------------------------------------------------------------#
    photo = np.array(crop_img, dtype=np.float32)[:, :, ::-1]
    #-----------------------------------------------------------#
    #   Preprocess and normalize the image; the resulting photo
    #   has shape [1, 512, 512, 3]
    #-----------------------------------------------------------#
    photo = np.reshape(preprocess_image(photo),
                       [1, self.input_shape[0], self.input_shape[1], self.input_shape[2]])

    preds = self.centernet.predict(photo)
    if self.nms:
        preds = np.array(nms(preds, self.nms_threhold))
    if len(preds[0]) > 0:
        preds[0][:, 0:4] = preds[0][:, 0:4] / (self.input_shape[0] / 4)
        det_label = preds[0][:, -1]
        det_conf = preds[0][:, -2]
        det_xmin, det_ymin, det_xmax, det_ymax = preds[0][:, 0], preds[0][:, 1], preds[0][:, 2], preds[0][:, 3]
        top_indices = [i for i, conf in enumerate(det_conf) if conf >= self.confidence]
        top_conf = det_conf[top_indices]
        top_label_indices = det_label[top_indices].tolist()
        top_xmin = np.expand_dims(det_xmin[top_indices], -1)
        top_ymin = np.expand_dims(det_ymin[top_indices], -1)
        top_xmax = np.expand_dims(det_xmax[top_indices], -1)
        top_ymax = np.expand_dims(det_ymax[top_indices], -1)
        boxes = centernet_correct_boxes(top_ymin, top_xmin, top_ymax, top_xmax,
                                        np.array([self.input_shape[0], self.input_shape[1]]),
                                        image_shape)

    t1 = time.time()
    for _ in range(test_interval):
        preds = self.centernet.predict(photo)
        if self.nms:
            preds = np.array(nms(preds, self.nms_threhold))
        if len(preds[0]) > 0:
            preds[0][:, 0:4] = preds[0][:, 0:4] / (self.input_shape[0] / 4)
            det_label = preds[0][:, -1]
            det_conf = preds[0][:, -2]
            det_xmin, det_ymin, det_xmax, det_ymax = preds[0][:, 0], preds[0][:, 1], preds[0][:, 2], preds[0][:, 3]
            top_indices = [i for i, conf in enumerate(det_conf) if conf >= self.confidence]
            top_conf = det_conf[top_indices]
            top_label_indices = det_label[top_indices].tolist()
            top_xmin = np.expand_dims(det_xmin[top_indices], -1)
            top_ymin = np.expand_dims(det_ymin[top_indices], -1)
            top_xmax = np.expand_dims(det_xmax[top_indices], -1)
            top_ymax = np.expand_dims(det_ymax[top_indices], -1)
            boxes = centernet_correct_boxes(top_ymin, top_xmin, top_ymax, top_xmax,
                                            np.array([self.input_shape[0], self.input_shape[1]]),
                                            image_shape)
    t2 = time.time()
    tact_time = (t2 - t1) / test_interval
    return tact_time
def __get_bbox(self, image):
    """
    :param image: the image to run prediction on
    :return: bboxes after NMS, stored as (xmin, ymin, xmax, ymax, score, class)
    """
    org_image = np.copy(image)
    org_h, org_w, _ = org_image.shape

    yolo_input = utils.img_preprocess2(image, None,
                                       (self.__test_input_size, self.__test_input_size), False)
    yolo_input = yolo_input[np.newaxis, ...]

    pred_sbbox, pred_mbbox, pred_lbbox = self.__sess.run(
        [self.__pred_sbbox, self.__pred_mbbox, self.__pred_lbbox],
        feed_dict={
            self.__input_data: yolo_input,
            self.__training: False
        }
    )

    sbboxes = self.__convert_pred(pred_sbbox, (org_h, org_w), self.__valid_scales[0])
    mbboxes = self.__convert_pred(pred_mbbox, (org_h, org_w), self.__valid_scales[1])
    lbboxes = self.__convert_pred(pred_lbbox, (org_h, org_w), self.__valid_scales[2])
    # sbboxes = self.__valid_scale_filter(sbboxes, self.__valid_scales[0])
    # mbboxes = self.__valid_scale_filter(mbboxes, self.__valid_scales[1])
    # lbboxes = self.__valid_scale_filter(lbboxes, self.__valid_scales[2])

    bboxes = np.concatenate([sbboxes, mbboxes, lbboxes], axis=0)
    bboxes = utils.nms(bboxes, self.__score_threshold, self.__iou_threshold, method='nms')
    return bboxes
def __call__(self, loc, score, anchor, img_size, scale=1.):
    if self.mode == "training":
        n_pre_nms = self.n_train_pre_nms
        n_post_nms = self.n_train_post_nms
    else:
        n_pre_nms = self.n_test_pre_nms
        n_post_nms = self.n_test_post_nms

    # Convert the RPN regression output into proposal boxes
    roi = loc2bbox(anchor, loc)

    # Clip with slices so the proposals stay inside the image borders
    roi[:, slice(0, 4, 2)] = np.clip(roi[:, slice(0, 4, 2)], 0, img_size[1])
    roi[:, slice(1, 4, 2)] = np.clip(roi[:, slice(1, 4, 2)], 0, img_size[0])

    # The minimum width/height must not fall below 16
    min_size = self.min_size * scale
    # Compute widths and heights
    ws = roi[:, 2] - roi[:, 0]
    hs = roi[:, 3] - roi[:, 1]
    # Drop proposals that are too small
    keep = np.where((hs >= min_size) & (ws >= min_size))[0]
    roi = roi[keep, :]
    score = score[keep]

    # Keep the highest-scoring proposals
    order = score.ravel().argsort()[::-1]
    if n_pre_nms > 0:
        order = order[:n_pre_nms]
    roi = roi[order, :]

    roi = nms(roi, self.nms_thresh)
    roi = torch.Tensor(roi)
    roi = roi[:n_post_nms]
    return roi
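# For reference: `loc2bbox` above is not defined in this section. The sketch
# below is a hypothetical NumPy implementation assuming the conventional
# Faster R-CNN parameterization (dx, dy are center offsets relative to the
# anchor size; dw, dh are log-scale factors) and (xmin, ymin, xmax, ymax)
# boxes, matching the x/y clipping above; the real helper may differ.
def loc2bbox_sketch(anchor, loc):
    # anchor: (N, 4) corners; loc: (N, 4) as (dx, dy, dw, dh)
    w = anchor[:, 2] - anchor[:, 0]
    h = anchor[:, 3] - anchor[:, 1]
    ctr_x = anchor[:, 0] + 0.5 * w
    ctr_y = anchor[:, 1] + 0.5 * h
    # shift the center and rescale the size
    pred_ctr_x = loc[:, 0] * w + ctr_x
    pred_ctr_y = loc[:, 1] * h + ctr_y
    pred_w = np.exp(loc[:, 2]) * w
    pred_h = np.exp(loc[:, 3]) * h
    # back to corner format
    bbox = np.zeros_like(anchor)
    bbox[:, 0] = pred_ctr_x - 0.5 * pred_w
    bbox[:, 1] = pred_ctr_y - 0.5 * pred_h
    bbox[:, 2] = pred_ctr_x + 0.5 * pred_w
    bbox[:, 3] = pred_ctr_y + 0.5 * pred_h
    return bbox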
def __get_bbox(self, image):
    """
    :param image: the image to run prediction on
    :return: bboxes after NMS, stored as (xmin, ymin, xmax, ymax, score, class)
    """
    if self.__multi_test:
        test_input_sizes = self.__train_input_sizes[::3]
        bboxes_list = []
        for test_input_size in test_input_sizes:
            valid_scale = (0, np.inf)
            bboxes_list.append(self.__predict(image, test_input_size, valid_scale))
            if self.__flip_test:
                bboxes_flip = self.__predict(image[:, ::-1, :], test_input_size, valid_scale)
                bboxes_flip[:, [0, 2]] = image.shape[1] - bboxes_flip[:, [2, 0]]
                bboxes_list.append(bboxes_flip)
        bboxes = np.row_stack(bboxes_list)
    else:
        bboxes = self.__predict(image, self.__test_input_size, (0, np.inf))
    bboxes = utils.nms(bboxes, self.__score_threshold, self.__iou_threshold, method='nms')
    return bboxes
def generate_det(args):
    ckpt_path = args.checkpoint_path
    try:
        names = os.listdir(ckpt_path)
        for name in names:
            out = re.findall("ResNet_.*", name)
            if out != []:
                ckpt_path = out[0]
                break
        ckpt_path = os.path.join(args.checkpoint_path, ckpt_path)
    except Exception:
        print("There is no checkpoint in ", args.checkpoint_path)
        exit()
    model = RC3D_resnet.RC3D(num_classes, cfg.Test.Image_shape, args.feature_path)
    model = model.cuda()
    model.zero_grad()
    model.load(ckpt_path)
    test_batch = utils.new_Batch_Generator(name_to_id, num_classes, args.image_path,
                                           args.annotation_path, 'test')
    fp = []
    det = []
    for i in range(1, num_classes):
        f = open(os.path.join(args.json_path, "detection_{}.json".format(str(i))), 'w')
        fp.append(f)
        det.append({})
        det[i - 1]['object'] = []
    try:
        while True:
            with torch.no_grad():
                data, gt = next(test_batch)
                _, _, object_cls_score, object_offset = model.forward(data)
                # bbox is sorted by score in descending order
                bbox = utils.nms(model.proposal_bbox, object_cls_score, object_offset,
                                 model.num_classes, model.im_info)
            if bbox is None:
                continue
            for _cls, score, proposal in zip(bbox['cls'], bbox['score'], bbox['bbox']):
                if proposal[:, 0] == proposal[:, 1]:
                    continue
                temp_dict = {}
                temp_dict['file_name'] = data
                temp_dict['start'] = float(proposal[:, 0])
                temp_dict['end'] = float(proposal[:, 1])
                temp_dict['score'] = float(score)
                det[int(_cls[0]) - 1]['object'].append(temp_dict)
            torch.cuda.empty_cache()
    except StopIteration:
        for i in range(num_classes - 1):
            json.dump(det[i], fp[i])
            fp[i].close()
    print("generate_det Done!")
def detect_image(self, image_id, image):
    f = open("./input/detection-results/" + image_id + ".txt", "w")
    self.confidence = 0.01
    self.nms_threhold = 0.5
    image_shape = np.array(np.shape(image)[0:2])

    crop_img = letterbox_image(image, [self.input_shape[0], self.input_shape[1]])
    # Convert RGB to BGR, because the original centernet_hourglass weights
    # were trained on BGR images
    photo = np.array(crop_img, dtype=np.float32)[:, :, ::-1]
    # Preprocess and normalize the image
    photo = np.reshape(preprocess_image(photo),
                       [1, self.input_shape[0], self.input_shape[1], self.input_shape[2]])

    preds = self.centernet.predict(photo)
    if self.nms:
        preds = np.array(nms(preds, self.nms_threhold))
    if len(preds[0]) <= 0:
        return image

    preds[0][:, 0:4] = preds[0][:, 0:4] / (self.input_shape[0] / 4)

    # Keep only the boxes whose score is above confidence
    det_label = preds[0][:, -1]
    det_conf = preds[0][:, -2]
    det_xmin, det_ymin, det_xmax, det_ymax = preds[0][:, 0], preds[0][:, 1], preds[0][:, 2], preds[0][:, 3]
    top_indices = [i for i, conf in enumerate(det_conf) if conf >= self.confidence]
    top_conf = det_conf[top_indices]
    top_label_indices = det_label[top_indices].tolist()
    top_xmin = np.expand_dims(det_xmin[top_indices], -1)
    top_ymin = np.expand_dims(det_ymin[top_indices], -1)
    top_xmax = np.expand_dims(det_xmax[top_indices], -1)
    top_ymax = np.expand_dims(det_ymax[top_indices], -1)

    # Remove the gray-bar padding
    boxes = centernet_correct_boxes(top_ymin, top_xmin, top_ymax, top_xmax,
                                    np.array([self.input_shape[0], self.input_shape[1]]),
                                    image_shape)

    for i, c in enumerate(top_label_indices):
        predicted_class = self.class_names[int(c)]
        score = str(top_conf[i])
        top, left, bottom, right = boxes[i]
        f.write("%s %s %s %s %s %s\n" %
                (predicted_class, score[:6], str(int(left)), str(int(top)),
                 str(int(right)), str(int(bottom))))
    f.close()
    return
def reconstruct(Iorig, I, Y, out_size, threshold=.9):
    net_stride = 2**4
    side = ((208. + 40.) / 2.) / net_stride  # 7.75

    Probs = Y[..., 0]
    Affines = Y[..., 2:]
    rx, ry = Y.shape[:2]
    ywh = Y.shape[1::-1]
    iwh = np.array(I.shape[1::-1], dtype=float).reshape((2, 1))

    xx, yy = np.where(Probs > threshold)

    WH = getWH(I.shape)
    MN = WH / net_stride

    vxx = vyy = 0.5  # alpha

    base = lambda vx, vy: np.matrix([[-vx, -vy, 1.],
                                     [vx, -vy, 1.],
                                     [vx, vy, 1.],
                                     [-vx, vy, 1.]]).T
    labels = []

    for i in range(len(xx)):
        y, x = xx[i], yy[i]
        affine = Affines[y, x]
        prob = Probs[y, x]

        mn = np.array([float(x) + .5, float(y) + .5])

        A = np.reshape(affine, (2, 3))
        A[0, 0] = max(A[0, 0], 0.)
        A[1, 1] = max(A[1, 1], 0.)

        pts = np.array(A * base(vxx, vyy))  # *alpha
        pts_MN_center_mn = pts * side
        pts_MN = pts_MN_center_mn + mn.reshape((2, 1))

        pts_prop = pts_MN / MN.reshape((2, 1))

        labels.append(DLabel(0, pts_prop, prob))

    final_labels = nms(labels, .1)
    TLps = []

    if len(final_labels):
        final_labels.sort(key=lambda x: x.prob(), reverse=True)
        for i, label in enumerate(final_labels):
            t_ptsh = getRectPts(0, 0, out_size[0], out_size[1])
            ptsh = np.concatenate((label.pts * getWH(Iorig.shape).reshape((2, 1)),
                                   np.ones((1, 4))))
            H = find_T_matrix(ptsh, t_ptsh)
            Ilp = cv2.warpPerspective(Iorig, H, out_size, borderValue=.0)
            TLps.append(Ilp)

    return final_labels, TLps
def __get_bbox(self, image):
    """
    :param image: the image to run prediction on
    :return: bboxes after NMS, stored as (xmin, ymin, xmax, ymax, score, class)
    """
    bboxes = self.__predict(image, self.__test_input_size, (0, np.inf))
    bboxes = utils.nms(bboxes, self.__score_threshold, self.__iou_threshold, method='nms')
    return bboxes
def _predict_pil(self, pil_img, **kwargs):
    '''
    Args:
        pil_img: PIL.Image.Image
        input_size: int, input resolution
        conf_thres: float, confidence threshold
    '''
    input_size = kwargs.get('input_size', self.input_size)
    conf_thres = kwargs.get('conf_thres', self.conf_thres)
    assert isinstance(pil_img, Image.Image), 'input must be a PIL.Image'
    assert input_size is not None, 'Please specify the input resolution'
    assert conf_thres is not None, 'Please specify the confidence threshold'

    # pad to square
    input_img, _, pad_info = utils.rect_to_square(pil_img, None, input_size, 0)

    input_ori = tvf.to_tensor(input_img)
    input_ = input_ori.unsqueeze(0)

    assert input_.dim() == 4
    device = next(self.model.parameters()).device
    input_ = input_.to(device=device)
    with torch.no_grad():
        dts = self.model(input_).cpu()

    dts = dts.squeeze()
    # post-processing
    dts = dts[dts[:, 5] >= conf_thres]
    if len(dts) > 1000:
        _, idx = torch.topk(dts[:, 5], k=1000)
        dts = dts[idx, :]
    if kwargs.get('debug', False):
        np_img = np.array(input_img)
        visualization.draw_dt_on_np(np_img, dts)
        plt.imshow(np_img)
        plt.show()
    dts = utils.nms(dts, is_degree=True, nms_thres=0.45, img_size=input_size)
    dts = utils.detection2original(dts, pad_info.squeeze())
    if kwargs.get('debug', False):
        np_img = np.array(pil_img)
        visualization.draw_dt_on_np(np_img, dts)
        plt.imshow(np_img)
        plt.show()
    return dts
def run_result(org_img, input_size, params):
    original_image_size = org_img.shape[:2]
    img = image_preporcess(np.copy(org_img), [input_size, input_size], canny=params.canny)
    input_data = [img.astype(np.float32)]
    interpreter.set_tensor(input_details[0]['index'], input_data)
    interpreter.invoke()
    bboxes = interpreter.get_tensor(merge_branch["index"])
    pred_bbox = np.reshape(bboxes, (-1, 5 + params.class_num))
    bboxes = postprocess_boxes(pred_bbox, original_image_size, input_size, 0.3)
    bboxes = nms(bboxes, 0.3, method='nms')
    draw_boxes(params, org_img, bboxes)
    return bboxes
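# For context: `interpreter`, `input_details`, and `merge_branch` above are
# module-level globals not defined in this section. A minimal setup sketch
# using the standard tf.lite API is given below; the model path and the way
# `merge_branch` is picked out of the output details are assumptions.
import tensorflow as tf

interpreter = tf.lite.Interpreter(model_path="model.tflite")  # hypothetical path
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
merge_branch = output_details[0]  # assumed: the merged prediction branch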
def run_result(org_img, input_size, params):
    original_image_size = org_img.shape[:2]
    img = image_preporcess(np.copy(org_img), [input_size, input_size], canny=params.canny)
    pred_mbbox, pred_lbbox = sess.run(rtensor[1:], feed_dict={rtensor[0]: [img]})
    pred_bbox = np.concatenate([
        np.reshape(pred_mbbox, (-1, 5 + params.class_num)),
        np.reshape(pred_lbbox, (-1, 5 + params.class_num))
    ], axis=0)
    bboxes = postprocess_boxes(pred_bbox, original_image_size, input_size, 0.3)
    bboxes = nms(bboxes, 0.3, method='nms')
    draw_boxes(params, org_img, bboxes)
    return bboxes
def detect_once(model, pil_img, conf_thres, nms_thres=0.45, input_size=608):
    '''
    Run the model on the pil_img and return the detections.
    '''
    ori_w, ori_h = pil_img.width, pil_img.height
    input_img, _, pad_info = utils.rect_to_square(pil_img, None, input_size, 0)

    input_img = tvf.to_tensor(input_img).cuda()
    with torch.no_grad():
        dts = model(input_img[None]).cpu().squeeze()
    dts = dts[dts[:, 5] >= conf_thres].cpu()
    # use the nms_thres parameter instead of a hard-coded 0.45
    dts = utils.nms(dts, is_degree=True, nms_thres=nms_thres)
    dts = utils.detection2original(dts, pad_info.squeeze())
    # np_img = np.array(pil_img)
    # api_utils.draw_dt_on_np(np_img, detections)
    # plt.imshow(np_img)
    # plt.show()
    return dts
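# Example usage of detect_once (a sketch: the image path and threshold values
# are placeholders, and `model` is assumed to be a loaded detector in eval
# mode on the GPU, since detect_once moves the input to CUDA):
from PIL import Image

img = Image.open('test.jpg')  # hypothetical input image
detections = detect_once(model, img, conf_thres=0.3, nms_thres=0.45, input_size=608)
print(len(detections))        # one row per surviving detection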
def test(args):
    runtime = AverageMeter()
    ckpt_path = args.checkpoint_path
    try:
        names = os.listdir(ckpt_path)
        for name in names:
            out = re.findall("ResNet_.*", name)
            if out != []:
                ckpt_path = out[0]
                break
        ckpt_path = os.path.join(args.checkpoint_path, ckpt_path)
    except Exception:
        print("There is no checkpoint in ", args.checkpoint_path)
        exit()
    model = RC3D_resnet.RC3D(num_classes, cfg.Test.Image_shape, args.feature_path)
    model = model.cuda()
    model.zero_grad()
    model.load(ckpt_path)
    #test_batch = utils.Batch_Generator(name_to_id, num_classes, args.image_path, args.annotation_path, mode='test')
    test_batch = utils.new_Batch_Generator(name_to_id, num_classes, args.image_path,
                                           args.annotation_path)
    tic = time.time()
    data, gt = next(test_batch)
    with torch.no_grad():
        print(gt)
        _, _, object_cls_score, object_offset = model.forward(data)
        bbox = utils.nms(model.proposal_bbox, object_cls_score, object_offset,
                         model.num_classes, model.im_info)
    toc = time.time()
    torch.cuda.empty_cache()
    runtime.update(toc - tic)
    print('Time {runtime.val:.3f} ({runtime.avg:.3f})\t'.format(runtime=runtime))
    for _cls, score, proposal in zip(bbox['cls'], bbox['score'], bbox['bbox']):
        print("class:{:}({:})\t score:{:.6f}\t start:{:.2f}\t end:{:.2f}\t".format(
            id_to_name[int(_cls[0])], _cls[0], score[0], proposal[0, 0], proposal[0, 1]))
def __get_bbox(self, image):
    """
    :param image: the image to run prediction on
    :return: bboxes after NMS, stored as (xmin, ymin, xmax, ymax, score, class)
    """
    org_image = np.copy(image)
    org_h, org_w, _ = org_image.shape
    s0 = time.time()
    yolo_input = img_preprocess2(image, None,
                                 (self.__test_input_size, self.__test_input_size), False)
    yolo_input = yolo_input[np.newaxis, ...]
    s1 = time.time()
    print("process img time:", s1 - s0)
    pred_sbbox, pred_mbbox, pred_lbbox = sess.run(
        [self.__pred_sbbox, self.__pred_mbbox, self.__pred_lbbox],
        feed_dict={self.__input_data: yolo_input})
    s2 = time.time()
    print("inference time:", s2 - s1)
    sbboxes = self.__convert_pred(pred_sbbox, (org_h, org_w), self.__valid_scales[0])
    mbboxes = self.__convert_pred(pred_mbbox, (org_h, org_w), self.__valid_scales[1])
    lbboxes = self.__convert_pred(pred_lbbox, (org_h, org_w), self.__valid_scales[2])
    s3 = time.time()
    print("convert pred time:", s3 - s2)
    # sbboxes = self.__valid_scale_filter(sbboxes, self.__valid_scales[0])
    # mbboxes = self.__valid_scale_filter(mbboxes, self.__valid_scales[1])
    # lbboxes = self.__valid_scale_filter(lbboxes, self.__valid_scales[2])
    bboxes = np.concatenate([sbboxes, mbboxes, lbboxes], axis=0)
    bboxes = utils.nms(bboxes, self.__score_threshold, self.__iou_threshold, method='nms')
    print("nms time:", time.time() - s3)
    return bboxes
def detect(self, img):
    img2 = utils.pred_img(img)
    resize_img = np.array(img2, dtype=np.float32)
    resize_img /= 255.0
    resize_img = np.transpose(resize_img, (1, 2, 0))
    images = []
    images.append(resize_img)
    images = np.asarray(images)
    images = images.transpose((0, 3, 1, 2))
    images = t.from_numpy(images)
    outputlist = []
    if t.cuda.is_available():  # must be called; the bare attribute is always truthy
        print("cuda is on")
        self.Detection.cuda()
        images = images.cuda()
        out = self.Detection(images)
        for i in range(3):
            outputlist.append(self.Decode(out[i]))
    else:
        print("cuda is off")
        out = self.Detection(images)
        for i in range(3):
            outputlist.append(self.Decode(out[i]))
    for i in outputlist:
        print(i.shape)
    output = t.cat(outputlist, 1)
    batch_detection = utils.nms(output, cfg["class_num"],
                                conf_thres=self.conf, nms_thres=0.4)
    print(output[0, 0, 0])
    return output
def filter_prediction(boxes, probs, cls_idx):
    """
    Filter bounding boxes with probability threshold and nms
    Args:
        boxes: [BATCH, 4], (cx, cy, w, h)
        probs: [BATCH, CLASS_NUM], class probability
        cls_idx: array of class indices
    Return:
        final_boxes: filtered bounding boxes
        final_probs: filtered probabilities
        final_cls_idx: filtered class indices
    """
    if cfg.TOP_N_DETECTION < len(probs) and cfg.TOP_N_DETECTION > 0:
        order = probs.argsort()[:-cfg.TOP_N_DETECTION - 1:-1]
        probs = probs[order]
        boxes = boxes[order]
        cls_idx = cls_idx[order]
    else:
        filtered_idx = np.nonzero(probs > cfg.PROB_THRESHOLD)[0]
        probs = probs[filtered_idx]
        boxes = boxes[filtered_idx]
        cls_idx = cls_idx[filtered_idx]

    final_boxes = []
    final_probs = []
    final_cls_idx = []
    for c in range(cfg.NUM_CLASSES):
        idx_per_class = [i for i in range(len(probs)) if cls_idx[i] == c]
        keep = nms(boxes[idx_per_class], probs[idx_per_class], cfg.NMS_THRESHOLD)
        for i in range(len(keep)):
            if keep[i]:
                final_boxes.append(boxes[idx_per_class[i]])
                final_probs.append(probs[idx_per_class[i]])
                final_cls_idx.append(c)
    return final_boxes, final_probs, final_cls_idx
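# The `nms` used above returns a boolean keep-mask over the per-class boxes.
# Below is a minimal greedy sketch of that behavior, assuming (cx, cy, w, h)
# boxes as in the docstring; the real helper may differ in details.
def nms_sketch(boxes, probs, thresh):
    def iou(a, b):
        # convert center format to corners
        ax1, ay1, ax2, ay2 = a[0] - a[2] / 2, a[1] - a[3] / 2, a[0] + a[2] / 2, a[1] + a[3] / 2
        bx1, by1, bx2, by2 = b[0] - b[2] / 2, b[1] - b[3] / 2, b[0] + b[2] / 2, b[1] + b[3] / 2
        iw = max(0.0, min(ax2, bx2) - max(ax1, bx1))
        ih = max(0.0, min(ay2, by2) - max(ay1, by1))
        inter = iw * ih
        union = a[2] * a[3] + b[2] * b[3] - inter
        return inter / union if union > 0 else 0.0

    order = np.argsort(probs)[::-1]      # highest score first
    keep = [True] * len(order)
    for i in range(len(order)):
        if not keep[order[i]]:
            continue
        for j in range(i + 1, len(order)):
            if keep[order[j]] and iou(boxes[order[i]], boxes[order[j]]) > thresh:
                keep[order[j]] = False   # suppress overlapping lower-score box
    return keep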
def predict_batch(region_weight, edge_map_weight, junctions_weight):
    metrics = Metrics()
    for _id in _ids:

        # load detections
        fname = '{}/{}.jpg_5.pkl'.format(res_dir, _id)
        with open(fname, 'rb') as f:
            c = p.load(f, encoding='latin1')

        # apply non-maxima suppression
        cs, cs_c, th, th_c = nms(c['junctions'], c['junc_confs'], c['thetas'],
                                 c['theta_confs'], nms_thresh=8.0)

        # load annotations
        p_path = '{}/{}.npy'.format(annot_dir, _id)
        v_set = np.load(open(p_path, 'rb'), encoding='bytes')
        graph_annot = dict(v_set[()])
        cs_annot, es_annot = load_annots(graph_annot)

        # load edge map
        edge_map_path = '{}/{}.jpg'.format(edge_dir, _id)
        im_path = '{}/{}.jpg'.format(rgb_dir, _id)
        edge_map = np.array(Image.open(edge_map_path).convert('L')) / 255.0

        # load region masks
        region_path = '{}/{}.npy'.format(region_dir, _id)
        region_mks = np.load(region_path)
        region_mks = filter_regions(region_mks)

        # compute edge scores from classifier
        lw_from_cls = get_edge_scores(cs, region_mks, rgb_dir, _id)

        # reconstruct
        junctions, juncs_on, lines_on, regs_sm_on = reconstructBuildingBaseline(
            cs, edge_map,
            use_junctions_with_var=True,
            use_regions=True,
            thetas=th,
            regions=region_mks,
            angle_thresh=5,
            with_corner_edge_confidence=True,
            corner_confs=cs_c,
            corner_edge_thresh=0.125,
            theta_confs=th_c,
            theta_threshold=0.25,
            region_hit_threshold=0.1,
            lw_from_cls=lw_from_cls,
            use_edge_classifier=True,
            closed_region_lowerbound=True,
            closed_region_upperbound=True,
            with_corner_variables=True,
            corner_min_degree_constraint=True,
            junctions_soft=True,
            region_intersection_constraint=True,
            inter_region_constraint=True,
            post_process=True,
            region_weight=region_weight,
            edge_map_weight=edge_map_weight,
            junctions_weight=junctions_weight,
        )

        dwg = svgwrite.Drawing('../result/svg/{}.svg'.format(_id), (128, 128))
        dwg.add(svgwrite.image.Image(edge_map_path, size=(128, 128)))
        im_path = os.path.join(rgb_dir, _id + '.jpg')
        draw_building(dwg, junctions, juncs_on, lines_on)
        dwg.save()
        metrics.forward(graph_annot, junctions, juncs_on, lines_on, _id)

    return metrics.edge_f_score()
for _id in _ids:
    # if '1548206121.73' not in _id:
    #     continue
    # 1553980237.28

    # load detections
    fname = '{}/{}.jpg_5.pkl'.format(res_dir, _id)
    with open(fname, 'rb') as f:
        c = p.load(f, encoding='latin1')

    # apply non-maxima suppression
    cs, cs_c, th, th_c = nms(c['junctions'], c['junc_confs'], c['thetas'],
                             c['theta_confs'], nms_thresh=8.0)

    # load annotations
    p_path = '{}/{}.npy'.format(annot_dir, _id)
    v_set = np.load(open(p_path, 'rb'), encoding='bytes')
    graph_annot = dict(v_set[()])
    cs_annot, es_annot = load_annots(graph_annot)

    # load edge map
    edge_map_path = '{}/{}.jpg'.format(edge_dir, _id)
    im_path = '{}/{}.jpg'.format(rgb_dir, _id)
    edge_map = np.array(Image.open(edge_map_path).convert('L')) / 255.0

    # load region masks
def detect_image(self, image_id, image):
    f = open("./input/detection-results/" + image_id + ".txt", "w")
    self.confidence = 0.01
    self.nms_threhold = 0.5
    image_shape = np.array(np.shape(image)[0:2])
    #---------------------------------------------------------#
    #   Add gray bars to the image for a distortion-free resize
    #---------------------------------------------------------#
    crop_img = letterbox_image(image, [self.input_shape[0], self.input_shape[1]])
    #----------------------------------------------------------------------------------#
    #   Convert RGB to BGR, because the original centernet_hourglass weights
    #   were trained on BGR images
    #----------------------------------------------------------------------------------#
    photo = np.array(crop_img, dtype=np.float32)[:, :, ::-1]
    #-----------------------------------------------------------#
    #   Preprocess and normalize the image; the resulting photo
    #   has shape [1, 512, 512, 3]
    #-----------------------------------------------------------#
    photo = np.reshape(preprocess_image(photo),
                       [1, self.input_shape[0], self.input_shape[1], self.input_shape[2]])

    preds = self.centernet.predict(photo)
    #--------------------------------------------------------------------------#
    #   For CenterNet, locating the object center is crucial.
    #   A large object carries a lot of local evidence, so its center
    #   point is hard to pin down, and the max-pooling style of
    #   non-maximum suppression cannot remove those local boxes.
    #   That is why an extra box-level NMS pass is implemented here.
    #   In practice the extra NMS makes little difference with an
    #   hourglass backbone, but a noticeable one with resnet.
    #---------------------------------------------------------------------------#
    if self.nms:
        preds = np.array(nms(preds, self.nms_threhold))

    if len(preds[0]) <= 0:
        return

    #-----------------------------------------------------------#
    #   Convert the predictions to fractional (0-1) coordinates
    #-----------------------------------------------------------#
    preds[0][:, 0:4] = preds[0][:, 0:4] / (self.input_shape[0] / 4)

    #-----------------------------------------------------------#
    #   Keep only the boxes whose score is above confidence
    #-----------------------------------------------------------#
    det_label = preds[0][:, -1]
    det_conf = preds[0][:, -2]
    det_xmin, det_ymin, det_xmax, det_ymax = preds[0][:, 0], preds[0][:, 1], preds[0][:, 2], preds[0][:, 3]
    top_indices = [i for i, conf in enumerate(det_conf) if conf >= self.confidence]
    top_conf = det_conf[top_indices]
    top_label_indices = det_label[top_indices].tolist()
    top_xmin = np.expand_dims(det_xmin[top_indices], -1)
    top_ymin = np.expand_dims(det_ymin[top_indices], -1)
    top_xmax = np.expand_dims(det_xmax[top_indices], -1)
    top_ymax = np.expand_dims(det_ymax[top_indices], -1)

    #-----------------------------------------------------------#
    #   Remove the gray-bar padding
    #-----------------------------------------------------------#
    boxes = centernet_correct_boxes(top_ymin, top_xmin, top_ymax, top_xmax,
                                    np.array([self.input_shape[0], self.input_shape[1]]),
                                    image_shape)

    for i, c in enumerate(top_label_indices):
        predicted_class = self.class_names[int(c)]
        score = str(top_conf[i])
        top, left, bottom, right = boxes[i]
        f.write("%s %s %s %s %s %s\n" %
                (predicted_class, score[:6], str(int(left)), str(int(top)),
                 str(int(right)), str(int(bottom))))
    f.close()
    return
features = tf.convert_to_tensor(net_out_reshaped)
x = tf.Session().run(
    yolo_boxes_and_scores(features, anchors[anchor_mask[0]], nb_classes,
                          model_image_size, org_image_shape))
boxes = np.concatenate(
    [x[0], np.reshape(x[2][0], (n_shape[1] * n_shape[1] * 3, 1)), x[1]],
    axis=1)
all_boxes.extend(boxes)

boxes_, scores_, classes_ = postprocess_boxes_tf(all_boxes, score_threshold=.3)
image = draw_boxes_tf(boxes_, scores_, classes_, classes, org_image)
image.show()

#########################################################################################################

bboxes = postprocess_boxes(all_boxes, org_image, model_image_size[0], 0.3)
bboxes = nms(bboxes, 0.45, method='nms')
image = draw_bbox(org_image, bboxes, classes)
image = fromarray(image)
image.show()

#########################################################################################################

pred_bbox = np.concatenate([
    np.reshape(predictions[0], (-1, 5 + nb_classes)),
    np.reshape(predictions[0], (-1, 5 + nb_classes)),
    np.reshape(predictions[0], (-1, 5 + nb_classes))
], axis=0)
bboxes = postprocess_boxes(pred_bbox, org_image, model_image_size[0], 0.3)
bboxes = nms(bboxes, 0.45, method='nms')
image = draw_bbox(org_image, bboxes, classes)
def detect_face_limited(self, img, det_type=2):
    height, width, _ = img.shape
    if det_type >= 2:
        total_boxes = np.array([[0.0, 0.0, img.shape[1], img.shape[0], 0.9]],
                               dtype=np.float32)
        num_box = total_boxes.shape[0]

        # pad the bbox
        [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(total_boxes, width, height)
        # (3, 24, 24) is the input shape for RNet
        input_buf = np.zeros((num_box, 3, 24, 24), dtype=np.float32)

        for i in range(num_box):
            tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
            tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = img[y[i]:ey[i] + 1, x[i]:ex[i] + 1, :]
            input_buf[i, :, :, :] = adjust_input(cv2.resize(tmp, (24, 24)))

        output = self.RNet.predict(input_buf)

        # filter the total_boxes with threshold
        passed = np.where(output[1][:, 1] > self.threshold[1])
        total_boxes = total_boxes[passed]

        if total_boxes.size == 0:
            return None

        total_boxes[:, 4] = output[1][passed, 1].reshape((-1,))
        reg = output[0][passed]

        # nms
        pick = nms(total_boxes, 0.7, 'Union')
        total_boxes = total_boxes[pick]
        total_boxes = self.calibrate_box(total_boxes, reg[pick])
        total_boxes = self.convert_to_square(total_boxes)
        total_boxes[:, 0:4] = np.round(total_boxes[:, 0:4])
    else:
        total_boxes = np.array([[0.0, 0.0, img.shape[1], img.shape[0], 0.9]],
                               dtype=np.float32)

    num_box = total_boxes.shape[0]
    [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(total_boxes, width, height)
    # (3, 48, 48) is the input shape for ONet
    input_buf = np.zeros((num_box, 3, 48, 48), dtype=np.float32)

    for i in range(num_box):
        tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.float32)
        tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = img[y[i]:ey[i] + 1, x[i]:ex[i] + 1, :]
        input_buf[i, :, :, :] = adjust_input(cv2.resize(tmp, (48, 48)))

    output = self.ONet.predict(input_buf)
    # print(output[2])

    # filter the total_boxes with threshold
    passed = np.where(output[2][:, 1] > self.threshold[2])
    total_boxes = total_boxes[passed]

    if total_boxes.size == 0:
        return None

    total_boxes[:, 4] = output[2][passed, 1].reshape((-1,))
    reg = output[1][passed]
    points = output[0][passed]

    # compute landmark points
    bbw = total_boxes[:, 2] - total_boxes[:, 0] + 1
    bbh = total_boxes[:, 3] - total_boxes[:, 1] + 1
    points[:, 0:5] = np.expand_dims(total_boxes[:, 0], 1) + np.expand_dims(bbw, 1) * points[:, 0:5]
    points[:, 5:10] = np.expand_dims(total_boxes[:, 1], 1) + np.expand_dims(bbh, 1) * points[:, 5:10]

    # nms
    total_boxes = self.calibrate_box(total_boxes, reg)
    pick = nms(total_boxes, 0.7, 'Min')
    total_boxes = total_boxes[pick]
    points = points[pick]

    if not self.accurate_landmark:
        return total_boxes, points

    #############################################
    # extended stage
    #############################################
    num_box = total_boxes.shape[0]
    patchw = np.maximum(total_boxes[:, 2] - total_boxes[:, 0] + 1,
                        total_boxes[:, 3] - total_boxes[:, 1] + 1)
    patchw = np.round(patchw * 0.25)

    # make it even
    patchw[np.where(np.mod(patchw, 2) == 1)] += 1

    input_buf = np.zeros((num_box, 15, 24, 24), dtype=np.float32)
    for i in range(5):
        x, y = points[:, i], points[:, i + 5]
        x, y = np.round(x - 0.5 * patchw), np.round(y - 0.5 * patchw)
        [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(
            np.vstack([x, y, x + patchw - 1, y + patchw - 1]).T, width, height)
        for j in range(num_box):
            tmpim = np.zeros((tmpw[j], tmpw[j], 3), dtype=np.float32)
            tmpim[dy[j]:edy[j] + 1, dx[j]:edx[j] + 1, :] = img[y[j]:ey[j] + 1, x[j]:ex[j] + 1, :]
            input_buf[j, i * 3:i * 3 + 3, :, :] = adjust_input(cv2.resize(tmpim, (24, 24)))

    output = self.LNet.predict(input_buf)

    pointx = np.zeros((num_box, 5))
    pointy = np.zeros((num_box, 5))

    for k in range(5):
        # do not make a large movement
        tmp_index = np.where(np.abs(output[k] - 0.5) > 0.35)
        output[k][tmp_index[0]] = 0.5

        pointx[:, k] = np.round(points[:, k] - 0.5 * patchw) + output[k][:, 0] * patchw
        pointy[:, k] = np.round(points[:, k + 5] - 0.5 * patchw) + output[k][:, 1] * patchw

    points = np.hstack([pointx, pointy])
    points = points.astype(np.int32)

    return total_boxes, points
# get input image
#im_arr = xs.squeeze(0).cpu().numpy().transpose(1, 2, 0) * 255.0
im_path = os.path.join(RGB_FOLDER, valid_list[k] + '.jpg')
im = Image.open(im_path)
#im = Image.fromarray(im_arr.astype('uint8'))

# update metric
pos_gt_ind = prob_gt > 0
pos_pred_ind = prob > .5
dets_gt = dets_gt[pos_gt_ind]
dets = dets[pos_pred_ind]
prob_gt = prob_gt[pos_gt_ind]
prob = prob[pos_pred_ind]

# apply nms
dets, prob = nms(dets.detach().cpu().numpy(), prob.detach().cpu().numpy())
#dets, prob = dets.detach().cpu().numpy(), prob.detach().cpu().numpy()

# draw outputs
seg_im = compose_im(np.array(im), seg)
draw = ImageDraw.Draw(seg_im)
for p, det in zip(prob, dets):
    x, y = det
    draw.ellipse((x - 2, y - 2, x + 2, y + 2), fill='red')

# draw ground truth - debug
for p, det in zip(prob_gt, dets_gt):
    x, y = det
    draw.ellipse((x - 1, y - 1, x + 1, y + 1), fill='blue')

seg_im = seg_im.resize((512, 512))
mt.forward(valid_list[k], dets_gt.cpu().numpy(), dets)
def default_rule(det_df, **kwargs):
    assert 'prio_weight' in kwargs.keys(), 'Must input priority weight'
    assert 'prio_file' in kwargs.keys(), 'Must input priority file'
    # out dir
    out_dir = 'detection result images'
    if out_dir is not None:
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)
    det_df['bbox_score'] = det_df.bbox + det_df.score.map(lambda x: [x])
    prio_weight = kwargs['prio_weight']
    prio_file = kwargs['prio_file']
    if len(det_df) == 0:
        if kwargs['draw_imgs']:
            show_and_save_images(kwargs['img_path'], kwargs['img_name'],
                                 det_df.bbox_score.values, det_df.category.values,
                                 out_dir=out_dir)
        return kwargs['false_name'], 1
    else:
        filtered = det_df[det_df['score'] >= kwargs['other_thr']]
        if len(filtered) == 0:
            return kwargs['other_name'], 1
        # filtering
        filtered = filter_code(filtered, 'RES06', 0.9)
        filtered = filter_code(filtered, 'RES03', 0.85, 'RES05')
        filtered = filter_code(filtered, 'AZ08', 0.6)
        filtered = filter_code(filtered, 'STR02', 0.9, 'COM01')
        filtered = filter_code(filtered, 'STR04', 0.8, 'COM01')
        filtered = filter_code(filtered, 'COM03', 0.9)
        filtered = filter_code(filtered, 'PLN01', 0.8, 'RES05')
        filtered = filter_code(filtered, 'REP01', 0.9)
        # filtered = filter_code(filtered, 'COM01', 0.4)

        # # check in
        # if len(filtered) > 1:
        #     if np.sum(filtered.category.values == 'QS') > 1:
        #         code_df = filtered[filtered['category'] == 'QS']
        #         filtered = check_in_filter(filtered, code_df, 0.9)

        # nms
        if len(filtered) != 0:
            lst = []
            for i in range(len(filtered)):
                lst.append(filtered.iloc[i, -1])
            arr = np.array(lst)
            best_bboxes = nms(arr, 0.5)
            filtered = filtered[filtered['bbox_score'].map(lambda x: x in best_bboxes)]

        # judge res04
        df_res05 = filtered[(filtered['category'] == 'RES05') & (filtered['score'] >= 0.5)]
        if len(df_res05) >= 3:
            filtered.loc[filtered['category'] == 'RES05', 'category'] = 'RES04'
        if len(filtered) == 0:
            if kwargs['draw_imgs']:
                show_and_save_images(kwargs['img_path'], kwargs['img_name'],
                                     filtered.bbox_score.values, filtered.category.values,
                                     out_dir=out_dir)
            return kwargs['false_name'], 1
        Max_conf = max(filtered['score'].values)
        prio_thr = Max_conf * prio_weight
        filtered_final = filtered[filtered['score'] >= prio_thr]
        prio = pd.read_excel(prio_file)
        prio_lst = list(prio.values)
        final_code = prio_check(prio_lst, list(filtered_final['category'].values))
        defect_score = max(filtered_final.loc[filtered['category'] == final_code, 'score'].values)
        # draw images
        if kwargs['draw_imgs']:
            show_and_save_images(kwargs['img_path'], kwargs['img_name'],
                                 filtered.bbox_score.values, filtered.category.values,
                                 out_dir=out_dir)
        return final_code, defect_score
def detect_image(self, image):
    image_shape = np.array(np.shape(image)[0:2])
    #---------------------------------------------------------#
    #   Add gray bars to the image for a distortion-free resize
    #---------------------------------------------------------#
    crop_img = letterbox_image(image, [self.input_shape[0], self.input_shape[1]])
    #----------------------------------------------------------------------------------#
    #   Convert RGB to BGR, because the original centernet_hourglass weights
    #   were trained on BGR images
    #----------------------------------------------------------------------------------#
    photo = np.array(crop_img, dtype=np.float32)[:, :, ::-1]
    #-----------------------------------------------------------#
    #   Preprocess and normalize the image; the resulting photo
    #   has shape [1, 512, 512, 3]
    #-----------------------------------------------------------#
    photo = np.reshape(preprocess_image(photo),
                       [1, self.input_shape[0], self.input_shape[1], self.input_shape[2]])

    preds = self.get_pred(photo).numpy()
    #-------------------------------------------------------#
    #   For CenterNet, locating the object center is crucial.
    #   A large object carries a lot of local evidence, so its
    #   center point is hard to pin down, and the max-pooling
    #   style of non-maximum suppression cannot remove those
    #   local boxes. That is why an extra box-level NMS pass is
    #   implemented here. In practice the extra NMS makes little
    #   difference with an hourglass backbone, but a noticeable
    #   one with resnet.
    #-------------------------------------------------------#
    if self.nms:
        preds = np.array(nms(preds, self.nms_threhold))

    if len(preds[0]) <= 0:
        return image

    #-----------------------------------------------------------#
    #   Convert the predictions to fractional (0-1) coordinates
    #-----------------------------------------------------------#
    preds[0][:, 0:4] = preds[0][:, 0:4] / (self.input_shape[0] / 4)

    det_label = preds[0][:, -1]
    det_conf = preds[0][:, -2]
    det_xmin, det_ymin, det_xmax, det_ymax = preds[0][:, 0], preds[0][:, 1], preds[0][:, 2], preds[0][:, 3]
    #-----------------------------------------------------------#
    #   Keep only the boxes whose score is above confidence
    #-----------------------------------------------------------#
    top_indices = [i for i, conf in enumerate(det_conf) if conf >= self.confidence]
    top_conf = det_conf[top_indices]
    top_label_indices = det_label[top_indices].tolist()
    top_xmin = np.expand_dims(det_xmin[top_indices], -1)
    top_ymin = np.expand_dims(det_ymin[top_indices], -1)
    top_xmax = np.expand_dims(det_xmax[top_indices], -1)
    top_ymax = np.expand_dims(det_ymax[top_indices], -1)

    #-----------------------------------------------------------#
    #   Remove the gray-bar padding
    #-----------------------------------------------------------#
    boxes = centernet_correct_boxes(top_ymin, top_xmin, top_ymax, top_xmax,
                                    np.array([self.input_shape[0], self.input_shape[1]]),
                                    image_shape)

    font = ImageFont.truetype(font='model_data/simhei.ttf',
                              size=np.floor(3e-2 * np.shape(image)[1] + 0.5).astype('int32'))
    thickness = max((np.shape(image)[0] + np.shape(image)[1]) // self.input_shape[0], 1)

    for i, c in enumerate(top_label_indices):
        predicted_class = self.class_names[int(c)]
        score = top_conf[i]

        top, left, bottom, right = boxes[i]
        top = top - 5
        left = left - 5
        bottom = bottom + 5
        right = right + 5

        top = max(0, np.floor(top + 0.5).astype('int32'))
        left = max(0, np.floor(left + 0.5).astype('int32'))
        bottom = min(np.shape(image)[0], np.floor(bottom + 0.5).astype('int32'))
        right = min(np.shape(image)[1], np.floor(right + 0.5).astype('int32'))

        # draw the box
        label = '{} {:.2f}'.format(predicted_class, score)
        draw = ImageDraw.Draw(image)
        label_size = draw.textsize(label, font)
        label = label.encode('utf-8')
        print(label, top, left, bottom, right)

        if top - label_size[1] >= 0:
            text_origin = np.array([left, top - label_size[1]])
        else:
            text_origin = np.array([left, top + 1])

        for i in range(thickness):
            draw.rectangle([left + i, top + i, right - i, bottom - i],
                           outline=self.colors[int(c)])
        draw.rectangle([tuple(text_origin), tuple(text_origin + label_size)],
                       fill=self.colors[int(c)])
        draw.text(text_origin, str(label, 'UTF-8'), fill=(0, 0, 0), font=font)
        del draw
    return image
else:
    # retrieve detections
    grid_size = 2.0
    dets = grid_size * coords[0, :, :2] + anchors[0]
    dets_gt = grid_size * ys[0, :, :2] + anchors[0]

    # select detections
    prob_gt = ys[0, :, 2]
    prob = prob.view(-1)
    pos_gt_ind = prob_gt > 0
    pos_pred_ind = prob > .5
    dets_gt = dets_gt[pos_gt_ind]
    dets = dets[pos_pred_ind]
    prob = prob[pos_pred_ind]
    dets, prob = nms(np.array(dets), np.array(prob))

    # update metric
    mt.forward(valid_list[i], np.array(dets_gt), np.array(dets))

# print epoch loss
print('[%d] %s lr: %f \nloss: %.5f' %
      (epoch + 1, phase, optimizer.param_groups[0]['lr'],
       running_loss / len(dset_loader[phase])))

# track best model
if phase == 'val':
    recall, precision = mt.calc_metrics()
    f_score = 2.0 * precision * recall / (precision + recall + 1e-8)
    print('val f_score %.5f' % f_score)
def detect_face(self, img):
    """
    detect face over img
    Parameters:
    ----------
        img: numpy array, bgr order of shape (1, 3, n, m)
            input image
    Returns:
    -------
        bboxes: numpy array, n x 5 (x1, y1, x2, y2, score)
            bboxes
        points: numpy array, n x 10 (x1, x2 ... x5, y1, y2 ... y5)
            landmarks
    """
    # check input
    height, width, _ = img.shape
    MIN_DET_SIZE = 12

    if img is None:
        return None

    # only works for color image
    if len(img.shape) != 3:
        return None

    # detected boxes
    total_boxes = []

    minl = min(height, width)

    # get all the valid scales
    scales = []
    m = MIN_DET_SIZE / self.minsize
    minl *= m
    factor_count = 0
    while minl > MIN_DET_SIZE:
        scales.append(m * self.factor**factor_count)
        minl *= self.factor
        factor_count += 1

    sliced_index = self.slice_index(len(scales))
    total_boxes = []
    for batch in sliced_index:
        local_boxes = map(
            detect_first_stage_warpper,
            zip(repeat(img), self.PNets[:len(batch)],
                [scales[i] for i in batch], repeat(self.threshold[0])))
        total_boxes.extend(local_boxes)

    # remove the Nones
    total_boxes = [i for i in total_boxes if i is not None]

    if len(total_boxes) == 0:
        return None

    total_boxes = np.vstack(total_boxes)

    if total_boxes.size == 0:
        return None

    # merge the detections from the first stage
    pick = nms(total_boxes[:, 0:5], 0.7, 'Union')
    total_boxes = total_boxes[pick]

    bbw = total_boxes[:, 2] - total_boxes[:, 0] + 1
    bbh = total_boxes[:, 3] - total_boxes[:, 1] + 1

    # refine the bboxes
    total_boxes = np.vstack([
        total_boxes[:, 0] + total_boxes[:, 5] * bbw,
        total_boxes[:, 1] + total_boxes[:, 6] * bbh,
        total_boxes[:, 2] + total_boxes[:, 7] * bbw,
        total_boxes[:, 3] + total_boxes[:, 8] * bbh,
        total_boxes[:, 4]
    ])

    total_boxes = total_boxes.T
    total_boxes = self.convert_to_square(total_boxes)
    total_boxes[:, 0:4] = np.round(total_boxes[:, 0:4])

    #############################################
    # second stage
    #############################################
    num_box = total_boxes.shape[0]

    # pad the bbox
    [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(total_boxes, width, height)
    # (3, 24, 24) is the input shape for RNet
    input_buf = np.zeros((num_box, 3, 24, 24), dtype=np.float32)

    for i in range(num_box):
        tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
        tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = img[y[i]:ey[i] + 1, x[i]:ex[i] + 1, :]
        input_buf[i, :, :, :] = adjust_input(cv2.resize(tmp, (24, 24)))

    output = self.RNet.predict(input_buf)

    # filter the total_boxes with threshold
    passed = np.where(output[1][:, 1] > self.threshold[1])
    total_boxes = total_boxes[passed]

    if total_boxes.size == 0:
        return None

    total_boxes[:, 4] = output[1][passed, 1].reshape((-1,))
    reg = output[0][passed]

    # nms
    pick = nms(total_boxes, 0.7, 'Union')
    total_boxes = total_boxes[pick]
    total_boxes = self.calibrate_box(total_boxes, reg[pick])
    total_boxes = self.convert_to_square(total_boxes)
    total_boxes[:, 0:4] = np.round(total_boxes[:, 0:4])

    #############################################
    # third stage
    #############################################
    num_box = total_boxes.shape[0]

    # pad the bbox
    [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(total_boxes, width, height)
    # (3, 48, 48) is the input shape for ONet
    input_buf = np.zeros((num_box, 3, 48, 48), dtype=np.float32)

    for i in range(num_box):
        tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.float32)
        tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = img[y[i]:ey[i] + 1, x[i]:ex[i] + 1, :]
        input_buf[i, :, :, :] = adjust_input(cv2.resize(tmp, (48, 48)))

    output = self.ONet.predict(input_buf)

    # filter the total_boxes with threshold
    passed = np.where(output[2][:, 1] > self.threshold[2])
    total_boxes = total_boxes[passed]

    if total_boxes.size == 0:
        return None

    total_boxes[:, 4] = output[2][passed, 1].reshape((-1,))
    reg = output[1][passed]
    points = output[0][passed]

    # compute landmark points
    bbw = total_boxes[:, 2] - total_boxes[:, 0] + 1
    bbh = total_boxes[:, 3] - total_boxes[:, 1] + 1
    points[:, 0:5] = np.expand_dims(total_boxes[:, 0], 1) + np.expand_dims(bbw, 1) * points[:, 0:5]
    points[:, 5:10] = np.expand_dims(total_boxes[:, 1], 1) + np.expand_dims(bbh, 1) * points[:, 5:10]

    # nms
    total_boxes = self.calibrate_box(total_boxes, reg)
    pick = nms(total_boxes, 0.7, 'Min')
    total_boxes = total_boxes[pick]
    points = points[pick]

    if not self.accurate_landmark:
        return total_boxes, points

    #############################################
    # extended stage
    #############################################
    num_box = total_boxes.shape[0]
    patchw = np.maximum(total_boxes[:, 2] - total_boxes[:, 0] + 1,
                        total_boxes[:, 3] - total_boxes[:, 1] + 1)
    patchw = np.round(patchw * 0.25)

    # make it even
    patchw[np.where(np.mod(patchw, 2) == 1)] += 1

    input_buf = np.zeros((num_box, 15, 24, 24), dtype=np.float32)
    for i in range(5):
        x, y = points[:, i], points[:, i + 5]
        x, y = np.round(x - 0.5 * patchw), np.round(y - 0.5 * patchw)
        [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(
            np.vstack([x, y, x + patchw - 1, y + patchw - 1]).T, width, height)
        for j in range(num_box):
            tmpim = np.zeros((tmpw[j], tmpw[j], 3), dtype=np.float32)
            tmpim[dy[j]:edy[j] + 1, dx[j]:edx[j] + 1, :] = img[y[j]:ey[j] + 1, x[j]:ex[j] + 1, :]
            input_buf[j, i * 3:i * 3 + 3, :, :] = adjust_input(cv2.resize(tmpim, (24, 24)))

    output = self.LNet.predict(input_buf)

    pointx = np.zeros((num_box, 5))
    pointy = np.zeros((num_box, 5))

    for k in range(5):
        # do not make a large movement
        tmp_index = np.where(np.abs(output[k] - 0.5) > 0.35)
        output[k][tmp_index[0]] = 0.5

        pointx[:, k] = np.round(points[:, k] - 0.5 * patchw) + output[k][:, 0] * patchw
        pointy[:, k] = np.round(points[:, k + 5] - 0.5 * patchw) + output[k][:, 1] * patchw

    points = np.hstack([pointx, pointy])
    points = points.astype(np.int32)

    return total_boxes, points
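# For reference: `convert_to_square` used in the cascade above is not shown
# in this section. A hypothetical sketch of the usual MTCNN behavior is
# given below (expand each box to a square around its center, keeping the
# score column untouched); the actual helper may differ.
def convert_to_square_sketch(bbox):
    square_bbox = bbox.copy()
    h = bbox[:, 3] - bbox[:, 1] + 1
    w = bbox[:, 2] - bbox[:, 0] + 1
    max_side = np.maximum(h, w)
    # re-center the square on the original box center
    square_bbox[:, 0] = bbox[:, 0] + w * 0.5 - max_side * 0.5
    square_bbox[:, 1] = bbox[:, 1] + h * 0.5 - max_side * 0.5
    square_bbox[:, 2] = square_bbox[:, 0] + max_side - 1
    square_bbox[:, 3] = square_bbox[:, 1] + max_side - 1
    return square_bbox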
def get_FPS(self, image, test_interval):
    image_shape = np.array(np.shape(image)[0:2])
    #---------------------------------------------------------#
    #   Add gray bars to the image for a distortion-free resize
    #---------------------------------------------------------#
    crop_img = letterbox_image(image, [self.image_size[0], self.image_size[1]])
    #----------------------------------------------------------------------------------#
    #   Convert RGB to BGR, because the original centernet_hourglass weights
    #   were trained on BGR images
    #----------------------------------------------------------------------------------#
    photo = np.array(crop_img, dtype=np.float32)[:, :, ::-1]
    #-----------------------------------------------------------#
    #   Preprocess and normalize the image; the resulting photo
    #   has shape [1, 512, 512, 3]
    #-----------------------------------------------------------#
    photo = np.reshape(np.transpose(preprocess_image(photo), (2, 0, 1)),
                       [1, self.image_size[2], self.image_size[0], self.image_size[1]])

    with torch.no_grad():
        images = Variable(torch.from_numpy(np.asarray(photo)).type(torch.FloatTensor))
        if self.cuda:
            images = images.cuda()
        outputs = self.centernet(images)
        if self.backbone == 'hourglass':
            outputs = [outputs[-1]["hm"].sigmoid(), outputs[-1]["wh"], outputs[-1]["reg"]]
        outputs = decode_bbox(outputs[0], outputs[1], outputs[2],
                              self.image_size, self.confidence, self.cuda)

        try:
            if self.nms:
                outputs = np.array(nms(outputs, self.nms_threhold))
            output = outputs[0]
            if len(output) > 0:
                batch_boxes, det_conf, det_label = output[:, :4], output[:, 4], output[:, 5]
                det_xmin, det_ymin, det_xmax, det_ymax = batch_boxes[:, 0], batch_boxes[:, 1], batch_boxes[:, 2], batch_boxes[:, 3]
                top_indices = [i for i, conf in enumerate(det_conf) if conf >= self.confidence]
                top_conf = det_conf[top_indices]
                top_label_indices = det_label[top_indices].tolist()
                top_xmin = np.expand_dims(det_xmin[top_indices], -1)
                top_ymin = np.expand_dims(det_ymin[top_indices], -1)
                top_xmax = np.expand_dims(det_xmax[top_indices], -1)
                top_ymax = np.expand_dims(det_ymax[top_indices], -1)
                boxes = centernet_correct_boxes(top_ymin, top_xmin, top_ymax, top_xmax,
                                                np.array([self.image_size[0], self.image_size[1]]),
                                                image_shape)
        except:
            pass

    t1 = time.time()
    for _ in range(test_interval):
        with torch.no_grad():
            outputs = self.centernet(images)
            if self.backbone == 'hourglass':
                outputs = [outputs[-1]["hm"].sigmoid(), outputs[-1]["wh"], outputs[-1]["reg"]]
            outputs = decode_bbox(outputs[0], outputs[1], outputs[2],
                                  self.image_size, self.confidence, self.cuda)

            try:
                if self.nms:
                    outputs = np.array(nms(outputs, self.nms_threhold))
                output = outputs[0]
                if len(output) > 0:
                    batch_boxes, det_conf, det_label = output[:, :4], output[:, 4], output[:, 5]
                    det_xmin, det_ymin, det_xmax, det_ymax = batch_boxes[:, 0], batch_boxes[:, 1], batch_boxes[:, 2], batch_boxes[:, 3]
                    top_indices = [i for i, conf in enumerate(det_conf) if conf >= self.confidence]
                    top_conf = det_conf[top_indices]
                    top_label_indices = det_label[top_indices].tolist()
                    top_xmin = np.expand_dims(det_xmin[top_indices], -1)
                    top_ymin = np.expand_dims(det_ymin[top_indices], -1)
                    top_xmax = np.expand_dims(det_xmax[top_indices], -1)
                    top_ymax = np.expand_dims(det_ymax[top_indices], -1)
                    boxes = centernet_correct_boxes(top_ymin, top_xmin, top_ymax, top_xmax,
                                                    np.array([self.image_size[0], self.image_size[1]]),
                                                    image_shape)
            except:
                pass
    t2 = time.time()
    tact_time = (t2 - t1) / test_interval
    return tact_time
def detect_image(self, image_id, image):
    f = open("./input/detection-results/" + image_id + ".txt", "w")
    self.confidence = 0.01
    self.nms_threhold = 0.5
    image_shape = np.array(np.shape(image)[0:2])

    crop_img = letterbox_image(image, [self.image_size[0], self.image_size[1]])
    # Convert RGB to BGR, because the original centernet_hourglass weights
    # were trained on BGR images
    photo = np.array(crop_img, dtype=np.float32)[:, :, ::-1]
    # Preprocess and normalize the image
    photo = np.reshape(np.transpose(preprocess_image(photo), (2, 0, 1)),
                       [1, self.image_size[2], self.image_size[0], self.image_size[1]])

    with torch.no_grad():
        photo = np.asarray(photo)
        images = Variable(torch.from_numpy(photo).type(torch.FloatTensor))
        if self.cuda:
            images = images.cuda()
        outputs = self.centernet(images)
        if self.backbone == 'hourglass':
            outputs = [outputs[-1]["hm"].sigmoid(), outputs[-1]["wh"], outputs[-1]["reg"]]
        outputs = decode_bbox(outputs[0], outputs[1], outputs[2],
                              self.image_size, self.confidence, self.cuda)

    try:
        if self.nms:
            outputs = np.array(nms(outputs, self.nms_threhold))
    except:
        pass

    output = outputs[0]
    if len(output) <= 0:
        return image

    batch_boxes, det_conf, det_label = output[:, :4], output[:, 4], output[:, 5]

    # Keep only the boxes whose score is above confidence
    det_xmin, det_ymin, det_xmax, det_ymax = batch_boxes[:, 0], batch_boxes[:, 1], batch_boxes[:, 2], batch_boxes[:, 3]
    top_indices = [i for i, conf in enumerate(det_conf) if conf >= self.confidence]
    top_conf = det_conf[top_indices]
    top_label_indices = det_label[top_indices].tolist()
    top_xmin = np.expand_dims(det_xmin[top_indices], -1)
    top_ymin = np.expand_dims(det_ymin[top_indices], -1)
    top_xmax = np.expand_dims(det_xmax[top_indices], -1)
    top_ymax = np.expand_dims(det_ymax[top_indices], -1)

    # Remove the gray-bar padding
    boxes = centernet_correct_boxes(top_ymin, top_xmin, top_ymax, top_xmax,
                                    np.array([self.image_size[0], self.image_size[1]]),
                                    image_shape)

    for i, c in enumerate(top_label_indices):
        predicted_class = self.class_names[int(c)]
        score = str(top_conf[i])
        top, left, bottom, right = boxes[i]
        f.write("%s %s %s %s %s %s\n" %
                (predicted_class, score[:6], str(int(left)), str(int(top)),
                 str(int(right)), str(int(bottom))))
    f.close()
    return
def detect_image(self, image):
    image_shape = np.array(np.shape(image)[0:2])

    crop_img = letterbox_image(image, [self.image_size[0], self.image_size[1]])
    # Convert RGB to BGR, because the original centernet_hourglass weights
    # were trained on BGR images
    photo = np.array(crop_img, dtype=np.float32)[:, :, ::-1]
    # Preprocess and normalize the image
    photo = np.reshape(np.transpose(preprocess_image(photo), (2, 0, 1)),
                       [1, self.image_size[2], self.image_size[0], self.image_size[1]])

    with torch.no_grad():
        photo = np.asarray(photo)
        images = Variable(torch.from_numpy(photo).type(torch.FloatTensor))
        if self.cuda:
            images = images.cuda()
        outputs = self.centernet(images)
        if self.backbone == 'hourglass':
            outputs = [outputs[-1]["hm"].sigmoid(), outputs[-1]["wh"], outputs[-1]["reg"]]
        outputs = decode_bbox(outputs[0], outputs[1], outputs[2],
                              self.image_size, self.confidence, self.cuda)

    #-------------------------------------------------------#
    #   For CenterNet, locating the object center is crucial.
    #   A large object carries a lot of local evidence, so its
    #   center point is hard to pin down, and the max-pooling
    #   style of non-maximum suppression cannot remove those
    #   local boxes. That is why an extra box-level NMS pass is
    #   implemented here. In practice the extra NMS makes little
    #   difference with an hourglass backbone, but a noticeable
    #   one with resnet.
    #-------------------------------------------------------#
    try:
        if self.nms:
            outputs = np.array(nms(outputs, self.nms_threhold))
    except:
        pass

    output = outputs[0]
    if len(output) <= 0:
        return image

    batch_boxes, det_conf, det_label = output[:, :4], output[:, 4], output[:, 5]

    # Keep only the boxes whose score is above confidence
    det_xmin, det_ymin, det_xmax, det_ymax = batch_boxes[:, 0], batch_boxes[:, 1], batch_boxes[:, 2], batch_boxes[:, 3]
    top_indices = [i for i, conf in enumerate(det_conf) if conf >= self.confidence]
    top_conf = det_conf[top_indices]
    top_label_indices = det_label[top_indices].tolist()
    top_xmin = np.expand_dims(det_xmin[top_indices], -1)
    top_ymin = np.expand_dims(det_ymin[top_indices], -1)
    top_xmax = np.expand_dims(det_xmax[top_indices], -1)
    top_ymax = np.expand_dims(det_ymax[top_indices], -1)

    # Remove the gray-bar padding
    boxes = centernet_correct_boxes(top_ymin, top_xmin, top_ymax, top_xmax,
                                    np.array([self.image_size[0], self.image_size[1]]),
                                    image_shape)

    font = ImageFont.truetype(font='model_data/simhei.ttf',
                              size=np.floor(3e-2 * np.shape(image)[1] + 0.5).astype('int32'))
    thickness = (np.shape(image)[0] + np.shape(image)[1]) // self.image_size[0]

    for i, c in enumerate(top_label_indices):
        predicted_class = self.class_names[int(c)]
        score = top_conf[i]

        top, left, bottom, right = boxes[i]
        top = top - 5
        left = left - 5
        bottom = bottom + 5
        right = right + 5

        top = max(0, np.floor(top + 0.5).astype('int32'))
        left = max(0, np.floor(left + 0.5).astype('int32'))
        bottom = min(np.shape(image)[0], np.floor(bottom + 0.5).astype('int32'))
        right = min(np.shape(image)[1], np.floor(right + 0.5).astype('int32'))

        # draw the box
        label = '{} {:.2f}'.format(predicted_class, score)
        draw = ImageDraw.Draw(image)
        label_size = draw.textsize(label, font)
        label = label.encode('utf-8')
        print(label)

        if top - label_size[1] >= 0:
            text_origin = np.array([left, top - label_size[1]])
        else:
            text_origin = np.array([left, top + 1])

        for i in range(thickness):
            draw.rectangle([left + i, top + i, right - i, bottom - i],
                           outline=self.colors[int(c)])
        draw.rectangle([tuple(text_origin), tuple(text_origin + label_size)],
                       fill=self.colors[int(c)])
        draw.text(text_origin, str(label, 'UTF-8'), fill=(0, 0, 0), font=font)
        del draw
    return image
def detect_image(self, image_id, image):
    f = open("./input/detection-results/" + image_id + ".txt", "w")
    self.confidence = 0.01
    self.nms_threhold = 0.5
    image_shape = np.array(np.shape(image)[0:2])
    #---------------------------------------------------------#
    #   Add gray bars to the image for a distortion-free resize
    #---------------------------------------------------------#
    crop_img = letterbox_image(image, [self.image_size[0], self.image_size[1]])
    #----------------------------------------------------------------------------------#
    #   Convert RGB to BGR, because the original centernet_hourglass weights
    #   were trained on BGR images
    #----------------------------------------------------------------------------------#
    photo = np.array(crop_img, dtype=np.float32)[:, :, ::-1]
    #-----------------------------------------------------------#
    #   Preprocess and normalize the image; the resulting photo
    #   has shape [1, 512, 512, 3]
    #-----------------------------------------------------------#
    photo = np.reshape(np.transpose(preprocess_image(photo), (2, 0, 1)),
                       [1, self.image_size[2], self.image_size[0], self.image_size[1]])

    with torch.no_grad():
        images = Variable(torch.from_numpy(np.asarray(photo)).type(torch.FloatTensor))
        if self.cuda:
            images = images.cuda()
        outputs = self.centernet(images)
        if self.backbone == 'hourglass':
            outputs = [outputs[-1]["hm"].sigmoid(), outputs[-1]["wh"], outputs[-1]["reg"]]
        #-----------------------------------------------------------#
        #   Decode the raw predictions
        #-----------------------------------------------------------#
        outputs = decode_bbox(outputs[0], outputs[1], outputs[2],
                              self.image_size, self.confidence, self.cuda)

    #-------------------------------------------------------#
    #   For CenterNet, locating the object center is crucial.
    #   A large object carries a lot of local evidence, so its
    #   center point is hard to pin down, and the max-pooling
    #   style of non-maximum suppression cannot remove those
    #   local boxes. That is why an extra box-level NMS pass is
    #   implemented here. In practice the extra NMS makes little
    #   difference with an hourglass backbone, but a noticeable
    #   one with resnet.
    #-------------------------------------------------------#
    try:
        if self.nms:
            outputs = np.array(nms(outputs, self.nms_threhold))
    except:
        pass

    output = outputs[0]
    if len(output) <= 0:
        return image

    batch_boxes, det_conf, det_label = output[:, :4], output[:, 4], output[:, 5]
    det_xmin, det_ymin, det_xmax, det_ymax = batch_boxes[:, 0], batch_boxes[:, 1], batch_boxes[:, 2], batch_boxes[:, 3]
    #-----------------------------------------------------------#
    #   Keep only the boxes whose score is above confidence
    #-----------------------------------------------------------#
    top_indices = [i for i, conf in enumerate(det_conf) if conf >= self.confidence]
    top_conf = det_conf[top_indices]
    top_label_indices = det_label[top_indices].tolist()
    top_xmin = np.expand_dims(det_xmin[top_indices], -1)
    top_ymin = np.expand_dims(det_ymin[top_indices], -1)
    top_xmax = np.expand_dims(det_xmax[top_indices], -1)
    top_ymax = np.expand_dims(det_ymax[top_indices], -1)

    #-----------------------------------------------------------#
    #   Remove the gray-bar padding
    #-----------------------------------------------------------#
    boxes = centernet_correct_boxes(top_ymin, top_xmin, top_ymax, top_xmax,
                                    np.array([self.image_size[0], self.image_size[1]]),
                                    image_shape)

    for i, c in enumerate(top_label_indices):
        predicted_class = self.class_names[int(c)]
        score = str(top_conf[i])
        top, left, bottom, right = boxes[i]
        f.write("%s %s %s %s %s %s\n" %
                (predicted_class, score[:6], str(int(left)), str(int(top)),
                 str(int(right)), str(int(bottom))))
    f.close()
    return