def detect(self, image_data):
    input_image = image_data.asarray().astype('uint8')

    from mmdet.apis import inference_detector
    gpu_string = 'cuda:' + str(self._gpu_index)
    detections = inference_detector(self._model, input_image, self._cfg,
                                    device=gpu_string)
    class_names = ['fish'] * 10000

    if isinstance(detections, tuple):
        bbox_result, segm_result = detections
    else:
        bbox_result, segm_result = detections, None

    if np.size(bbox_result) > 0:
        bboxes = np.vstack(bbox_result)
    else:
        bboxes = []

    sys.stdout.write("Detected " + str(len(bbox_result)) + " objects\n")
    sys.stdout.flush()

    # Score threshold; the original referenced an undefined `score_thr`, so it
    # is assumed here to be a detector attribute set from the config.
    score_thr = self._threshold

    # convert segmentation masks
    masks = []
    if segm_result is not None:
        segms = mmcv.concat_list(segm_result)
        inds = np.where(bboxes[:, -1] > score_thr)[0]
        for i in inds:
            # np.bool was removed from recent numpy; plain bool is equivalent
            masks.append(maskUtils.decode(segms[i]).astype(bool))

    # collect per-class labels
    labels = [
        np.full(bbox.shape[0], i, dtype=np.int32)
        for i, bbox in enumerate(bbox_result)
    ]
    if np.size(labels) > 0:
        labels = np.concatenate(labels)
    else:
        labels = []

    # convert to kwiver format, apply threshold
    output = []
    for bbox, label in zip(bboxes, labels):
        score = float(bbox[-1])
        if score < score_thr:
            continue
        # confidence passed as the second argument (assumed binding signature)
        output.append(DetectedObject(
            BoundingBox(bbox[0], bbox[1], bbox[2], bbox[3]), score))

    # debug visualization: show every box regardless of score
    if np.size(labels) > 0:
        mmcv.imshow_det_bboxes(
            input_image, bboxes, labels,
            class_names=class_names,
            score_thr=-100.0,
            show=True)

    return DetectedObjectSet(output)
def result2json(checkpoint_file):
    config_file = 'code/cascade_skypool_dataenhancement.py'  # change to your own config file
    # checkpoint_file = 'work_dirs/cascade_custom_data_enhancement/epoch_56.pth'  # change to your own trained weights
    test_path = 'data/guangdong1_round1_testA_20190818/testA'  # path to the official test-set images
    json_name = "result_" + time.strftime("%Y%m%d%H%M%S", time.localtime()) + ".json"
    model = init_detector(config_file, checkpoint_file, device='cuda:0')

    img_list = []
    for img_name in os.listdir(test_path):
        if img_name.endswith('.jpg'):
            img_list.append(img_name)

    result = []
    for i, img_name in enumerate(img_list, 1):
        full_img = os.path.join(test_path, img_name)
        predict = inference_detector(model, full_img)
        # the inner loop variable is named cls_id so it does not shadow the
        # image index i; category ids start at 1
        for cls_id, bboxes in enumerate(predict, 1):
            if len(bboxes) > 0:
                defect_label = cls_id
                for bbox in bboxes:
                    x1, y1, x2, y2, score = bbox.tolist()
                    x1, y1, x2, y2 = round(x1, 2), round(y1, 2), round(x2, 2), round(y2, 2)
                    result.append({
                        'name': img_name,
                        'category': defect_label,
                        'bbox': [x1, y1, x2, y2],
                        'score': score
                    })

    with open(json_name, 'w') as fp:
        json.dump(result, fp, indent=4, separators=(',', ': '))
def inference_single(self, imagname, slide_size, chip_size):
    img = mmcv.imread(imagname)
    height, width, channel = img.shape
    slide_h, slide_w = slide_size
    hn, wn = chip_size
    # TODO: check the corner case
    # one (0, 9) array per class: 8 polygon coordinates plus a score
    total_detections = [np.zeros((0, 9)) for _ in range(len(self.classnames))]

    for i in tqdm(range(int(width / slide_w) + 1)):
        for j in range(int(height / slide_h) + 1):
            subimg = np.zeros((hn, wn, channel))
            chip = img[j * slide_h:j * slide_h + hn,
                       i * slide_w:i * slide_w + wn, :3]
            subimg[:chip.shape[0], :chip.shape[1], :] = chip

            chip_detections = inference_detector(self.model, subimg)
            # shift the polygon coordinates from chip space back to image space
            for cls_id, name in enumerate(self.classnames):
                chip_detections[cls_id][:, :8][:, ::2] += i * slide_w
                chip_detections[cls_id][:, :8][:, 1::2] += j * slide_h
                total_detections[cls_id] = np.concatenate(
                    (total_detections[cls_id], chip_detections[cls_id]))

    # per-class NMS on the merged polygon detections
    for i in range(len(self.classnames)):
        keep = py_cpu_nms_poly_fast_np(total_detections[i], 0.1)
        total_detections[i] = total_detections[i][keep]
    return total_detections
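# Self-contained sketch of the tile-offset correction used in
# inference_single above: even columns of the 8-point polygon are x
# coordinates and get `+ i * slide_w`, odd columns are y coordinates and get
# `+ j * slide_h` (fake data; no model needed).
import numpy as np

dets = np.array([[0., 0., 10., 0., 10., 10., 0., 10., 0.9]])  # one polygon + score
i, j, slide_w, slide_h = 2, 1, 512, 512
dets[:, :8][:, ::2] += i * slide_w    # shift x coordinates by the tile's column offset
dets[:, :8][:, 1::2] += j * slide_h   # shift y coordinates by the tile's row offset
print(dets)  # [[1024. 512. 1034. 512. 1034. 522. 1024. 522. 0.9]]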
def solo_infer(model, img, conf):
    image_np = np.array(Image.open(img))
    result, _ = inference_detector(model, img)
    cur_result = result[0]
    if cur_result is None:
        # no instances detected: return empty maps
        h, w = image_np.shape[:2]
        empty = np.zeros((h, w), dtype=np.uint8)
        return None, None, np.stack([empty, empty], axis=-1), image_np

    masks = cur_result[0].cpu().numpy().astype(np.uint8)
    classes = cur_result[1].cpu().numpy()
    scores = cur_result[2].cpu().numpy()
    h, w = masks[0].shape

    vis_inds = (scores > conf)
    masks = masks[vis_inds]
    classes = classes[vis_inds]

    # sort by area (largest first) and drop any mask that is mostly covered
    # by another mask
    areas = [mask.sum() for mask in masks]
    sorted_inds = np.argsort(areas)[::-1]
    keep_inds = []
    for i in sorted_inds:
        keep = True
        if i != 0:
            for j in range(i):
                overlap = np.sum((masks[i, :, :] > 0) * (masks[j, :, :] > 0)) \
                    / np.sum(masks[j, :, :] > 0)
                if overlap > 0.85:
                    keep = False
                    break
        if keep:
            keep_inds.append(i)
    masks = masks[keep_inds]
    classes = classes[keep_inds]

    instance_map = np.zeros((h, w), dtype=np.uint8)
    semantic_map = np.zeros((h, w), dtype=np.uint8)
    for i, (mask, cls) in enumerate(zip(masks, classes)):
        instance_map[mask > 0] = i + 1
        semantic_map[mask > 0] = cls + 1
        if cls in [0, 1, 7]:
            color_mask = np.random.randint(0, 256, (1, 3), dtype=np.uint8)
            mask_bool = mask.astype(bool)  # np.bool was removed from recent numpy
            image_np[mask_bool] = image_np[mask_bool] * 0.5 + color_mask * 0.5

    final_mask = np.stack([instance_map, semantic_map], axis=-1)
    return masks, classes, final_mask, image_np
def main():
    parser = ArgumentParser()
    # parser.add_argument('img', help='Image file')
    parser.add_argument('config', help='Config file')
    parser.add_argument('checkpoint', help='Checkpoint file')
    parser.add_argument('--device', default='cuda:0',
                        help='Device used for inference')
    parser.add_argument('--score-thr', type=float, default=1e-9,
                        help='bbox score threshold')
    args = parser.parse_args()

    # build the model from a config file and a checkpoint file
    model = init_detector(args.config, args.checkpoint, device=args.device)

    # test every image in the directory
    test_img_dir = "/home/dell/桌面/tile_round1_testA_20201231/tile_round1_testA_20201231/cam3/crop/"
    imgs_dir = os.listdir(test_img_dir)
    json_result = []
    for img in imgs_dir:
        img_name = img
        img = test_img_dir + img
        result = inference_detector(model, img)
        # show the results
        show_result_pyplot(model, result, img, img_name,
                           json_result=json_result,
                           score_thr=args.score_thr)

    with open("/home/dell/桌面/guangdong/result_cam3.json", "w") as f:
        json.dump(json_result, f, cls=MyEncoder, indent=6)
    print("Finished writing results to file...")
def detect(self, img: np.ndarray) -> List[Detection]:
    raw_result = inference_detector(self.model, img)
    detections = []
    if isinstance(raw_result, tuple):
        # (bbox, mask) results per class
        for class_id in range(len(raw_result[0])):
            box_result = raw_result[0][class_id]
            mask_result = raw_result[1][class_id]
            valid_indices = np.where(box_result[:, 4] > self.conf_thres)[0]
            box_result = box_result[valid_indices]
            mask_result = [mask_result[i] for i in valid_indices]
            if mask_result != []:
                # If mask_result is empty, decode() will raise an error
                # mask_result = decode(mask_result).astype(bool)
                detections.extend([
                    Detection(box_result[i][:4], box_result[i][4],
                              class_id=class_id, mask=mask_result[i])
                    for i in range(len(box_result))
                ])
            else:
                detections.extend([
                    Detection(box_result[i][:4], box_result[i][4],
                              class_id=class_id, mask=None)
                    for i in range(len(box_result))
                ])
    else:
        # bbox-only results per class
        for class_id in range(len(raw_result)):
            class_result = raw_result[class_id]
            class_result = class_result[np.where(
                class_result[:, 4] > self.conf_thres)]
            detections.extend([
                Detection(line[:4], line[4], class_id=class_id)
                for line in class_result
            ])
    return detections
def predict(self, tasks, **kwargs):
    assert len(tasks) == 1
    task = tasks[0]
    image_path = get_image_local_path(task['data'][self.value],
                                      project_dir=self.project_path)
    model_results = inference_detector(self.model, image_path)
    results = []
    all_scores = []
    img_width, img_height = get_image_size(image_path)
    for bboxes, label in zip(model_results, self.model.CLASSES):
        output_label = self.label_map.get(label, label)
        if output_label not in self.labels_in_config:
            print(output_label + ' label not found in project config.')
            continue
        for bbox in bboxes:
            bbox = list(bbox)
            if not bbox:
                continue
            score = float(bbox[-1])
            if score < self.score_thresh:
                continue
            x, y, xmax, ymax = bbox[:4]
            # Label Studio expects coordinates as percentages of image size
            results.append({
                'from_name': self.from_name,
                'to_name': self.to_name,
                'type': 'rectanglelabels',
                'value': {
                    'rectanglelabels': [output_label],
                    'x': x / img_width * 100,
                    'y': y / img_height * 100,
                    'width': (xmax - x) / img_width * 100,
                    'height': (ymax - y) / img_height * 100
                },
                'score': score
            })
            all_scores.append(score)
    # guard against division by zero when nothing passes the threshold
    avg_score = sum(all_scores) / max(len(all_scores), 1)
    return [{'result': results, 'score': avg_score}]
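# Quick, self-contained check of the pixel-to-percent conversion used in
# predict() above (fake numbers; no model or Label Studio needed):
x, y, xmax, ymax = 50.0, 100.0, 150.0, 300.0
img_width, img_height = 1000, 800
value = {
    'x': x / img_width * 100,                 # 5.0
    'y': y / img_height * 100,                # 12.5
    'width': (xmax - x) / img_width * 100,    # 10.0
    'height': (ymax - y) / img_height * 100,  # 25.0
}
print(value)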
def myfunc2(x):
    idx, img = x
    print('myfunc2: ', idx)
    result = inference_detector(model, img)
    print('len of share_submit_result:', len(share_submit_result))
    # NOTE: `img_name` is not defined in this function; it is assumed to come
    # from the enclosing/global scope of the original script.
    for cls, item in enumerate(result):
        if item is None:
            continue
        for row in item:
            with share_lock:  # acquire and release the shared lock
                share_submit_result.append({
                    'name': img_name,
                    'category': cls + 1,
                    'bbox': row[:4].tolist(),
                    'score': str(row[4])
                })
def main():
    parser = ArgumentParser()
    parser.add_argument('img', help='Image file')
    parser.add_argument('config', help='Config file')
    parser.add_argument('checkpoint', help='Checkpoint file')
    parser.add_argument('--device', default='cuda:0',
                        help='Device used for inference')
    parser.add_argument('--score-thr', type=float, default=0.3,
                        help='bbox score threshold')
    args = parser.parse_args()

    # build the model from a config file and a checkpoint file
    model = init_detector(args.config, args.checkpoint, device=args.device)
    # test a single image and print the per-class result sizes
    result = inference_detector(model, args.img)
    print("result is ", len(result))
    for i in range(len(result)):
        print("result ", i, " is ", len(result[i]))
    print("full result:", result)
def main():
    parser = ArgumentParser()
    parser.add_argument('img', help='Image file')
    parser.add_argument('config', help='Config file')
    parser.add_argument('checkpoint', help='Checkpoint file')
    parser.add_argument('--device', default='cuda:0',
                        help='Device used for inference')
    parser.add_argument('--score-thr', type=float, default=0.3,
                        help='bbox score threshold')
    args = parser.parse_args()

    # build the model from a config file and a checkpoint file
    model = init_detector(args.config, args.checkpoint, device=args.device)
    # test a single image
    result = inference_detector(model, args.img)
    # show the results (class names are passed as a sequence, not wrapped in
    # an extra list)
    show_result_pyplot(args.img, result, model.CLASSES,
                       score_thr=args.score_thr)
def kitti():
    cfg = mmcv.Config.fromfile(
        '/nfs/project/libo_i/mmdetection/configs/KITTI/cascade_mask_rcnn_x101_64x4d_fpn_1x.py')
    cfg.model.pretrained = None

    # construct the model and load checkpoint
    model = build_detector(cfg.model, test_cfg=cfg.test_cfg)
    _ = load_checkpoint(
        model,
        '/nfs/project/libo_i/mmdetection/work_dirs/kitti_cascade_mask_rcnn_x101_64x4d_fpn_1x/epoch_24.pth')

    # collect the list of test images
    img_path = '/nfs/project/libo_i/mmdetection/data/kitti/testing/kitti_demo_image'
    fs = os.listdir(img_path)
    imgs = []
    for item in fs:
        item_path = os.path.join(img_path, item)
        if not os.path.isdir(item_path):
            imgs.append(item_path)

    # run inference and save the visualizations
    for i, result in enumerate(inference_detector(model, imgs, cfg, device='cuda:0')):
        print(i, imgs[i])
        save_path = "/nfs/project/libo_i/mmdetection/selfmade/kitti_demo_image/24_kitti_inferred/{}".format(
            os.path.basename(imgs[i]))
        print(save_path)
        show_result(imgs[i], result, dataset='kitti', outfile=save_path)
def infer_image(self, image):
    # compute prediction
    result = inference_detector(self.model, image)
    bbox_result, segm_result = result, None
    bboxes = np.vstack(bbox_result)
    labels = [
        np.full(bbox.shape[0], i, dtype=np.int32)
        for i, bbox in enumerate(bbox_result)
    ]
    labels = np.concatenate(labels)

    # filter by score threshold (the last column of each bbox is the score)
    score_thr = 0.3
    if score_thr > 0:
        assert bboxes.shape[1] == 5
        scores = bboxes[:, -1]
        inds = scores > score_thr
        scores = bboxes[inds, -1]
        boxes = bboxes[inds, :4]
        labels = labels[inds]

    scores = scores.tolist()
    boxes = boxes.tolist()
    labels = labels.tolist()
    labels_words = [str(self.classes[idx]) for idx in labels]
    return {
        'boxes': boxes,
        'labels_words': labels_words,
        'labels_idx': labels,
        'scores': scores
    }
def solo_infer(solo_cp_path, src_folder, dst_folder, json_path):
    config_file = '../configs/solo/decoupled_solo_r50_fpn_8gpu_3x.py'
    # load the SOLO model
    model = init_detector(config_file, solo_cp_path, device='cuda:0')
    # collect the paths of all input images
    all_img_paths = glob.glob(os.path.join(src_folder, '*/*.*'))
    num_img = len(all_img_paths)

    state = {'num': float(num_img)}
    content = []  # info for every image
    for i in tqdm(range(num_img)):
        img_path = all_img_paths[i]
        img_name = img_path.split('/')[-1]
        if cv2.imread(img_path) is None:
            print(img_path)
            continue
        # run a single image through SOLO
        result = inference_detector(model, img_path)
        # draw the masks and save under the same image name
        dst_path = os.path.join(dst_folder, img_name)
        show_result_ins(img_path, result, model.CLASSES,
                        score_thr=0.25, out_file=dst_path)
        # analyze the mask info
        img_info = post_treatment(img_path, result, model.CLASSES, score_thr=0.25)
        content.append(img_info)

    # save everything to the JSON file
    state['content'] = content
    with open(json_path, 'w') as fjson:
        json.dump(state, fjson, indent=4)
def predict(self, tasks, **kwargs):
    assert len(tasks) == 1
    task = tasks[0]
    image_path = get_image_local_path(task["data"][self.value])
    model_results = inference_detector(self.model, image_path)
    results = []
    all_scores = []
    img_width, img_height = get_image_size(image_path)
    for bboxes, label in zip(model_results, self.model.CLASSES):
        output_label = self.label_map.get(label, label)
        if output_label not in self.labels_in_config:
            print(output_label + " label not found in project config.")
            continue
        for bbox in bboxes:
            bbox = list(bbox)
            if not bbox:
                continue
            score = float(bbox[-1])
            if score < self.score_thresh:
                continue
            x, y, xmax, ymax = bbox[:4]
            results.append({
                "from_name": self.from_name,
                "to_name": self.to_name,
                "type": "rectanglelabels",
                "value": {
                    "rectanglelabels": [output_label],
                    "x": int(x / img_width * 100),
                    "y": int(y / img_height * 100),
                    "width": int((xmax - x) / img_width * 100),
                    "height": int((ymax - y) / img_height * 100),
                },
                "score": score,
            })
            all_scores.append(score)
    # guard against division by zero when nothing passes the threshold
    avg_score = sum(all_scores) / max(len(all_scores), 1)
    return [{"result": results, "score": avg_score}]
def inference_split(self, img: Path, threshold: float = DEFAULT_THRESHOLD,
                    padding: int = 0):
    image = cv2.imread(str(img.absolute()))
    # split the page into horizontal bands at these fractions of its height
    split_weight = [0.2, 0.43, 0.65, 0.84, 1]
    splits = []
    prev = 0
    for split_w in split_weight:
        border = int(split_w * image.shape[0])
        splits.append(image[prev:border])
        prev = border

    tables = []
    headers = []
    prev_shape = 0
    for num, split in enumerate(splits):
        split = add_padding(split, padding)
        result = inference_detector(self.model, split)
        if self.should_visualize:
            inference_image = self.model.show_result(split, result, thickness=2)
            image_path = (img.parent.parent / "raw_model" / img.name / f"{num}.png")
            image_path.parent.mkdir(parents=True, exist_ok=True)
            cv2.imwrite(str(image_path.absolute()), inference_image)
        inf_tables, header, _ = inference_result_to_boxes(
            extract_boxes_from_result(result, CLASS_NAMES, score_thr=threshold))
        # operate on this split's boxes only, then shift them back into
        # whole-page coordinates before accumulating
        crop_padding([(inf_tables, header)], padding)
        shift(inf_tables, header, prev_shape, 0)
        tables.extend(inf_tables)
        headers.extend(header)
        prev_shape += split.shape[0]
    return tables, headers
def inference(self, preprocessed_image):
    """
    Predict bounding boxes of a preprocessed image.

    Args:
        preprocessed_image: a preprocessed RGB frame

    Returns:
        A list of dictionaries; each item has the id, relative bounding-box
        coordinates and prediction confidence score of one detected instance.
    """
    self.frame = preprocessed_image
    output_dict = inference_detector(self.detection_model, preprocessed_image)
    class_id = int(self.config.get_section_dict('Teacher')['ClassID'])
    score_threshold = float(self.config.get_section_dict('Teacher')['MinScore'])
    result = []
    for i, box in enumerate(output_dict[0]):  # number of boxes
        if box[-1] > score_threshold:
            result.append({
                "id": str(class_id) + '-' + str(i),
                # normalize pixel coordinates to [0, 1]
                "bbox": [box[0] / self.image_size[0],
                         box[1] / self.image_size[1],
                         box[2] / self.image_size[0],
                         box[3] / self.image_size[1]],
                "score": box[-1]
            })
    return result
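# Quick self-contained check of the coordinate normalization used in
# inference() above (the box and image size are made-up values for
# illustration; no model needed):
image_size = (640, 480)  # (width, height)
box = [64.0, 48.0, 320.0, 240.0, 0.8]  # x1, y1, x2, y2, score
bbox = [box[0] / image_size[0], box[1] / image_size[1],
        box[2] / image_size[0], box[3] / image_size[1]]
print(bbox)  # [0.1, 0.1, 0.5, 0.5]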
def inference_model(config_name, checkpoint, args, logger=None):
    cfg = Config.fromfile(config_name)
    if args.aug:
        if 'flip' in cfg.data.test.pipeline[1]:
            cfg.data.test.pipeline[1].flip = True
        else:
            if logger is not None:
                logger.error(f'{config_name}: unable to start aug test')
            else:
                print(f'{config_name}: unable to start aug test', flush=True)

    model = init_detector(cfg, checkpoint, device=args.device)
    # test a single image
    result = inference_detector(model, args.img)
    # show the results
    if args.show:
        show_result_pyplot(model, args.img, result,
                           score_thr=args.score_thr,
                           wait_time=args.wait_time)
    return result
def mmdInferenceOnExternalImages(external_input_images_path, output_result_path,
                                 mmd_config_file, mmd_checkpoint):
    """Run inference on a folder of external images.

    Note that external_input_images_path should contain only RGB images, not
    separate images and depths folders.
    """
    # build the model from a config file and a checkpoint file
    model = init_detector(mmd_config_file, mmd_checkpoint, device='cuda:0')

    # test each image and save the visualized results
    images = os.listdir(external_input_images_path)
    results = {}
    with tqdm(total=len(images)) as pbar:
        for image_name in images:
            image_path = os.path.join(external_input_images_path, image_name)
            result = inference_detector(model, image_path)
            output_path = os.path.join(output_result_path, image_name)
            # visualize the results in a new window:
            # show_result(image_path, result, model.CLASSES)
            # or save the visualization results to image files:
            show_result(image_path, result, model.CLASSES,
                        show=False, out_file=output_path)
            results[image_name] = result
            pbar.update(1)
    return results
def detect_logo_location(image_path):
    list_img = []
    img = mmcv.imread(image_path)
    j = 0
    dst_dir = "/content/drive/My Drive/logo detection: phase 2/dataset for testing/crop"
    # get bounding boxes from the image using the mmdetection model
    result = inference_detector(mmdetection_model, img)
    if result:  # if the result is non-empty (a logo exists)
        img = Image.open(image_path)
        for i in range(len(result[0])):  # loop over each bounding box in the image
            if result[0][i][4] >= 0.3:  # if the probability >= 0.3, take the box and crop
                left = result[0][i][0]
                top = result[0][i][1]
                right = result[0][i][2]
                bottom = result[0][i][3]
                im_crop = img.crop((left, top, right, bottom))
                if im_crop.mode == "RGBA":
                    im_crop = im_crop.convert('RGB')
                name = os.path.join(dst_dir, "crop_" + str(j) + ".jpg")
                im_crop.save(name)
                j = j + 1
                list_img.append(name)
    return list_img
def show_image(image_array, model, fps_s, format_='jpeg'):
    f = BytesIO()
    result = inference_detector(model, image_array)
    # zero out the scores of class 0 so those boxes fall below the display
    # threshold (assumed intent of the original line)
    result[0][:, -1] = 0
    img_out = model.show_result(
        image_array,
        result,
        score_thr=0.35,
        show=False,
        font_scale=0.0,
        thickness=3,
        bbox_color='green',
    )
    # draw the most recent FPS value
    fps = fps_s[-1] if fps_s else 0.0
    cv2.putText(img_out, str(fps)[:5], (1200, 20),
                cv2.FONT_HERSHEY_COMPLEX, 0.9, (0, 255, 0))
    img_out = cv2.cvtColor(img_out, cv2.COLOR_BGR2RGB)
    pil_img = PIL.Image.fromarray(img_out)
    pil_img.save(f, format=format_)
    IPython.display.display(IPython.display.Image(data=f.getvalue()))
def detector(self, img):
    '''
    :param img: image read by OpenCV (a numpy array underneath)
    :return: result of shape [n, 6], where n is the number of detected
             objects; result[:, 0] is the class id, result[:, 1:5] is
             x_min, y_min, x_max, y_max, and result[:, 5] is the confidence
    '''
    self.det_result = inference_detector(self.net_model, img)
    labels = [
        np.full(bbox.shape[0], i, dtype=np.int32)
        for i, bbox in enumerate(self.det_result)
    ]
    self.labels = np.concatenate(labels)
    self.labels = self.labels.reshape(self.config.num_roi, 1)
    self.bboxes = np.vstack(self.det_result)
    self.result = np.hstack((self.labels, self.bboxes))
    # overlap-based filtering
    nms_iou_detec = nms_iou(self.config.miji_thr, self.config.h_thr,
                            self.config.nms_thr, self.config.num_thr)
    self.result = nms_iou_detec.mix_nms(self.result)
    return self.result
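# Self-contained sketch of the label/vstack layout built inside detector()
# above, run on fake per-class results instead of a real model:
import numpy as np

fake_result = [np.array([[10., 10., 50., 50., 0.9]]),   # class 0: one box
               np.zeros((0, 5)),                        # class 1: no boxes
               np.array([[30., 30., 60., 60., 0.7]])]   # class 2: one box
labels = np.concatenate([np.full(b.shape[0], i, dtype=np.int32)
                         for i, b in enumerate(fake_result)])
bboxes = np.vstack(fake_result)
table = np.hstack((labels.reshape(-1, 1), bboxes))
print(table)  # [n, 6] rows: class, x_min, y_min, x_max, y_max, score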
def SlideIndex(PredImg, im_height, im_width, model):
    new_name = len(os.listdir(SavePath)) + 1
    bboxouts = []
    for imgi in tqdm(range(0, im_height, image_size[0])):
        for imgj in range(0, im_width, image_size[1]):
            cropped = PredImg[imgi:imgi + image_size[0],
                              imgj:imgj + image_size[1], :]
            # pad edge tiles up to the full tile size
            if cropped.shape[0] < image_size[0] or cropped.shape[1] < image_size[1]:
                NewI = np.zeros((image_size[0], image_size[1], 3))
                NewI[:cropped.shape[0], :cropped.shape[1], :] = cropped[:, :, :]
                cropped = NewI
            resultbbox = inference_detector(model, cropped)
            # # save the tile visualizations
            # singleImg = visualize(cropped, resultbbox)
            # cv2.imwrite(SavePath + "/%d.png" % new_name, singleImg)
            # cv2.destroyAllWindows()
            # shift the bboxes back into whole-image coordinates, then collect
            bboxTrans = bbox_transfrom(imgi, imgj, resultbbox)
            bboxouts.append(bboxTrans)
            new_name = new_name + 1
    return bboxouts
def main():
    args = parse_args()

    model = init_detector(args.config, args.checkpoint,
                          device=torch.device('cuda', args.device))

    camera = cv2.VideoCapture(args.camera_id)

    print('Press "Esc", "q" or "Q" to exit.')
    while True:
        ret_val, img = camera.read()
        result = inference_detector(model, img)

        ch = cv2.waitKey(1)
        if ch == 27 or ch == ord('q') or ch == ord('Q'):
            break

        show_result(img, result, model.CLASSES,
                    score_thr=args.score_thr, wait_time=1)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', help='test config file path')
    parser.add_argument('--checkpoint', help='checkpoint file')
    args = parser.parse_args()

    config_file = args.config
    checkpoint_file = args.checkpoint
    model = init_detector(config_file, checkpoint_file, device='cuda:0')

    image_path = os.path.abspath("./data/only_carTruck_frame/")
    result_path = os.path.abspath("./data/only_carTruck_inference")
    images = sorted(os.listdir(image_path))
    integer = 0
    # run inference on the images one by one
    for imgs in images:
        img_for_inference = image_path + "/" + imgs
        result = inference_detector(model, img_for_inference)
        last_path = result_path + "/{0:06d}".format(integer) + ".jpg"
        save_result_pyplot(model, img_for_inference, result, last_path)
        integer += 1
def main():
    args = get_args()
    # build the model from a config file and a checkpoint file
    model = init_detector(args.config, args.ckpt, device='cuda:0')
    if len(args.input) == 1:
        args.input = glob.glob(args.input[0])
    for img_path in args.input:
        # test a single image and show the results
        # img = 'test.jpg' or img = mmcv.imread(img), which will only load it once
        img = mmcv.imread(img_path)
        result = inference_detector(model, img)
        # visualize the results in a new window, or save them to a file
        out_file = None if args.output is None else osp.join(
            args.output, str(time.time()) + '.jpg')
        show_result(img, result, model.CLASSES,
                    score_thr=args.score_thr, out_file=out_file)
def get_frt_fb(img, img_name):
    detections = inference_detector(model_fb, img)
    detections = detections[0]
    scores = detections[:, 4]
    if len(scores) > 0:
        # treat the maximum confidence as the probability of the fourth class
        max_score = np.max(scores)
        cnt = 0
        for j in range(len(detections)):
            score = scores[j]
            if score > 0.7:  # boxes above 0.7 are very likely the fourth class
                cnt += 1
        if max_score > 0.9:  # above 0.9, directly label it as the fourth class
            is_fb = 3
        else:
            is_fb = -1
    else:
        max_score = 0
        is_fb = -1
        cnt = 0
    features_dic_fb['name'].append(img_name)
    features_dic_fb['fb_cnt'].append(cnt)
    features_dic_fb['fb_prob'].append(max_score)
    features_dic_fb['is_fb'].append(is_fb)
def detection_inference(args, frame_paths):
    """Detect human boxes given frame paths.

    Args:
        args (argparse.Namespace): The arguments.
        frame_paths (list[str]): The paths of frames to do detection inference.

    Returns:
        list[np.ndarray]: The human detection results.
    """
    model = init_detector(args.det_config, args.det_checkpoint, args.device)
    assert model.CLASSES[0] == 'person', ('We require you to use a detector '
                                          'trained on COCO')
    results = []
    print('Performing Human Detection for each frame')
    prog_bar = mmcv.ProgressBar(len(frame_paths))
    for frame_path in frame_paths:
        result = inference_detector(model, frame_path)
        # We only keep human detections with score larger than det_score_thr
        result = result[0][result[0][:, 4] >= args.det_score_thr]
        results.append(result)
        prog_bar.update()
    return results
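# Hedged usage sketch for detection_inference above; the config/checkpoint
# paths and frame paths are placeholders, not from the source:
from types import SimpleNamespace

args = SimpleNamespace(
    det_config='configs/faster_rcnn_r50_fpn_2x_coco.py',           # placeholder
    det_checkpoint='checkpoints/faster_rcnn_r50_fpn_2x_coco.pth',  # placeholder
    device='cuda:0',
    det_score_thr=0.9,
)
frame_paths = ['tmp/img_00001.jpg', 'tmp/img_00002.jpg']           # placeholders
human_boxes = detection_inference(args, frame_paths)  # one (k, 5) array per frame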
def main():
    parser = ArgumentParser()
    # parser.add_argument('img', help='Image file')
    parser.add_argument('config', help='Config file')
    parser.add_argument('checkpoint', help='Checkpoint file')
    parser.add_argument('--device', default='cuda:0',
                        help='Device used for inference')
    parser.add_argument('--score-thr', type=float, default=0.5,
                        help='bbox score threshold')
    parser.add_argument('--path', type=str,
                        default='/home/ryo/W2020V1_142_NECvsAGE_F8_200119.MP4')
    parser.add_argument('--out-dir', type=str, default='./')
    args = parser.parse_args()

    # build the model from a config file and a checkpoint file
    model = init_detector(args.config, args.checkpoint, device=args.device)

    cap_file = cv2.VideoCapture(args.path)
    out = cv2.VideoWriter('outpy.avi',
                          cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
                          30, (1280, 720))
    count = 0
    while cap_file.isOpened():
        print(count)
        if count > 10000:
            break
        count += 1
        ret, frame = cap_file.read()
        if not ret:
            break
        result = inference_detector(model, frame)
        result = np.vstack(result[0])
        bboxes = result[result[:, -1] > args.score_thr]
        for j in range(bboxes.shape[0]):
            # cv2.rectangle expects integer pixel coordinates
            left_top = (int(bboxes[j, 0]), int(bboxes[j, 1]))
            right_bottom = (int(bboxes[j, 2]), int(bboxes[j, 3]))
            cv2.rectangle(frame, left_top, right_bottom, (0, 0, 255), thickness=1)
        out.write(frame)

    cap_file.release()
    out.release()
def run(self):
    # run inference on every image in the directory
    dirname = self.dirname
    imgs = os.listdir(dirname)
    if imgs:
        imgs = sorted(imgs, key=lambda x: int(x.split('/')[-1].split('.')[0]))
    img_size = cv2.imread(dirname + "/" + imgs[0]).shape
    flag = 0
    for img in imgs:
        print(img)
        path = dirname + "/" + img
        id = img.split('.')[0]
        result = inference_detector(self.model, path)
        if isinstance(result, tuple):
            bbox_result, segm_result = result
        else:
            bbox_result, segm_result = result, None
        bboxes = np.vstack(bbox_result)
        scores = bboxes[:, -1]
        inds = scores > self.score
        bboxes = bboxes[inds, :]
        temp = []
        for bbox in bboxes:
            bbox_int = bbox.astype(np.int32)
            temp.append(bbox_int)
        newbbox = self.check_bbox(temp)
        # write the boxes out as a Pascal VOC XML annotation
        filename = id + ".tif"
        xmlwriter = PascalVocWriter("VOC2007", filename, img_size)
        for box in newbbox:
            xmlwriter.addBndBox(box[0], box[1], box[2], box[3], "nano", "0")
        xmlwriter.save(self.defaultSaveDir + "/" + id + ".xml")
        flag += 1
        if self.quick_flag == 1:
            break
        progressbar_value = round(flag / len(imgs), 2) * 100
        self.progressBarValue[int].emit(int(progressbar_value))
def process(self, img) -> ShelfInfo:
    '''
    Process the image and get the segmentation result for every shelf row.

    :param img: numpy array image
    :return: the ShelfInfo
    '''
    prediction = inference_detector(self.model, img)
    polygon_fit = PolygonFit()
    results = []
    for bbox, seg in zip(prediction[0][0], prediction[1][0]):
        if bbox[-1] < self.score_thr:
            continue
        # extract the largest simplified convex contour of the mask
        contours, hierarchy = cv2.findContours(np.array(seg, dtype=np.uint8),
                                               cv2.RETR_TREE,
                                               cv2.CHAIN_APPROX_SIMPLE)
        contours = [cv2.convexHull(c) for c in contours]
        contours = [cv2.approxPolyDP(c, 0.001 * cv2.arcLength(c, True), True)
                    for c in contours]
        contours = [c for c in contours if c.shape[0] > 2]
        if len(contours) == 0:
            continue
        elif len(contours) > 1:
            contour_areas = [cv2.contourArea(c) for c in contours]
            max_idx = np.argmax(contour_areas)
            contour = contours[max_idx]
        else:
            contour = contours[0]
        contour = contour.reshape(-1, 2)
        polygon = polygon_fit.fit(contour)
        results.append(dict(
            bbox=bbox[:-1].round().astype(np.int32).tolist(),
            points=contour.tolist(),
            polygon=polygon,
            score=float(bbox[-1])
        ))
    # sort rows top-to-bottom, then clean up overlaps and outliers
    results.sort(key=lambda x: x['bbox'][1])
    results = remove_overlap(results)
    results = filter_results(results)
    return results
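# Self-contained sketch of the contour post-processing used in process()
# above, run on a synthetic binary mask (assumes OpenCV 4's two-value
# findContours return; no model required):
import cv2
import numpy as np

seg = np.zeros((100, 100), dtype=np.uint8)
cv2.circle(seg, (50, 50), 30, 1, -1)  # fake instance mask
contours, _ = cv2.findContours(seg, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
contours = [cv2.convexHull(c) for c in contours]
contours = [cv2.approxPolyDP(c, 0.001 * cv2.arcLength(c, True), True)
            for c in contours]
contour = max(contours, key=cv2.contourArea).reshape(-1, 2)
print(contour.shape)  # (k, 2) polygon vertices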
def predict():
    CONFIDENCE_THRESHOLD = 0.0001
    args = parse_args()
    cfg = mmcv.Config.fromfile(args.cfg)
    cfg.model.pretrained = None

    # construct the model and load checkpoint
    model = init_detector(args.cfg, args.weights, device='cuda:0')

    for image in tqdm(glob("/mfs/home/limengwei/car_face/car_face/object_detection_data_angle_top_head/images_val/*.png")):
        img = mmcv.imread(image)
        filename = image.split("/")[-1]
        start = time.time()
        result = inference_detector(model, img)
        print(time.time() - start)

        labels = np.concatenate([
            np.full(bbox.shape[0], i, dtype=np.int32)
            for i, bbox in enumerate(result)
        ])
        bboxes = np.vstack(result)
        for label, bbox in zip(labels, bboxes):
            score = bbox[-1]
            if score < CONFIDENCE_THRESHOLD:
                continue
            # draw the class name and the box
            x1, y1 = bbox[0], bbox[1]
            text = id_to_label[label + 1]
            cv2.putText(img, text, (int(x1), int(y1 - 2)),
                        cv2.FONT_HERSHEY_COMPLEX, 0.8, (0, 255, 255))
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (0, 0, 255), 2)

        cv2.imwrite("outputs/%s" % filename, img)
        cv2.imshow("img", img)
        cv2.waitKey(0)