def Predict(self, img_path, vis_thresh=0.3, output_img="output.jpg"):
    """Run the configured detector on one image and save the annotated copy.

    Args:
        img_path (str): Path of the image file to read.
        vis_thresh (float): Forwarded to ``draw_bboxes`` as its ``thresh``
            argument.
        output_img (str): Path the annotated image is written to.

    Returns:
        The detector's output for the image, exactly as the detector
        returned it.

    Raises:
        IOError: If the image at ``img_path`` cannot be read.
    """
    image = cv2.imread(img_path)
    # cv2.imread reports failure by returning None instead of raising, so
    # fail here with a clear message rather than somewhere inside the detector.
    if image is None:
        raise IOError("Could not read image: {}".format(img_path))

    detector = self.system_dict["local"]["detector"]
    bboxes = detector(image)

    annotated = draw_bboxes(image, bboxes, thresh=vis_thresh)
    cv2.imwrite(output_img, annotated)
    return bboxes
def main(args):
    """Run CornerNet_Squeeze on a live capture device and report throughput.

    Reads the following attributes from ``args``:
        device: Passed to ``cv2.VideoCapture`` and used in the window title.
        codec: ``'YUY2'`` or ``'MJPG'`` selects a capture FOURCC; any other
            value leaves the default codec in place.
        resolution: Optional ``(width, height)`` pair applied to the capture.
        model: Optional model name forwarded to ``CornerNet_Squeeze``.
        visual: When truthy, frames are shown in a window and ``q`` quits.
        scale: Resize factor applied to displayed frames.

    The loop runs until ``q`` is pressed (when ``args.visual`` is set) or the
    process receives a KeyboardInterrupt.
    """
    cam = cv2.VideoCapture(args.device)
    try:
        if args.codec == 'YUY2':
            # 844715353 == 0x32595559, the bytes 'Y','U','Y','2' little-endian.
            cam.set(cv2.CAP_PROP_FOURCC, 844715353.0)
        elif args.codec == 'MJPG':
            # 0x47504A4D is the bytes 'M','J','P','G' little-endian.
            cam.set(cv2.CAP_PROP_FOURCC, 0x47504A4D)
        else:
            print('use default video codec.')

        if args.resolution:
            cam.set(cv2.CAP_PROP_FRAME_WIDTH, args.resolution[0])
            cam.set(cv2.CAP_PROP_FRAME_HEIGHT, args.resolution[1])

        if args.model:
            detector = CornerNet_Squeeze(model_name=args.model)
        else:
            detector = CornerNet_Squeeze()

        frame_count = 0
        init_time = time()
        tic = time()

        try:
            while True:
                # Capture frame-by-frame; skip iterations with no usable frame.
                if not cam.grab():
                    continue
                ok, frame = cam.retrieve()
                if not ok or frame is None:
                    # A grabbed frame can still fail to decode; never hand
                    # None to the detector.
                    continue

                bboxes = detector(frame)
                frame = draw_bboxes(frame, bboxes)
                toc = time()
                frame_count += 1

                # Report fps once at least 3 seconds have passed since start.
                if toc - init_time > 3:
                    fps = frame_count / (toc - tic)
                    print('{:.2f}: {} x {} @ {:5.1f}'.format(
                        time(), frame.shape[1], frame.shape[0], fps))

                # Restart the measurement window every 3 seconds.
                if toc - tic > 3:
                    tic = time()
                    frame_count = 0

                # Show the resulting frame
                if args.visual:
                    frame = cv2.resize(frame, (0, 0),
                                       fx=args.scale, fy=args.scale)
                    cv2.imshow('/dev/video{}'.format(args.device), frame)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break
        except KeyboardInterrupt:
            print('\nKeyboardInterrupt')
    finally:
        # Release the capture even when the detector or drawing code raises.
        cam.release()
        cv2.destroyAllWindows()
def detect_video(video_path, output_path, start=0, end=0, classes=None, forbid_box=None):
    """Run CornerNet_Saccade over a video and write an annotated XVID copy.

    Args:
        video_path: Source passed to ``cv2.VideoCapture``.
        output_path: Destination passed to ``cv2.VideoWriter``.
        start: First second to process; earlier frames are read and skipped.
        end: Last second to process; ``0`` means run to the end of the video.
        classes: Optional comma-separated string of class names; detections
            whose key is not listed are dropped before drawing.
        forbid_box: Optional string of integers separated by ``,`` within a
            point, ``;`` between points and ``|`` between groups. It is parsed
            (so malformed input still raises) but not otherwise used here.

    Raises:
        IOError: If the video source cannot be opened.
    """
    detector = CornerNet_Saccade()
    vid = cv2.VideoCapture(video_path)
    if not vid.isOpened():
        raise IOError("Couldn't open webcam or video")

    out = None
    try:
        video_fps = vid.get(cv2.CAP_PROP_FPS)
        video_size = (int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)),
                      int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        print(output_path)
        out_fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter(output_path, out_fourcc, video_fps, video_size)

        if classes is not None:
            classes = classes.split(',')
        if forbid_box is not None:
            forbid_box = [[[int(x) for x in b.split(',')]
                           for b in s.split(';')]
                          for s in forbid_box.split('|')]

        # Compare against CAP_PROP_POS_MSEC, so convert seconds to msec.
        start_msec = int(start) * 1000
        end_msec = int(end) * 1000

        # Report the real frame size in the closing summary (previously these
        # were fixed at 0 and never updated).
        width, height = video_size

        while True:
            ok, frame = vid.read()
            if not ok:
                break

            msec = int(vid.get(cv2.CAP_PROP_POS_MSEC))
            if msec < start_msec:
                continue
            if end_msec > 0 and msec > end_msec:
                break

            bboxes = detector(frame)
            # `colors` is a name defined outside this function; set_colors is
            # presumably what populates it — TODO confirm.
            if colors is None:
                set_colors([name for name in bboxes])
            if classes is not None:
                bboxes = {k: v for k, v in bboxes.items() if k in classes}

            print('当前时间进度:%.2f秒' % (msec/1000))
            image = draw_bboxes(frame, bboxes, colors=colors)
            out.write(image)

        print("width: %d, height: %d" % (width, height))
    finally:
        # Always close both ends so the output file is finalized and the
        # input handle is not leaked, including on errors.
        vid.release()
        if out is not None:
            out.release()
def main(args):
    """Run CornerNet_Squeeze over a video file and report throughput.

    Reads the following attributes from ``args``:
        filename: Video source passed to ``cv2.VideoCapture``; also used as
            the window title.
        model: Optional model name forwarded to ``CornerNet_Squeeze``.
        visual: When truthy, frames are shown in a window and ``q`` quits.
        scale: Resize factor applied to displayed frames.

    Every frame is resized to 640x360 before detection. Processing stops at
    the end of the video, when ``q`` is pressed, or on KeyboardInterrupt.

    Raises:
        IOError: If the video source cannot be opened.
    """
    cam = cv2.VideoCapture(args.filename)
    try:
        if not cam.isOpened():
            raise IOError("Couldn't open video: {}".format(args.filename))

        if args.model:
            detector = CornerNet_Squeeze(model_name=args.model)
        else:
            detector = CornerNet_Squeeze()

        frame_count = 0
        init_time = time()
        tic = time()

        try:
            while True:
                # For a file source a failed grab means the stream is
                # exhausted; retrying would spin forever, so stop instead.
                if not cam.grab():
                    break
                ok, frame = cam.retrieve()
                if not ok or frame is None:
                    break

                frame = cv2.resize(frame, (640, 360))
                bboxes = detector(frame)
                frame = draw_bboxes(frame, bboxes)
                toc = time()
                frame_count += 1

                # Report fps once at least 3 seconds have passed since start.
                if toc - init_time > 3:
                    fps = frame_count / (toc - tic)
                    print('{:.2f}: {} x {} @ {:5.1f}'.format(
                        time(), frame.shape[1], frame.shape[0], fps))

                # Restart the measurement window every 3 seconds.
                if toc - tic > 3:
                    tic = time()
                    frame_count = 0

                # Show the resulting frame
                if args.visual:
                    frame = cv2.resize(frame, (0, 0),
                                       fx=args.scale, fy=args.scale)
                    cv2.imshow(args.filename, frame)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break
        except KeyboardInterrupt:
            print('\nKeyboardInterrupt')
    finally:
        # Release the capture even when the detector or drawing code raises.
        cam.release()
        cv2.destroyAllWindows()
def Predict(self, img_path, vis_thresh=0.3, output_img="output.jpg"):
    '''
    User function: Run inference on a single image and save the visualization

    Args:
        img_path (str): Relative path to the image file
        vis_thresh (float): Threshold for predicted scores. Passed to
                            draw_bboxes as its thresh argument
        output_img (str): Path where the annotated output image will be saved

    Returns:
        The detections produced by the detector for this image, unmodified

    Raises:
        IOError: If the image at img_path cannot be read
    '''
    image = cv2.imread(img_path)
    # cv2.imread reports failure by returning None instead of raising, so
    # fail here with a clear message rather than somewhere inside the detector.
    if image is None:
        raise IOError("Could not read image: {}".format(img_path))

    detector = self.system_dict["local"]["detector"]
    bboxes = detector(image)

    annotated = draw_bboxes(image, bboxes, thresh=vis_thresh)
    cv2.imwrite(output_img, annotated)
    return bboxes
#!/usr/bin/env python
# Demo: run CornerNet_Saccade on ./demo.jpg, print the detections and write
# the annotated image to ./demo_out.jpg.
import cv2
from core.detectors import CornerNet_Saccade
# from core.detectors import CornerNet_Squeeze
from core.vis_utils import draw_bboxes
from core.paths import get_file_path
import os
import pickle
import pprint

detector = CornerNet_Saccade()

image = cv2.imread("./demo.jpg")
# cv2.imread returns None (no exception) when the file is missing or cannot
# be decoded; stop here with a clear error instead of failing in the detector.
if image is None:
    raise FileNotFoundError("Could not read image: ./demo.jpg")

bboxes = detector(image)
pprint.pprint(bboxes)

# draw_bboxes was modified to support rendering Chinese text.
# Remember to update the id2label dictionary to match your own dataset.
image = draw_bboxes(image, bboxes)
cv2.imwrite("./demo_out.jpg", image)
if not cap.isOpened():
    print("Camera is not ready ")
    # Raise SystemExit directly: the bare exit() helper is injected by the
    # `site` module for interactive use, and a failure should not report
    # a success status.
    raise SystemExit(1)

# Load the model
detector = CornerNet_Squeeze()

while True:
    ret, frame = cap.read()
    if not ret:
        # No frame was delivered. Previously this fell through and passed
        # the missing frame to the detector; stop the loop instead.
        break

    t0 = time.time()
    bboxes = detector(frame)

    # Draw the boxes
    image = draw_bboxes(frame, bboxes)

    # Compute FPS from the detection + drawing time of this frame
    fps = 1.0 / (time.time() - t0)
    print("[FPS]: ", fps)

    cv2.imshow("cornernet", image)
    cv2.waitKey(1)
# Demo: time CornerNet_Squeeze on demo.jpg (image loading included in the
# measurement) and write the annotated image to demo_out_squeeze.jpg.
import cv2
import time
from core.detectors import CornerNet_Saccade
from core.detectors import CornerNet_Squeeze
from core.detectors import CornerNet
from core.vis_utils import draw_bboxes

# To compare detectors, construct CornerNet() or CornerNet_Saccade() here and
# run/draw/save them the same way as detector2 below.
detector2 = CornerNet_Squeeze()

start = time.time()
image = cv2.imread("demo.jpg")
# cv2.imread returns None (no exception) when the file is missing or cannot
# be decoded; stop here with a clear error instead of failing in the detector.
if image is None:
    raise FileNotFoundError("Could not read image: demo.jpg")
bboxes2 = detector2(image)
end = time.time()
print("time:", end - start)

image2 = draw_bboxes(image, bboxes2)
cv2.imwrite("demo_out_squeeze.jpg", image2)
def _crop_regions(image, bboxes, failure_msg):
    """Cut each box of ``bboxes`` out of ``image``; print ``failure_msg`` on error.

    Each crop is wrapped in a one-element list, and crops collected before a
    failure are kept.
    """
    crops = []
    if bboxes.shape[0] > 0:
        try:
            for bbox in bboxes:
                x1, y1, x2, y2 = (int(v) for v in bbox[:4])
                crops.append([image[y1:y2, x1:x2]])
        except Exception:
            print(failure_msg)
    return crops


def cam(arg, detector):
    """Run signal detection and colour classification on a chosen source.

    Args:
        arg: Mode selector. ``"video"`` reads a fixed video file and records
            the annotated frames; ``"camera"`` reads from the device returned
            by ``camera_open``; ``"make_color4train"`` runs ``obj_inference``
            once over the images found by ``load_color4train`` and returns.
        detector: Detector object forwarded to ``obj_inference``.

    Raises:
        ValueError: If ``arg`` is not one of the supported modes.

    In the video/camera modes, pressing ``q`` in the preview window ends the
    process via ``sys.exit()``.
    """
    count = 1
    save_flag = False
    print(arg)
    model_dirpath = os.getcwd() + "/model"
    clf = load_model(model_dirpath)

    # Only the "video" mode records output; the other modes leave this None.
    writer = None

    if arg == "video":
        cap = cv2.VideoCapture('/home/gisen/Documents/rosbag/out_short.mp4')
        width = int(cap.get(3))
        height = int(cap.get(4))
        writer = record(width, height)
    elif arg == "camera":
        cap = camera_open()
        cap.set(cv2.CAP_PROP_FPS, 60)            # request 60 fps
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)   # request a 640 px wide frame
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)  # request a 480 px high frame
    elif arg == "make_color4train":
        cap = camera_open()
        cap.set(cv2.CAP_PROP_FPS, 60)            # request 60 fps
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)   # request a 640 px wide frame
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)  # request a 480 px high frame
        data_path = "/home/gisen/Documents/own_dataset/traffic_light_dataset/traffic_light/*"
        imgs_path = load_color4train(data_path)
        save_flag = True
    else:
        raise ValueError("unsupported mode: {!r}".format(arg))

    try:
        while cap.isOpened():
            if arg == "video" or arg == "camera":
                # Read one frame from the capture.
                ret, frame = cap.read()
                if not ret:
                    print("画像の取得に失敗しました。")
                    if arg == "video":
                        # A file source that stops delivering frames has
                        # reached its end; retrying would loop forever.
                        break
                    continue

                image, bboxes, bboxes_traffic, bboxes_pdstrn = obj_inference(
                    detector, frame)

                bboxes_dict = {
                    "traffic_signal": bboxes_traffic,
                    "pedestrian_signal": bboxes_pdstrn
                }
                trm_imges_dict = {
                    "traffic_signal": _crop_regions(
                        image, bboxes_traffic,
                        "交通信号機のトリミングを試みましたが失敗しました"),
                    "pedestrian_signal": _crop_regions(
                        image, bboxes_pdstrn,
                        "歩行者信号機のトリミングを試みましたが失敗しました"),
                }
                result = {}

                if len(trm_imges_dict["traffic_signal"]) + len(
                        trm_imges_dict["pedestrian_signal"]) > 0:
                    for obj_name in ["traffic_signal", "pedestrian_signal"]:
                        mass_list = []
                        bboxes_info = bboxes_dict[obj_name]
                        res_data = extract_color_info(trm_imges_dict[obj_name])
                        for input_data, bbox_info in zip(res_data, bboxes_info):
                            input_data = np.array(input_data[4])
                            pred, label_name = inference(input_data, clf)
                            # Box values followed by the predicted label and
                            # a placeholder string for the colour probability.
                            chunk_list = bbox_info.tolist()
                            chunk_list.append(label_name)
                            chunk_list.append("信号色の確率値入れる")
                            mass_list.append(chunk_list)
                        result[obj_name] = mass_list

                # NOTE(review): drawing and printing are assumed to run for
                # every frame, including frames with no detections — confirm.
                image = draw_bboxes(image, result)
                print(result)

                if arg == "video":
                    writer.write(image)  # append the frame to the recording

                # Show the frame.
                cv2.imshow('Raw Frame', image)

                # Quit when 'q' is pressed.
                k = cv2.waitKey(1)
                if k == ord('q'):
                    cv2.destroyAllWindows()
                    sys.exit()
            else:
                for img_path in imgs_path:
                    img_name = os.path.basename(img_path)
                    img = cv2.imread(img_path)
                    image, bboxes, _, _ = obj_inference(detector,
                                                        img,
                                                        count,
                                                        image_name=img_name,
                                                        flag=save_flag)
                    count += 1
                break
    finally:
        # Release the capture (and the recorder, if any) and close all
        # windows; this also runs when sys.exit() is triggered above.
        cap.release()
        if writer is not None:
            writer.release()
        cv2.destroyAllWindows()
num = num + 1 area = (bbx[2] - bbx[0]) * (bbx[3] - bbx[1]) scales.append(float((bbx[2] - bbx[0]) / (bbx[3] - bbx[1]))) areas.append(area) if num == 0: newbboxes[keys] = np.zeros((0, 5), dtype=np.float32) else: areas = np.array(areas, dtype=np.float32) areas = sorted(areas) if len(areas) >= 5: areas = np.delete(areas, 0, axis=0) areas = np.delete(areas, 0, axis=0) areas = np.delete(areas, len(areas) - 1, axis=0) areas = np.delete(areas, len(areas) - 1, axis=0) area_avg = areas.sum() / len(areas) else: area_avg = np.array(areas).sum() / len(areas) scales = np.array(scales, dtype=np.float32) scale_avg = scales.sum() / len(scales) dets = [] for bbx in bboxes[keys]: area = (bbx[2] - bbx[0]) * (bbx[3] - bbx[1]) scale = (bbx[2] - bbx[0]) / (bbx[3] - bbx[1]) if area <= 3.0 * area_avg and scale <= 1.5 * scale_avg: dets.append(bbx) dets = np.vstack(dets) newbboxes[keys] = np.array(dets, dtype=np.float32) image = draw_bboxes(image, newbboxes) cv2.imwrite("demo_out.jpg", image)