def main():
    """Run the live-camera TensorRT YOLO demo.

    Parses the command line, checks that ``yolo/<model>.trt`` exists, opens
    the camera, derives the network input size from the trailing
    ``-<dim>`` or ``-<w>x<h>`` part of the model name and then hands control
    to ``loop_and_detect``.

    Raises:
        SystemExit: if ``category_num`` is not positive, the engine file is
            missing, the camera cannot be opened, or the size suffix is
            malformed, non-numeric or not a multiple of 32.
    """
    args = parse_args()
    if args.category_num <= 0:
        raise SystemExit('ERROR: bad category_num (%d)!' % args.category_num)
    if not os.path.isfile('yolo/%s.trt' % args.model):
        raise SystemExit('ERROR: file (yolo/%s.trt) not found!' % args.model)

    cam = Camera(args)
    if not cam.isOpened():
        raise SystemExit('ERROR: failed to open camera!')

    # From here on the camera is held, so every exit path (including the
    # SystemExit raised for a bad size suffix) must release it.
    try:
        cls_dict = get_cls_dict(args.category_num)
        yolo_dim = args.model.split('-')[-1]
        try:
            if 'x' in yolo_dim:
                dim_split = yolo_dim.split('x')
                if len(dim_split) != 2:
                    raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)
                w, h = int(dim_split[0]), int(dim_split[1])
            else:
                h = w = int(yolo_dim)
        except ValueError:
            # A non-numeric suffix (e.g. a model name without "-<dim>") is
            # reported the same way as any other bad dimension.
            raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)
        if h % 32 != 0 or w % 32 != 0:
            raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)

        trt_yolo = TrtYOLO(args.model, (h, w), args.category_num)

        open_window(WINDOW_NAME, 'Camera TensorRT YOLO Demo', 640, 480)
        vis = BBoxVisualization(cls_dict)
        loop_and_detect(cam, trt_yolo, conf_th=0.3, vis=vis)
    finally:
        cam.release()
        cv2.destroyAllWindows()
def main():
    """Serve camera detections over MJPEG.

    Checks the arguments, opens the camera, builds the TensorRT YOLO engine
    with the shape returned by ``get_input_shape`` and streams results through
    an ``MjpegServer`` until ``loop_and_detect`` returns or raises.

    Raises:
        SystemExit: on a non-positive ``category_num``, a missing
            ``yolo/<model>.trt`` file, or a camera that fails to open.
    """
    args = parse_args()

    # Argument sanity checks.
    if args.category_num <= 0:
        raise SystemExit('ERROR: bad category_num (%d)!' % args.category_num)
    engine_present = os.path.isfile('yolo/%s.trt' % args.model)
    if not engine_present:
        raise SystemExit('ERROR: file (yolo/%s.trt) not found!' % args.model)

    camera = Camera(args)
    if not camera.isOpened():
        raise SystemExit('ERROR: failed to open camera!')

    visualizer = BBoxVisualization(get_cls_dict(args.category_num))
    input_shape = get_input_shape(args.model)
    height, width = input_shape
    detector = TrtYOLO(
        args.model, (height, width), args.category_num, args.letter_box)

    server = MjpegServer(port=args.mjpeg_port)
    print('MJPEG server started...')
    try:
        loop_and_detect(
            camera, detector, conf_th=0.3, vis=visualizer,
            mjpeg_server=server)
    except Exception as e:
        # Errors from the detection loop are reported, not propagated.
        print(e)
    finally:
        server.shutdown()
        camera.release()
def main():
    """Run the TensorRT YOLOv3 camera detector with an on-screen window.

    Opens and starts the camera, builds a square-input ``TrtYOLOv3`` engine
    whose side length is the integer after the last ``-`` in the model name,
    runs ``loop_and_detect`` and finally stops and releases the camera.

    Exits through ``sys.exit`` with a message if the camera did not open.
    """
    args = parse_args()

    camera = Camera(args)
    camera.open()
    if not camera.is_opened:
        sys.exit('[INFO] Failed to open camera!')

    classes = get_cls_dict('coco')
    # The model name ends in the square input size, e.g. 416 or 608.
    side = int(args.model.rsplit('-', 1)[-1])
    detector = TrtYOLOv3(args.model, (side, side))

    print('[INFO] Camera: starting')
    camera.start()

    open_window(
        WINDOW_NAME,
        args.image_width,
        args.image_height,
        'TensorRT YOLOv3 Detector',
    )
    visualizer = BBoxVisualization(classes)
    loop_and_detect(
        camera,
        args.runtime,
        detector,
        conf_th=0.3,
        vis=visualizer,
        window_name=WINDOW_NAME,
    )

    print('[INFO] Program: stopped')
    camera.stop()
    camera.release()
    cv2.destroyAllWindows()
def main():
    """Run the TensorRT YOLO camera demo without opening a display window.

    Prints a start timestamp, validates the arguments, opens the camera,
    parses the input size from the model-name suffix (``-<dim>`` or
    ``-<w>x<h>``), runs ``loop_and_detect`` and releases the camera.

    Raises:
        SystemExit: on a bad ``category_num``, a missing engine file, a camera
            that fails to open, or an invalid size suffix.
    """
    print(f"{datetime.datetime.now().isoformat()} start!", flush=True)

    args = parse_args()
    if args.category_num <= 0:
        raise SystemExit('ERROR: bad category_num (%d)!' % args.category_num)
    if not os.path.isfile('yolo/%s.trt' % args.model):
        raise SystemExit('ERROR: file (yolo/%s.trt) not found!' % args.model)

    camera = Camera(args)
    if not camera.isOpened():
        raise SystemExit('ERROR: failed to open camera!')

    classes = get_cls_dict(args.category_num)

    # Size suffix: a single number means a square input, "<w>x<h>" otherwise.
    suffix = args.model.rsplit('-', 1)[-1]
    pieces = suffix.split('x')
    if len(pieces) == 1:
        width = height = int(suffix)
    elif len(pieces) == 2:
        width, height = int(pieces[0]), int(pieces[1])
    else:
        raise SystemExit('ERROR: bad yolo_dim (%s)!' % suffix)
    if height % 32 or width % 32:
        raise SystemExit('ERROR: bad yolo_dim (%s)!' % suffix)

    detector = TrtYOLO(args.model, (height, width), args.category_num)

    # No window is opened in this variant; results are only drawn via `vis`.
    visualizer = BBoxVisualization(classes)
    loop_and_detect(camera, detector, conf_th=0.3, vis=visualizer)

    camera.release()
def main():
    """Run TensorRT YOLO detection on a video file and write a result video.

    Validates the arguments, opens ``args.video_name`` with OpenCV, parses the
    network input size from the model-name suffix and passes the capture,
    engine and ``args.result_video`` to ``loop_and_detect``.

    Raises:
        SystemExit: on a bad ``category_num``, a missing ``yolo/<model>.trt``
            file, a source video that cannot be opened, or an invalid size
            suffix.
    """
    args = parse_args()
    if args.category_num <= 0:
        raise SystemExit('ERROR: bad category_num (%d)!' % args.category_num)
    if not os.path.isfile('yolo/%s.trt' % args.model):
        raise SystemExit('ERROR: file (yolo/%s.trt) not found!' % args.model)

    cap = cv2.VideoCapture(args.video_name)
    if not cap.isOpened():
        # Previously this only printed a message and carried on with an
        # unusable capture; stop here like the other argument checks do.
        cap.release()
        raise SystemExit('ERROR: unable to read source video feed!')

    try:
        cls_dict = get_cls_dict(args.category_num)
        yolo_dim = args.model.split('-')[-1]
        if 'x' in yolo_dim:
            dim_split = yolo_dim.split('x')
            if len(dim_split) != 2:
                raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)
            w, h = int(dim_split[0]), int(dim_split[1])
        else:
            h = w = int(yolo_dim)
        if h % 32 != 0 or w % 32 != 0:
            raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)

        trt_yolo = TrtYOLO(args.model, (h, w), args.category_num)
        vis = BBoxVisualization(cls_dict)
        loop_and_detect(cap, trt_yolo, args.result_video, conf_th=0.3, vis=vis)
    finally:
        # Release the capture on every exit path (it was never released before).
        cap.release()
        cv2.destroyAllWindows()
def run_detection():
    """Run one TensorRT YOLO detection on the first frame of the configured video.

    Settings are read from ``cfg/detection_tracker_cfg.json``; the keys used
    here are ``category_num``, ``model``, ``source`` and ``letter_box``.

    Raises:
        SystemExit: on a non-positive ``category_num``, a missing
            ``yolo/<model>.trt`` file, a source that cannot be opened, or a
            source that yields no frame.
    """
    # args = parse_args()
    with open("cfg/detection_tracker_cfg.json") as detection_config:
        detect_config = json.load(detection_config)

    if detect_config["category_num"] <= 0:
        raise SystemExit('ERROR: bad category_num (%d)!' % detect_config["category_num"])
    if not os.path.isfile('yolo/%s.trt' % detect_config["model"]):
        raise SystemExit('ERROR: file (yolo/%s.trt) not found!' % detect_config["model"])

    cap = cv2.VideoCapture(detect_config["source"])
    if not cap.isOpened():
        cap.release()
        raise SystemExit('ERROR: failed to open the input video file!')

    try:
        # Only needed by the (currently disabled) BBoxVisualization.
        cls_dict = get_cls_dict(detect_config["category_num"])
        #vis = BBoxVisualization(cls_dict)
        h, w = get_input_shape(detect_config["model"])
        trt_yolo = TrtYOLO(detect_config["model"], (h, w),
                           detect_config["category_num"],
                           detect_config["letter_box"])

        ret, frame = cap.read()
        if not ret:
            # Without this check a failed read hands an invalid frame to the
            # detector and fails far from the real cause.
            raise SystemExit('ERROR: failed to read a frame from the input video file!')
        boxes, confs, clss = perform_detection(
            frame=frame, trt_yolo=trt_yolo, conf_th=0.3, vis=None)
    finally:
        # The capture was previously left open.
        cap.release()
def __init__(self, frame_info_pool=None, detection_result_pool=None, final_result_pool=None, model='yolov4-416', dataset='obstacle', score_threshold=0.5, nms_threshold=0.3):
    """Store the shared pools and thresholds and load the TensorRT YOLO engine.

    :param frame_info_pool: stored as-is on the instance
    :param detection_result_pool: stored as-is on the instance
    :param final_result_pool: stored as-is on the instance
    :param model: model name; the part after the last ``-`` is the input
        size, either ``<dim>`` or ``<w>x<h>``, and must be a multiple of 32
    :param dataset: ``"coco"`` selects 80 categories, anything else 15
    :param score_threshold: stored as-is on the instance
    :param nms_threshold: stored as-is on the instance
    :raises SystemExit: if the size suffix of ``model`` is invalid
    """
    self.results = dict()
    self.model_name = model
    self.dataset = dataset

    self.frame_info_pool = frame_info_pool
    self.detection_result_pool = detection_result_pool
    self.final_result_pool = final_result_pool

    self.score_threshold = score_threshold
    self.nms_threshold = nms_threshold

    category_num = 80 if dataset == "coco" else 15
    self.cls_dict = get_cls_dict(category_num)

    # Work out the network input size from the model-name suffix.
    size_tag = model.rsplit('-', 1)[-1]
    tokens = size_tag.split('x')
    if len(tokens) == 1:
        width = height = int(size_tag)
    elif len(tokens) == 2:
        width, height = int(tokens[0]), int(tokens[1])
    else:
        raise SystemExit('ERROR: bad yolo_dim (%s)!' % size_tag)
    if height % 32 or width % 32:
        raise SystemExit('ERROR: bad yolo_dim (%s)!' % size_tag)

    self.model = TrtYOLO(model, (height, width), category_num)

    PrintLog.i("Object detection model is loaded - {}\t{}".format(model, dataset))
def main():
    """Run camera detection and forward messages to the ``serArd`` worker.

    Validates the arguments, opens the camera, builds the TensorRT YOLO
    engine, starts a thread running ``serArd`` on a bounded queue and runs
    ``loop_and_detect`` with that queue.

    The worker is a regular (non-daemon) thread, so the interpreter stays
    alive for as long as it runs. The ``while True: pass`` spin that used to
    follow the detection loop burned a CPU core and made the camera and window
    cleanup unreachable, so it has been removed.

    Raises:
        SystemExit: on a bad ``category_num``, a missing ``yolo/<model>.trt``
            file, or a camera that fails to open.
    """
    args = parse_args()
    if args.category_num <= 0:
        raise SystemExit('ERROR: bad category_num (%d)!' % args.category_num)
    if not os.path.isfile('yolo/%s.trt' % args.model):
        raise SystemExit('ERROR: file (yolo/%s.trt) not found!' % args.model)

    cam = Camera(args)
    if not cam.isOpened():
        raise SystemExit('ERROR: failed to open camera!')

    try:
        cls_dict = get_cls_dict(args.category_num)
        vis = BBoxVisualization(cls_dict)
        h, w = get_input_shape(args.model)
        trt_yolo = TrtYOLO(args.model, (h, w), args.category_num, args.letter_box)

        open_window(
            WINDOW_NAME, 'Camera TensorRT YOLO Demo',
            cam.img_width, cam.img_height)

        msg_queue = Queue(maxsize=100)
        # msg_queue.put("0,0,0,-1".encode())
        Thread(target=serArd, args=(msg_queue, )).start()

        loop_and_detect(cam, trt_yolo, msg_queue, conf_th=0.7, vis=vis)
    finally:
        # Runs on every exit path once the camera is open.
        cam.release()
        cv2.destroyAllWindows()
def set_attribute(self, settings_attr):
    """Copy settings from ``settings_attr`` onto the instance and open the video.

    Reads the keys ``video_url``, ``analysis_fps``, ``display_delay``,
    ``server_url``, ``server_port`` and ``dataset``. A URL containing ``://``
    is classified as ``"streaming"``, anything else as ``"file"``.
    Dataset index 0 selects 15 classes and index 1 selects 80; for any other
    index ``self.cls_dict`` is left untouched.
    """
    self.video_url = settings_attr["video_url"]
    self.analysis_fps = float(settings_attr["analysis_fps"])
    self.display_delay = int(settings_attr["display_delay"])
    self.server_url = settings_attr["server_url"]
    self.server_port = settings_attr["server_port"]
    self.dataset_index = settings_attr["dataset"]

    self.capture = cv2.VideoCapture(settings_attr["video_url"])
    self.video_fps = float(self.capture.get(cv2.CAP_PROP_FPS))

    self.video_type = "streaming" if "://" in self.video_url else "file"

    # (dataset index, class count): 0 is "obstacle", 1 is "mscoco".
    for index, class_count in ((0, 15), (1, 80)):
        if self.dataset_index == index:
            self.cls_dict = get_cls_dict(class_count)
            break

    self.bbox_visualization = BBoxVisualization(self.cls_dict)
def yolo_detection():
    """Run TensorRT YOLO detection on the camera feed without a display window.

    Steps: validate the arguments (engine expected at
    ``yolo/darknet/<model>.trt``), parse the input size from the model-name
    suffix, open the camera, build the engine, run ``loop_and_detect`` and
    release the camera.

    Raises:
        SystemExit: on a bad ``category_num``, a missing engine file, an
            invalid size suffix, or a camera that fails to open.
    """
    # dev = cuda.Device(0)
    # ctx = dev.make_context()
    args = parse_args()
    print(args)

    # --- configuration checks ---
    if args.category_num <= 0:
        raise SystemExit('ERROR: bad category_num (%d)!' % args.category_num)
    if not os.path.isfile('yolo/darknet/%s.trt' % args.model):
        raise SystemExit('ERROR: file (yolo/darknet/%s.trt) not found!' % args.model)

    classes = get_cls_dict(args.category_num)

    dim_text = args.model.rsplit('-', 1)[-1]
    fields = dim_text.split('x')
    if len(fields) == 1:
        width = height = int(dim_text)
    elif len(fields) == 2:
        width, height = int(fields[0]), int(fields[1])
    else:
        raise SystemExit('ERROR: bad yolo_dim (%s)!' % dim_text)
    if (height % 32, width % 32) != (0, 0):
        raise SystemExit('ERROR: bad yolo_dim (%s)!' % dim_text)

    # --- image capture ---
    camera = Camera(args)
    if not camera.isOpened():
        raise SystemExit('ERROR: failed to open camera!')

    # --- model deployment ---
    detector = TrtYOLO(args.model, (height, width), args.category_num)

    # --- detection (no window is opened in this variant) ---
    visualizer = BBoxVisualization(classes)
    loop_and_detect(camera, detector, conf_th=0.3, vis=visualizer)

    # --- release the capture ---
    camera.release()
def main():
    """Run TensorRT YOLO detection driven entirely by command-line options.

    ``args.model`` is the path of the engine file; its input size is taken
    from the text after the last ``-`` once ``.trt`` is removed. The valid
    COCO json is processed first, the image output directory is created when
    ``args.write_images`` is set, and a window is opened only when
    ``args.activate_display`` is set.

    Raises:
        SystemExit: on a bad ``category_num``, a missing engine file, a camera
            that fails to open, or an invalid size suffix.
    """
    args = parse_args()
    if args.category_num <= 0:
        raise SystemExit(f'ERROR: bad category_num ({args.category_num})!')
    if not os.path.isfile(args.model):
        raise SystemExit(f'ERROR: file {args.model} not found!')

    # Process valid coco json file
    process_valid_json(args.valid_coco)

    if args.write_images:
        # makedirs(exist_ok=True) also creates missing parents and avoids
        # the check-then-create race of exists() + mkdir().
        os.makedirs(args.image_output, exist_ok=True)

    # Create camera for video/image input
    cam = Camera(args)
    if not cam.get_is_opened():
        raise SystemExit('ERROR: failed to open camera!')

    # The camera is held from here on; release it on every exit path.
    try:
        class_dict = get_cls_dict(args.category_num)
        yolo_dim = (args.model.replace(".trt", "")).split('-')[-1]
        if 'x' in yolo_dim:
            dim_split = yolo_dim.split('x')
            if len(dim_split) != 2:
                raise SystemExit(f'ERROR: bad yolo_dim ({yolo_dim})!')
            w, h = int(dim_split[0]), int(dim_split[1])
        else:
            h = w = int(yolo_dim)
        if h % 32 != 0 or w % 32 != 0:
            raise SystemExit(f'ERROR: bad yolo_dim ({yolo_dim})!')

        # Create yolo
        trt_yolo = TrtYOLO(args.model, (h, w), args.category_num)

        if args.activate_display:
            open_window(
                WINDOW_NAME, 'Camera TensorRT YOLO Demo',
                cam.img_width, cam.img_height)
        visual = BBoxVisualization(class_dict)

        # Run detection
        loop_and_detect(cam, trt_yolo, args,
                        confidence_thresh=args.confidence_threshold,
                        visual=visual)
    finally:
        # Clean up
        cam.release()
        if args.activate_display:
            cv2.destroyAllWindows()
def init_yolo(self):
    """Derive the YOLO input size from the model name and build the visualizer.

    If ``self.model`` contains no ``-``, ``"-" + self.input_shape`` is appended
    first. The text after the last ``-`` is then parsed as ``<dim>`` or
    ``<w>x<h>`` into ``self.w`` / ``self.h``, both of which must be multiples
    of 32. Finally ``self.vis`` is created for ``self.category_num`` classes.

    Raises SystemExit when the size suffix is invalid.
    """
    if '-' not in self.model:
        self.model = self.model + "-" + self.input_shape

    dim_tag = self.model.rsplit('-', 1)[-1]
    chunks = dim_tag.split('x')
    if len(chunks) == 1:
        self.h = self.w = int(dim_tag)
    elif len(chunks) == 2:
        self.w, self.h = int(chunks[0]), int(chunks[1])
    else:
        raise SystemExit('ERROR: bad yolo_dim (%s)!' % dim_tag)

    if self.h % 32 or self.w % 32:
        raise SystemExit('ERROR: bad yolo_dim (%s)!' % dim_tag)

    self.vis = BBoxVisualization(get_cls_dict(self.category_num))
def main():
    """Run camera detection and publish results through an MQTT client.

    Validates the arguments, connects via ``init_mqtt(args.host, args.port)``,
    opens the camera, parses the input size from the model-name suffix and
    runs ``loop_and_detect`` with the client and ``args.topic``.

    Raises:
        SystemExit: on a bad ``category_num``, a missing ``yolo/<model>.trt``
            file, a camera that fails to open, or an invalid size suffix.
    """
    args = parse_args()
    if args.category_num <= 0:
        raise SystemExit('ERROR: bad category_num (%d)!' % args.category_num)
    if not os.path.isfile('yolo/%s.trt' % args.model):
        raise SystemExit('ERROR: file (yolo/%s.trt) not found!' % args.model)

    client = init_mqtt(args.host, args.port)
    try:
        cam = Camera(args)
        if not cam.isOpened():
            raise SystemExit('ERROR: failed to open camera!')

        try:
            cls_dict = get_cls_dict(args.category_num)
            print("cls_dict:", cls_dict)
            #print(cls_dict[3])
            yolo_dim = args.model.split('-')[-1]
            print("yolo_dim:", yolo_dim)
            if 'x' in yolo_dim:
                dim_split = yolo_dim.split('x')
                if len(dim_split) != 2:
                    raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)
                w, h = int(dim_split[0]), int(dim_split[1])
            else:
                h = w = int(yolo_dim)
            print('w:{0}, h:{1}'.format(w, h))
            if h % 32 != 0 or w % 32 != 0:
                raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)

            trt_yolo = TrtYOLO(args.model, (h, w), args.category_num, args.letter_box)

            open_window(
                WINDOW_NAME, 'Camera TensorRT YOLO Demo',
                cam.img_width, cam.img_height)
            vis = BBoxVisualization(cls_dict)
            loop_and_detect(cam, trt_yolo, conf_th=0.3, vis=vis,
                            cls_dict=cls_dict, client=client, topic=args.topic)
        finally:
            cam.release()
            cv2.destroyAllWindows()
    finally:
        # Was `client.disclose()`, which looks like a typo for disconnect().
        # NOTE(review): assumes init_mqtt returns a paho-mqtt style client
        # exposing disconnect() - confirm against init_mqtt.
        client.disconnect()
def main():
    """Run the camera TensorRT YOLO demo in a window sized to the camera.

    Validates the arguments, opens the camera, builds the engine from the
    model name, category count and letter-box flag, runs ``loop_and_detect``
    and then releases the camera and closes all windows.

    Raises:
        SystemExit: on a bad ``category_num``, a missing ``yolo/<model>.trt``
            file, or a camera that fails to open.
    """
    args = parse_args()

    if args.category_num <= 0:
        raise SystemExit('ERROR: bad category_num (%d)!' % args.category_num)
    engine_found = os.path.isfile('yolo/%s.trt' % args.model)
    if not engine_found:
        raise SystemExit('ERROR: file (yolo/%s.trt) not found!' % args.model)

    camera = Camera(args)
    if not camera.isOpened():
        raise SystemExit('ERROR: failed to open camera!')

    visualizer = BBoxVisualization(get_cls_dict(args.category_num))
    detector = TrtYOLO(args.model, args.category_num, args.letter_box)

    open_window(
        WINDOW_NAME,
        'Camera TensorRT YOLO Demo',
        camera.img_width,
        camera.img_height,
    )
    loop_and_detect(camera, detector, conf_th=0.3, vis=visualizer)

    camera.release()
    cv2.destroyAllWindows()
def main():
    """Run TensorRT YOLO detection on a video file and save an annotated copy.

    Opens ``args.video``, creates an ``mp4v`` writer for ``args.output`` at
    30 fps with the source frame size, and passes both to ``loop_and_detect``.

    Raises:
        SystemExit: on a bad ``category_num``, a missing ``yolo/<model>.trt``
            file, or an input video that cannot be opened.
    """
    args = parse_args()
    if args.category_num <= 0:
        raise SystemExit('ERROR: bad category_num (%d)!' % args.category_num)
    if not os.path.isfile('yolo/%s.trt' % args.model):
        raise SystemExit('ERROR: file (yolo/%s.trt) not found!' % args.model)

    cap = cv2.VideoCapture(args.video)
    if not cap.isOpened():
        cap.release()
        raise SystemExit('ERROR: failed to open the input video file!')

    writer = None
    try:
        # Named property ids instead of the bare 3 / 4.
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        writer = cv2.VideoWriter(
            args.output,
            cv2.VideoWriter_fourcc(*'mp4v'), 30, (frame_width, frame_height))

        cls_dict = get_cls_dict(args.category_num)
        vis = BBoxVisualization(cls_dict)
        trt_yolo = TrtYOLO(args.model, args.category_num, args.letter_box)

        loop_and_detect(cap, trt_yolo, conf_th=0.3, vis=vis, writer=writer)
    finally:
        # Release on every exit path so an error mid-run does not leave the
        # output file unfinalized or the input held open.
        if writer is not None:
            writer.release()
        cap.release()
def main():
    """Stream camera detections through an MJPEG server.

    Validates the arguments, opens the camera, parses the input size from the
    model-name suffix (``-<dim>`` or ``-<w>x<h>``), builds the engine and runs
    ``loop_and_detect`` with an ``MjpegServer`` on ``args.mjpeg_port``.
    Exceptions from the loop are printed; the server and camera are always
    shut down afterwards.

    Raises:
        SystemExit: on a bad ``category_num``, a missing ``yolo/<model>.trt``
            file, a camera that fails to open, or an invalid size suffix.
    """
    args = parse_args()
    if args.category_num <= 0:
        raise SystemExit('ERROR: bad category_num (%d)!' % args.category_num)
    if not os.path.isfile('yolo/%s.trt' % args.model):
        raise SystemExit('ERROR: file (yolo/%s.trt) not found!' % args.model)

    camera = Camera(args)
    if not camera.isOpened():
        raise SystemExit('ERROR: failed to open camera!')

    classes = get_cls_dict(args.category_num)

    size_text = args.model.rsplit('-', 1)[-1]
    segments = size_text.split('x')
    if len(segments) == 1:
        width = height = int(size_text)
    elif len(segments) == 2:
        width, height = int(segments[0]), int(segments[1])
    else:
        raise SystemExit('ERROR: bad yolo_dim (%s)!' % size_text)
    if height % 32 or width % 32:
        raise SystemExit('ERROR: bad yolo_dim (%s)!' % size_text)

    detector = TrtYOLO(args.model, (height, width), args.category_num)
    visualizer = BBoxVisualization(classes)

    server = MjpegServer(port=args.mjpeg_port)
    print('MJPEG server started...')
    try:
        loop_and_detect(
            camera, detector, conf_th=0.3, vis=visualizer,
            mjpeg_server=server)
    except Exception as e:
        # Report the failure and fall through to the shutdown below.
        print(e)
    finally:
        server.shutdown()
        camera.release()
def detect_and_alarm():
    """Run the robot's YOLOv3 TensorRT detector on the ``DETECT_ID`` camera.

    Uses a fixed 640x480 capture and the ``yolov3-tiny-416`` model, and passes
    the module-level ``DETECT_TIME`` to ``loop_and_detect`` as ``total_time``.
    A camera that fails to open is only reported; execution continues.
    """
    WINDOW_NAME = "Robot_YOLOv3_Detector_with_TensorRT"
    MODEL = "yolov3-tiny-416"
    IMAGE_WIDTH, IMAGE_HEIGHT = 640, 480
    RUN_TIME = False
    global DETECT_TIME

    camera = Camera_for_Robot(
        video_dev=DETECT_ID,
        image_width=IMAGE_WIDTH,
        image_height=IMAGE_HEIGHT,
    )
    #camera = Camera_for_Robot(video_dev='./test.mp4', image_width=IMAGE_WIDTH, image_height=IMAGE_HEIGHT)
    camera.open()
    if not camera.is_opened:
        print("Capture road opens failure")
        #return
    camera.start()

    open_window(WINDOW_NAME, IMAGE_WIDTH, IMAGE_HEIGHT, 'TensorRT YOLOv3 Detector')

    classes = get_cls_dict('coco')
    # Square network input taken from the model name: 416 or 608.
    side = int(MODEL.rsplit('-', 1)[-1])
    detector = TrtYOLOv3(model=MODEL, input_shape=(side, side))
    visualizer = BBoxVisualization(classes)

    loop_and_detect(
        camera,
        RUN_TIME,
        detector,
        conf_th=0.3,
        vis=visualizer,
        window_name=WINDOW_NAME,
        total_time=DETECT_TIME,
    )

    camera.stop()
    camera.release()
    cv2.destroyAllWindows()
    print("Detection finishes successfully")
def main():
    """Run the camera TensorRT YOLO demo and report load time and memory.

    Validates the arguments, opens the camera, parses the input size from the
    model-name suffix, times the ``TrtYOLO`` construction, runs
    ``loop_and_detect`` in a window sized to the camera and prints a
    ``MemReporter`` report before cleaning up.

    Raises:
        SystemExit: on a bad ``category_num``, a missing ``yolo/<model>.trt``
            file, a camera that fails to open, or an invalid size suffix.
    """
    args = parse_args()
    if args.category_num <= 0:
        raise SystemExit('ERROR: bad category_num (%d)!' % args.category_num)
    if not os.path.isfile('yolo/%s.trt' % args.model):
        raise SystemExit('ERROR: file (yolo/%s.trt) not found!' % args.model)

    camera = Camera(args)
    if not camera.isOpened():
        raise SystemExit('ERROR: failed to open camera!')

    classes = get_cls_dict(args.category_num)

    dim_label = args.model.rsplit('-', 1)[-1]
    dims = dim_label.split('x')
    if len(dims) == 1:
        width = height = int(dim_label)
    elif len(dims) == 2:
        width, height = int(dims[0]), int(dims[1])
    else:
        raise SystemExit('ERROR: bad yolo_dim (%s)!' % dim_label)
    if height % 32 or width % 32:
        raise SystemExit('ERROR: bad yolo_dim (%s)!' % dim_label)

    # Time how long building the engine takes.
    started_at = time.time()
    detector = TrtYOLO(args.model, (height, width), args.category_num)
    elapsed = datetime.timedelta(seconds=time.time() - started_at)
    print('Load weights Time: %s' % (elapsed))

    open_window(
        WINDOW_NAME,
        'Camera TensorRT YOLO Demo',
        camera.img_width,
        camera.img_height,
    )
    visualizer = BBoxVisualization(classes)
    loop_and_detect(camera, detector, conf_th=0.3, vis=visualizer)

    MemReporter().report()

    camera.release()
    cv2.destroyAllWindows()
'format=(string)NV12, framerate=(fraction)30/1 ! ' 'nvvidconv flip-method=2 ! ' 'video/x-raw, width=(int){}, height=(int){}, ' 'format=(string)BGRx ! ' 'videoconvert ! appsink').format(width, height) else: raise RuntimeError('onboard camera source not found!') cap = VideoCapture(gst_str) print("Start to load YoloV4 model") trt_yolo = TrtYOLO('yolov4_my-416', (416, 416), 4) print("YoloV4 model is loaded.") cls_dict = get_cls_dict(4) vis = BBoxVisualization(cls_dict) def detect_center(bboxes): center_x = (bboxes[0][0] / 416 + bboxes[0][2] / 416) / 2.0 - 0.5 center_y = (bboxes[0][1] / 416 + bboxes[0][3] / 416) / 2.0 - 0.5 return (center_x, center_y) speed = 0.5 turn_gain = 0.3 center = None bypass_number = 0 found_number = 0
def detect(config):
    """Detect, track and count objects crossing a door region, frame by frame.

    For every frame of the source the function runs the TensorRT YOLO engine
    (``perform_detection``), feeds the kept boxes to DeepSort, updates the
    in/out ``Counter``, draws the door rectangles and counters on the frame,
    and lets the ``Writer`` record clips. When a clip is finished, its name
    and action are sent over a TCP socket to ``localhost:8083`` and appended
    to ``data_files/logs2.txt``.

    Args:
        config: mapping read with the keys ``img_size``, ``output``,
            ``source``, ``weights``, ``half``, ``view_img``,
            ``config_deepsort``, ``device``, ``names``, ``category_num``,
            ``model``, ``letter_box``, ``conf_thres`` and ``needed_classes``.

    Raises:
        SystemExit: if ``category_num`` is not positive or
            ``yolo/<model>.trt`` does not exist.
        StopIteration: when ``view_img`` is True and ``q`` is pressed.

    Side effects: deletes and recreates the ``config["output"]`` directory and
    rebinds the module-level ``lost_ids``.
    """
    # BGR-style colour triples used for the overlay drawing below.
    COLOR_AROUND_DOOR = (48, 58, 221)
    COLOR_DOOR = (23, 158, 21)
    COLOR_LINE = (214, 4, 54)
    sent_videos = set()
    video_name = ""
    fpeses = []  # per-frame fps samples collected during warm-up
    fps = 0
    # door_array = select_object()
    # door_array = [475, 69, 557, 258]
    global flag, vid_writer, lost_ids
    # initial parameters
    # Rectangles are given as four numbers passed straight to Rectangle().
    door_array = [611, 70, 663, 310]
    around_door_array = [507, 24, 724, 374]
    low_border = 225
    high_border = 342
    #
    # NOTE(review): door_c is read later when computing centroid_distance, so
    # this line is treated as live code rather than a comment - confirm.
    door_c = find_centroid(door_array)
    rect_door = Rectangle(door_array[0], door_array[1], door_array[2],
                          door_array[3])
    rect_around_door = Rectangle(around_door_array[0], around_door_array[1],
                                 around_door_array[2], around_door_array[3])
    # socket
    HOST = "localhost"
    PORT = 8083
    # camera info
    save_img = True
    imgsz = (416, 416) if ONNX_EXPORT else config[
        "img_size"]  # (320, 192) or (416, 256) or (608, 352) for (height, width)
    out, source, weights, half, view_img = config["output"], config["source"], config["weights"], \
        config["half"], config["view_img"]
    webcam = source == '0' or source.startswith('rtsp') or source.startswith(
        'http') or source.endswith('.txt')
    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(config["config_deepsort"])
    # initial objects of classes
    counter = Counter()
    VideoHandler = Writer()
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)
    # Initialize device, weights etc.
    device = torch_utils.select_device(
        device='cpu' if ONNX_EXPORT else config["device"])
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    # Initialize colors
    names = load_classes(config["names"])
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]
    if config["category_num"] <= 0:
        raise SystemExit('ERROR: bad category_num (%d)!'
                         % config["category_num"])
    if not os.path.isfile('yolo/%s.trt' % config["model"]):
        raise SystemExit('ERROR: file (yolo/%s.trt) not found!' %
                         config["model"])
    # cap = cv2.VideoCapture(config["source"])
    # if not cap.isOpened():
    #     raise SystemExit('ERROR: failed to open the input video file!')
    # frame_width, frame_height = int(cap.get(3)), int(cap.get(4))
    # Same expression as above, recomputed before choosing the loader.
    webcam = source == '0' or source.startswith('rtsp') or source.startswith(
        'http') or source.endswith('.txt')
    if webcam:
        torch.backends.cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)
    img = torch.zeros((3, imgsz, imgsz), device=device)  # init img
    cls_dict = get_cls_dict(config["category_num"])
    #vis = BBoxVisualization(cls_dict)
    vis = None  # perform_detection is called without a visualizer
    h, w = get_input_shape(config["model"])
    trt_yolo = TrtYOLO(config["model"], (h, w), config["category_num"],
                       config["letter_box"])
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
        sock.connect((HOST, PORT))
        img_shape = (256, 256)
        for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
            t0 = time.time()  # start of this frame, used for the fps estimate
            flag_move = False
            flag_anyone_in_door = False
            ratio_detection = 0
            # Process detections
            lost_ids = counter.return_lost_ids()
            if webcam:  # batch_size >= 1
                p, s, im0 = path[0], '%g: ' % 0, im0s[0].copy(
                )  # TODO mb needed in loop for detection
            else:
                p, s, im0 = path, '', im0s
            preds, confs, clss = perform_detection(
                frame=im0,
                trt_yolo=trt_yolo,
                conf_th=config["conf_thres"],
                vis=vis)
            scaled_pred = []
            scaled_conf = []
            detections = torch.Tensor()
            # Keep only detections whose class name is in needed_classes.
            for i, (det, conf, cls) in enumerate(zip(preds, confs, clss)):
                if det is not None and len(det):
                    if names[int(cls)] not in config["needed_classes"]:
                        continue
                    det = xyxy_to_xywh(det)
                    # det = scale_coords(img_shape, det, im0.shape)
                    scaled_pred.append(det)
                    scaled_conf.append(conf)
            # NOTE(review): placement of the two tensor builds relative to
            # the loop above is inferred - confirm.
            detections = torch.Tensor(scaled_pred)
            confidences = torch.Tensor(scaled_conf)
            # Pass detections to deepsort
            if len(detections) != 0:
                outputs = deepsort.update(detections, confidences, im0)
                # draw boxes for visualization
                if len(outputs) > 0:
                    # Columns 0..3 are used as the box, the last as the id.
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, -1]
                    draw_boxes(im0, bbox_xyxy, identities)
                    # print('bbox_xywh ', bbox_xywh, 'id', identities)
                    counter.update_identities(identities)
                    for bbox_tracked, id_tracked in zip(bbox_xyxy, identities):
                        ratio_initial = find_ratio_ofbboxes(
                            bbox=bbox_tracked, rect_compare=rect_around_door)
                        ratio_door = find_ratio_ofbboxes(
                            bbox=bbox_tracked, rect_compare=rect_door)
                        # person is inside the door contour for the first time
                        if ratio_initial > 0.2:
                            # NOTE(review): the elif is assumed to pair with
                            # this inner if - confirm.
                            if VideoHandler.counter_frames_indoor == 0:
                                # flag that recording has started
                                VideoHandler.start_video(id_tracked)
                                flag_anyone_in_door = True
                            elif ratio_initial > 0.2 and id_tracked not in VideoHandler.id_inside_door_detected:
                                VideoHandler.continue_opened_video(
                                    id=id_tracked, seconds=3)
                                flag_anyone_in_door = True
                        if id_tracked not in counter.people_init or counter.people_init[
                                id_tracked] == 0:
                            counter.obj_initialized(id_tracked)
                            if ratio_door >= 0.2 and low_border < bbox_tracked[
                                    3] < high_border:
                                # was initialized in door, probably going out of office
                                counter.people_init[id_tracked] = 2
                            elif ratio_door < 0.2:
                                # initialized in the corridor, mb going in
                                counter.people_init[id_tracked] = 1
                            # else:
                            #     # res is None, means that object is not in door contour
                            #     counter.people_init[id_tracked] = 1
                            counter.frame_age_counter[id_tracked] = 0
                            counter.people_bbox[id_tracked] = bbox_tracked
                        counter.cur_bbox[id_tracked] = bbox_tracked
            else:
                # No detections kept for this frame.
                deepsort.increment_ages()
            if counter.need_to_clear():
                counter.clear_all()
            # Stream results
            vals_to_del = []  # track ids to drop after the loop below
            for val in counter.people_init.keys():
                # check bbox also
                cur_c = find_centroid(counter.cur_bbox[val])
                # Squared distance between the door centroid and this track.
                centroid_distance = np.sum(
                    np.array([(door_c[i] - cur_c[i])**2
                              for i in range(len(door_c))]))
                ratio = find_ratio_ofbboxes(bbox=counter.cur_bbox[val],
                                            rect_compare=rect_door)
                if val in lost_ids and counter.people_init[val] != -1:
                    # if vector_person < 0 then current coord is less than initialized, it means that man is going
                    # in the exit direction
                    if counter.people_init[val] == 2 \
                            and ratio < 0.4 and centroid_distance > 5000:
                        print('ratio out: {}\n centroids: {}\n'.format(
                            ratio, centroid_distance))
                        counter.get_out()
                        counter.people_init[val] = -1
                        VideoHandler.stop_recording(
                            action_occured="вышел из кабинета")
                        vals_to_del.append(val)
                    elif counter.people_init[val] == 1 \
                            and ratio >= 0.4 and centroid_distance < 5000:
                        print('ratio in: {}\n centroids: {}\n'.format(
                            ratio, centroid_distance))
                        counter.get_in()
                        counter.people_init[val] = -1
                        VideoHandler.stop_recording(
                            action_occured="зашел внутрь")
                        vals_to_del.append(val)
                    lost_ids.remove(val)
                # TODO maybe delete this condition
                elif counter.frame_age_counter.get(val, 0) >= counter.max_frame_age_counter \
                        and counter.people_init[val] == 2:
                    if ratio < 0.2 and centroid_distance > 10000:
                        counter.get_out()
                        print('ratio out max frames: ', ratio)
                        counter.people_init[val] = -1
                        VideoHandler.stop_recording(action_occured="вышел")
                        vals_to_del.append(val)
                    counter.age_counter[val] = 0
            counter.clear_lost_ids()
            for valtodel in vals_to_del:
                counter.delete_person_data(track_id=valtodel)
            ins, outs = counter.show_counter()
            # Overlay: counter background, door boxes, in/out text, border lines.
            cv2.rectangle(im0, (0, 0), (250, 50), (0, 0, 0), -1, 8)
            cv2.rectangle(im0, (int(door_array[0]), int(door_array[1])),
                          (int(door_array[2]), int(door_array[3])), COLOR_DOOR,
                          3)
            cv2.rectangle(
                im0, (int(around_door_array[0]), int(around_door_array[1])),
                (int(around_door_array[2]), int(around_door_array[3])),
                COLOR_AROUND_DOOR, 3)
            cv2.putText(im0, "in: {}, out: {} ".format(ins, outs), (10, 35), 0,
                        1e-3 * im0.shape[0], (255, 255, 255), 3)
            cv2.line(im0, (door_array[0], low_border), (680, low_border),
                     COLOR_LINE, 4)
            cv2.line(im0, (door_array[0], high_border), (680, high_border),
                     COLOR_LINE, 4)
            if VideoHandler.stop_writing(im0):
                # send_new_posts(video_name, action_occured)
                # Message format on the socket: "<video_name>:<action>".
                sock.sendall(
                    bytes(
                        VideoHandler.video_name + ":" +
                        VideoHandler.action_occured, "utf-8"))
                data = sock.recv(100)
                print('Received', repr(data.decode("utf-8")))
                sent_videos.add(VideoHandler.video_name)
                # NOTE(review): centroid_distance, ratio_initial, ratio_door
                # and ratio are only bound inside the loops above - verify
                # they are always set before this point is reached.
                with open('data_files/logs2.txt', 'a',
                          encoding="utf-8-sig") as wr:
                    wr.write(
                        'video {}, action: {}, centroid: {}, ratio_init: {}, ratio_door: {}, ratio: {} \n'
                        .format(VideoHandler.video_name,
                                VideoHandler.action_occured,
                                centroid_distance, ratio_initial, ratio_door,
                                ratio))
                print('_________________video was sent _________________')
                # Start over with a fresh writer for the next clip.
                VideoHandler = Writer()
                VideoHandler.set_fps(fps)
            else:
                VideoHandler.continue_writing(im0, flag_anyone_in_door)
            if view_img is True:
                cv2.imshow('image', im0)
                cv2.waitKey(1)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration
            delta_time = (time.time() - t0)
            # t2_ds = time.time()
            # print('%s Torch:. (%.3fs)' % (s, t2 - t1))
            # print('Full pipe. (%.3fs)' % (t2_ds - t0_ds))
            if len(fpeses) < 15:
                # Warm-up: collect fps samples from the first 15 frames.
                fpeses.append(round(1 / delta_time))
                print(delta_time)
            elif len(fpeses) == 15:
                # fps = round(np.median(np.array(fpeses)))
                median_fps = float(np.median(np.array(fpeses)))
                fps = round(median_fps, 2)
                print('max fps: ', fps)
                # The measured median is printed, then overridden with 20.
                fps = 20
                VideoHandler.set_fps(fps)
                counter.set_fps(fps)
                # A 16th entry makes this branch run only once.
                fpeses.append(fps)
                motion_detection = True
            else:
                # Steady state: only print the writer's status flags.
                if VideoHandler.flag_writing_video:
                    print('\writing video ')
                if VideoHandler.flag_stop_writing:
                    print('stop writing')
                if flag_anyone_in_door:
                    print('anyone in door')
                if VideoHandler.counter_frames_indoor:
                    print('counter frames indoor: {}'.format(
                        VideoHandler.counter_frames_indoor))
import cv2 from time import sleep import time import subprocess as sp from utils.yolo_classes import get_cls_dict from utils.visualization import BBoxVisualization from utils.yolo_with_plugins import TrtYOLO import pycuda.autoinit cap = cv2.VideoCapture('rtsp://localhost:8554/stream_input') cls_dict = get_cls_dict(80) h = w = 288 model = 'yolov4-tiny-288' trt_yolo = TrtYOLO(model, (h, w)) vis = BBoxVisualization(cls_dict) rtsp_server_output = 'rtsp://localhost:8554/stream_output' command = ['ffmpeg', '-re', # '-s', sizeStr, # '-r', str(fps), # rtsp fps (from input server) # '-f', 'v4l2', '-i', '-', # You can change ffmpeg parameter after this item. # '-pix_fmt', 'yuv420p', # '-r', '30', # output fps # '-g', '50', # '-c:v', 'libx264', # '-b:v', '2M', # '-bufsize', '64M', # '-maxrate', "4M",
def detect(config):
    """Count people passing through a door in a video and record the events.

    Frames are read from ``config["source"]`` with OpenCV, run through
    ``perform_detection`` and DeepSort, and every track is compared against
    two fixed rectangles (the door and a larger zone around it) to decide
    whether somebody went in or out. Finished video clips are announced over
    a TCP socket and appended to a log file.

    NOTE(review): this function was reviewed from a whitespace-collapsed
    source; the block nesting below was reconstructed from statement order
    and should be confirmed against the deployed version.

    Args:
        config: mapping that provides the keys read below: "output",
            "source", "weights", "half", "view_img", "config_deepsort",
            "names", "category_num", "model", "letter_box", "conf_thres",
            "needed_classes" and, when ``ONNX_EXPORT`` is false, "img_size".

    Raises:
        SystemExit: if ``category_num`` is not positive, the
            ``yolo/<model>.trt`` file is missing, or the source cannot be
            opened.
        StopIteration: when ``view_img`` is set and the user presses ``q``.

    Side effects:
        Deletes and recreates the ``config["output"]`` directory, connects to
        ``localhost:8084``, appends to ``../data_files/logs2.txt`` and rebinds
        the module-level ``lost_ids``.
    """
    global flag, vid_writer, lost_ids

    sent_videos = set()
    video_name = ""
    fpeses = []
    fps = 0

    # Door geometry as [x1, y1, x2, y2].
    # Earlier calibrations: [475, 69, 557, 258], [528, 21, 581, 315],
    # [596, 76, 650, 295] (18 stream).
    door_array = [611, 70, 663, 310]
    # Earlier calibrations: [572, 79, 694, 306], [470, 34, 722, 391].
    around_door_array = [507, 24, 724, 374]
    low_border = 225
    # Fix: door_c is read in the per-track loop below but its assignment was
    # commented out, which raised NameError as soon as a track existed.
    door_c = find_centroid(door_array)
    # Fix: the value is logged after the per-track loop even when that loop
    # did not run, so give it a defined starting value.
    centroid_distance = 0
    rect_door = Rectangle(door_array[0], door_array[1],
                          door_array[2], door_array[3])
    rect_around_door = Rectangle(around_door_array[0], around_door_array[1],
                                 around_door_array[2], around_door_array[3])

    # socket
    HOST = "localhost"
    PORT = 8084

    # camera info
    save_img = True
    # (320, 192) or (416, 256) or (608, 352) for (height, width)
    imgsz = (416, 416) if ONNX_EXPORT else config["img_size"]
    out, source, weights, half, view_img = config["output"], config["source"], config["weights"], \
        config["half"], config["view_img"]
    webcam = source == '0' or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt')

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(config["config_deepsort"])

    # initial objects of classes
    counter = Counter(counter_in=0, counter_out=0, track_id=0)
    VideoHandler = Writer()
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Start from an empty output folder.
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder

    # Initialize colors
    names = load_classes(config["names"])
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]

    if config["category_num"] <= 0:
        raise SystemExit('ERROR: bad category_num (%d)!' % config["category_num"])
    if not os.path.isfile('yolo/%s.trt' % config["model"]):
        raise SystemExit('ERROR: file (yolo/%s.trt) not found!' % config["model"])

    cap = cv2.VideoCapture(config["source"])
    # Fix: the capture was never released; release it on every exit path.
    try:
        if not cap.isOpened():
            raise SystemExit('ERROR: failed to open the input video file!')
        frame_width, frame_height = int(cap.get(3)), int(cap.get(4))

        cls_dict = get_cls_dict(config["category_num"])
        vis = BBoxVisualization(cls_dict)
        h, w = get_input_shape(config["model"])
        trt_yolo = TrtYOLO(config["model"], (h, w), config["category_num"], config["letter_box"])

        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
            sock.connect((HOST, PORT))
            img_shape = (288, 288)
            while True:
                ret, im0 = cap.read()
                if not ret:
                    break

                preds, confs, clss = perform_detection(frame=im0, trt_yolo=trt_yolo,
                                                       conf_th=config["conf_thres"], vis=vis)
                flag_move = False
                flag_anyone_in_door = False
                # The per-frame timer starts after detection has finished.
                t0 = time.time()
                ratio_detection = 0

                # Process detections
                lost_ids = counter.return_lost_ids()
                for i, (det, conf, cls) in enumerate(zip(preds, confs, clss)):
                    if det is not None and len(det):
                        if names[int(cls)] not in config["needed_classes"]:
                            continue
                        # Write results
                        if save_img or view_img:  # Add bbox to image
                            label = '%s %.2f' % (names[int(cls)], conf)
                            plot_one_box(det, im0, label=label, color=colors[int(cls)])

                detections = torch.Tensor(preds)
                confidences = torch.Tensor(confs)

                # Pass detections to deepsort
                # NOTE(review): as reconstructed, an empty frame skips the
                # rest of the loop body (overlay, clip writing, fps) - confirm.
                if len(detections) == 0:
                    continue
                outputs = deepsort.update(detections, confidences, im0)
                print('detections ', detections)
                print('outputs ', outputs)

                # draw boxes for visualization
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, -1]
                    draw_boxes(im0, bbox_xyxy, identities)
                    print('bbox_xyxy ', bbox_xyxy)
                    counter.update_identities(identities)

                    for bbox_tracked, id_tracked in zip(bbox_xyxy, identities):
                        rect_detection = Rectangle(bbox_tracked[0], bbox_tracked[1],
                                                   bbox_tracked[2], bbox_tracked[3])
                        inter_detection = rect_detection & rect_around_door
                        if inter_detection:
                            inter_square_detection = rect_square(*inter_detection)
                            cur_square_detection = rect_square(*rect_detection)
                            try:
                                ratio_detection = inter_square_detection / cur_square_detection
                            except ZeroDivisionError:
                                ratio_detection = 0

                            # person is inside the door contour for the first time
                            if ratio_detection > 0.2:
                                if VideoHandler.counter_frames_indoor == 0:
                                    # flag that recording has started
                                    VideoHandler.start_video(id_tracked)
                                    flag_anyone_in_door = True
                                elif ratio_detection > 0.2 and id_tracked not in VideoHandler.id_inside_door_detected:
                                    VideoHandler.continue_opened_video(id=id_tracked, seconds=3)
                                    flag_anyone_in_door = True

                        if id_tracked not in counter.people_init or counter.people_init[id_tracked] == 0:
                            counter.obj_initialized(id_tracked)
                            rect_head = Rectangle(bbox_tracked[0], bbox_tracked[1],
                                                  bbox_tracked[2], bbox_tracked[3])
                            intersection = rect_head & rect_door
                            if intersection:
                                intersection_square = rect_square(*intersection)
                                head_square = rect_square(*rect_head)
                                # Guarded like the other area ratios in this
                                # function; a zero-area box counts as no overlap.
                                try:
                                    rat = intersection_square / head_square
                                except ZeroDivisionError:
                                    rat = 0
                                if rat >= 0.4 and bbox_tracked[3] > low_border:
                                    # was initialized in door, probably going out of office
                                    counter.people_init[id_tracked] = 2
                                elif rat < 0.4:
                                    # initialized in the corridor, mb going in
                                    counter.people_init[id_tracked] = 1
                            else:
                                # res is None, means that object is not in door contour
                                counter.people_init[id_tracked] = 1
                            counter.frame_age_counter[id_tracked] = 0
                            counter.people_bbox[id_tracked] = bbox_tracked
                        counter.cur_bbox[id_tracked] = bbox_tracked
                else:
                    deepsort.increment_ages()

                # Print time (inference + NMS)
                t2 = torch_utils.time_synchronized()

                # Decide, per known track, whether an in/out event happened.
                vals_to_del = []
                for val in counter.people_init.keys():
                    # check bbox also
                    inter = 0
                    cur_square = 0
                    ratio = 0
                    cur_c = find_centroid(counter.cur_bbox[val])
                    # Squared distance between the door centroid and the track centroid.
                    centroid_distance = np.sum(
                        np.array([(door_c[i] - cur_c[i]) ** 2 for i in range(len(door_c))]))
                    rect_cur = Rectangle(counter.cur_bbox[val][0], counter.cur_bbox[val][1],
                                         counter.cur_bbox[val][2], counter.cur_bbox[val][3])
                    inter = rect_cur & rect_door

                    if val in lost_ids and counter.people_init[val] != -1:
                        if inter:
                            inter_square = rect_square(*inter)
                            cur_square = rect_square(*rect_cur)
                            try:
                                ratio = inter_square / cur_square
                            except ZeroDivisionError:
                                ratio = 0

                        if counter.people_init[val] == 2 \
                                and ratio < 0.4 and centroid_distance > 5000:
                            print('ratio out: {}\n centroids: {}\n'.format(ratio, centroid_distance))
                            counter.get_out()
                            counter.people_init[val] = -1
                            VideoHandler.stop_recording(action_occured="вышел из кабинета")
                            vals_to_del.append(val)
                        elif counter.people_init[val] == 1 \
                                and ratio >= 0.4 and centroid_distance < 1000:
                            print('ratio in: {}\n centroids: {}\n'.format(ratio, centroid_distance))
                            counter.get_in()
                            counter.people_init[val] = -1
                            VideoHandler.stop_recording(action_occured="зашел внутрь")
                            vals_to_del.append(val)
                        lost_ids.remove(val)

                    # TODO maybe delete this condition
                    elif counter.frame_age_counter.get(val, 0) >= counter.max_frame_age_counter \
                            and counter.people_init[val] == 2:
                        if inter:
                            inter_square = rect_square(*inter)
                            cur_square = rect_square(*rect_cur)
                            try:
                                ratio = inter_square / cur_square
                            except ZeroDivisionError:
                                ratio = 0
                        if ratio < 0.2 and centroid_distance > 10000:
                            counter.get_out()
                            print('ratio out max frames: ', ratio)
                            counter.people_init[val] = -1
                            VideoHandler.stop_recording(action_occured="вышел")
                            vals_to_del.append(val)
                        counter.age_counter[val] = 0

                counter.clear_lost_ids()
                # Deletion is deferred so people_init is not resized while iterated.
                for valtodel in vals_to_del:
                    counter.delete_person_data(track_id=valtodel)

                # Overlay: counter banner, door rectangles and the low border line.
                ins, outs = counter.show_counter()
                cv2.rectangle(im0, (0, 0), (250, 50), (0, 0, 0), -1, 8)
                cv2.rectangle(im0, (int(door_array[0]), int(door_array[1])),
                              (int(door_array[2]), int(door_array[3])), (23, 158, 21), 3)
                cv2.rectangle(im0, (int(around_door_array[0]), int(around_door_array[1])),
                              (int(around_door_array[2]), int(around_door_array[3])), (48, 58, 221), 3)
                cv2.putText(im0, "in: {}, out: {} ".format(ins, outs), (10, 35), 0,
                            1e-3 * im0.shape[0], (255, 255, 255), 3)
                cv2.line(im0, (door_array[0], low_border), (880, low_border), (214, 4, 54), 4)

                if VideoHandler.stop_writing(im0):
                    # Announce the finished clip as "<video_name>:<action>" and wait for a reply.
                    sock.sendall(bytes(VideoHandler.video_name + ":" + VideoHandler.action_occured, "utf-8"))
                    data = sock.recv(100)
                    print('Received', repr(data.decode("utf-8")))
                    sent_videos.add(VideoHandler.video_name)
                    with open('../data_files/logs2.txt', 'a', encoding="utf-8-sig") as wr:
                        wr.write('video {}, man {}, centroid {} '.format(VideoHandler.video_name,
                                                                         VideoHandler.action_occured,
                                                                         centroid_distance))
                    # Start over with a fresh writer for the next clip.
                    VideoHandler = Writer()
                    VideoHandler.set_fps(fps)
                else:
                    VideoHandler.continue_writing(im0, flag_anyone_in_door)

                if view_img:
                    cv2.imshow('image', im0)
                    if cv2.waitKey(1) == ord('q'):  # q to quit
                        raise StopIteration

                delta_time = (time.time() - t0)
                if len(fpeses) < 30:
                    # Collect 30 per-frame samples first.
                    fpeses.append(round(1 / delta_time))
                elif len(fpeses) == 30:
                    # Use the median of the samples as the fps; the extra
                    # append makes this branch run only once.
                    fps = np.median(np.array(fpeses))
                    print('fps set: ', fps)
                    VideoHandler.set_fps(fps)
                    counter.set_fps(fps)
                    fpeses.append(fps)
                    motion_detection = True
                else:
                    print('\nflag writing video: ', VideoHandler.flag_writing_video)
                    print('flag stop writing: ', VideoHandler.flag_stop_writing)
                    print('flag anyone in door: ', flag_anyone_in_door)
                    print('counter frames indoor: ', VideoHandler.counter_frames_indoor)
    finally:
        cap.release()
def run_detection(od_model, event_detectors, frame_dir, frame_path_list,
                  fram_bbox_dir, json_dir, bbox_video_path):
    """Run object and event detection over extracted frames and save results.

    For each frame name in ``frame_path_list`` the image is read from
    ``frame_dir``, passed to ``od_model.inference_by_image``, drawn with
    bounding boxes, written to ``fram_bbox_dir`` and to the bbox video, then
    handed to every event detector. One JSON file per frame is written to
    ``json_dir``.

    Reads the names ``video_path``, ``od_model_name``, ``extract_fps`` and
    ``fps`` from the enclosing module scope.

    Args:
        od_model: object exposing ``inference_by_image(frame)``.
        event_detectors: iterable of objects exposing ``model_name`` and
            ``inference(frame, dict_result)``.
        frame_dir: directory holding the extracted frames.
        frame_path_list: frame file names inside ``frame_dir``.
        fram_bbox_dir: directory that receives the annotated frames.
        json_dir: directory that receives the per-frame JSON results.
        bbox_video_path: output path of the annotated video.

    Returns:
        list of per-frame event result dicts, in frame order.
    """
    fourcc = cv2.VideoWriter_fourcc(*'DIVX')
    # NOTE(review): the writer is opened at a fixed 20 fps and 640x360;
    # presumably the extracted frames have that size - confirm.
    video_writer = cv2.VideoWriter(bbox_video_path, fourcc, 20, (640, 360))
    frame_number = 0
    event_results = []
    cls_dict = get_cls_dict(15)
    bbox_visualization = BBoxVisualization(cls_dict)

    # Release the writer even if a frame fails, so the container is finalized.
    try:
        for frame_number, frame_name in enumerate(frame_path_list, start=1):
            frame_path = os.path.join(frame_dir, frame_name)
            frame = cv2.imread(frame_path)
            results = od_model.inference_by_image(frame)
            frame_bbox = bbox_visualization.draw_bboxes(frame, results)
            cv2.imwrite(os.path.join(fram_bbox_dir, frame_name), frame_bbox)
            video_writer.write(frame_bbox)

            # Position of this extracted frame in the source video, computed
            # once instead of three times per frame.
            scaled_position = frame_number / extract_fps * fps
            source_frame_number = int(scaled_position)
            timestamp = str(convert_framenumber2timestamp(scaled_position, fps))

            dict_result = dict()
            dict_result["image_path"] = frame_path
            dict_result["cam_address"] = video_path
            dict_result["module"] = od_model_name
            dict_result["frame_number"] = source_frame_number
            dict_result["timestamp"] = timestamp
            dict_result["results"] = []
            dict_result["results"].append({"detection_result": results})

            event_result = dict()
            event_result["cam_address"] = video_path
            event_result["frame_number"] = source_frame_number
            event_result["timestamp"] = timestamp
            event_result["event_result"] = dict()
            for event_detector in event_detectors:
                event_result["event_result"][
                    event_detector.model_name] = event_detector.inference(
                        frame, dict_result)
            event_results.append(event_result)

            # Single-line progress indicator, rewritten in place via '\r'.
            print(
                "\rframe number: {:>6}/{}\t/ extract frame number: {:>6}\t/ timestamp: {:>6}"
                .format(frame_number, len(frame_path_list),
                        source_frame_number, timestamp),
                end='')

            # The context manager closes the file even if json.dump raises.
            json_path = os.path.join(
                json_dir, frame_name.split(".jpg")[0] + ".json")
            with open(json_path, "w") as json_result_file:
                json.dump(dict_result, json_result_file, indent=4)
    finally:
        video_writer.release()

    print()
    PrintLog.i("Extraction is successfully completed(framecount: {})".format(
        frame_number))
    if os.path.exists(bbox_video_path):
        PrintLog.i("BBox video is successfully generated(path: {})".format(
            bbox_video_path))
    else:
        PrintLog.i("BBox video is failed to generated.")
    return event_results