def __init__(self, frame_info_pool=None, detection_result_pool=None, final_result_pool=None, model='yolov4-416', dataset='obstacle', score_threshold=0.5, nms_threshold=0.3): """ :param model: model name :param category_num: """ self.results = dict() self.model_name = model self.dataset = dataset self.frame_info_pool = frame_info_pool self.detection_result_pool = detection_result_pool self.final_result_pool = final_result_pool self.score_threshold = score_threshold self.nms_threshold = nms_threshold if dataset == "coco": category_num = 80 else : category_num = 15 self.cls_dict = get_cls_dict(category_num) yolo_dim = model.split('-')[-1] if 'x' in yolo_dim: dim_split = yolo_dim.split('x') if len(dim_split) != 2: raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim) w, h = int(dim_split[0]), int(dim_split[1]) else: h = w = int(yolo_dim) if h % 32 != 0 or w % 32 != 0: raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim) self.model = TrtYOLO(model, (h, w), category_num) PrintLog.i("Object detection model is loaded - {}\t{}".format(model, dataset))
def __init__(self): """ Constructor """ self.bridge = CvBridge() self.init_params() self.init_yolo() self.cuda_ctx = cuda.Device(0).make_context() self.trt_yolo = TrtYOLO((self.model_path + self.model), (self.h, self.w), self.category_num)
def main(): args = parse_args() check_args(args) results_file = 'yolo/results_%s.json' % args.model yolo_dim = int(args.model.split('-')[-1]) # 288, 416 or 608 trt_yolo = TrtYOLO(args.model, (yolo_dim, yolo_dim), args.category_num) jpgs = [j for j in os.listdir(args.imgs_dir) if j.endswith('.jpg')] generate_results(trt_yolo, args.imgs_dir, jpgs, results_file, non_coco=args.non_coco) # Run COCO mAP evaluation # Reference: https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb cocoGt = COCO(args.annotations) cocoDt = cocoGt.loadRes(results_file) imgIds = sorted(cocoGt.getImgIds()) cocoEval = COCOeval(cocoGt, cocoDt, 'bbox') cocoEval.params.imgIds = imgIds cocoEval.evaluate() cocoEval.accumulate() print(cocoEval.summarize())
def __init__(self, input_stream: cv.VideoCapture, camera_gps: np.ndarray): # todo: add verbosity/display mode flag """Constructor for ClassName""" self.input_stream = input_stream self.camera_gps = camera_gps self.normalized_gps_df = pd.DataFrame(dtype=np.float64) self.sampling_time = 0.05 # todo: change to use FPS # initialize constants self.SCALE_FACTOR = 1000.0 # yolo model specific constants self.target_name_dict = {0: 'TargetShip'} self.vis = BBoxVisualization(self.target_name_dict) self.conf_th = 0.5 self.display_mode = False # verbosity flag self.model = 'yolov4-416' self.model_height = 416 self.model_width = 416 self.model_shape = (self.model_height, self.model_width) self.category_number = 80 self.trt_yolo = TrtYOLO(self.model, self.model_shape, self.category_number) self.a_lim = 720.0 self.b_lim = 1280.0 / 2.0 # todo: load the models here and save them as constants super(AitasInterface, self).__init__()
def main(): print(f"{datetime.datetime.now().isoformat()} start!", flush=True) args = parse_args() if args.category_num <= 0: raise SystemExit('ERROR: bad category_num (%d)!' % args.category_num) if not os.path.isfile('yolo/%s.trt' % args.model): raise SystemExit('ERROR: file (yolo/%s.trt) not found!' % args.model) cam = Camera(args) if not cam.isOpened(): raise SystemExit('ERROR: failed to open camera!') cls_dict = get_cls_dict(args.category_num) yolo_dim = args.model.split('-')[-1] if 'x' in yolo_dim: dim_split = yolo_dim.split('x') if len(dim_split) != 2: raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim) w, h = int(dim_split[0]), int(dim_split[1]) else: h = w = int(yolo_dim) if h % 32 != 0 or w % 32 != 0: raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim) trt_yolo = TrtYOLO(args.model, (h, w), args.category_num) # open_window( # WINDOW_NAME, 'Camera TensorRT YOLO Demo', # cam.img_width, cam.img_height) vis = BBoxVisualization(cls_dict) loop_and_detect(cam, trt_yolo, conf_th=0.3, vis=vis) cam.release()
def run_detection(): # args = parse_args() with open("cfg/detection_tracker_cfg.json") as detection_config: detect_config = json.load(detection_config) if detect_config["category_num"] <= 0: raise SystemExit('ERROR: bad category_num (%d)!' % detect_config["category_num"]) if not os.path.isfile('yolo/%s.trt' % detect_config["model"]): raise SystemExit('ERROR: file (yolo/%s.trt) not found!' % detect_config["model"]) cap = cv2.VideoCapture(detect_config["source"]) if not cap.isOpened(): raise SystemExit('ERROR: failed to open the input video file!') frame_width, frame_height = int(cap.get(3)), int(cap.get(4)) cls_dict = get_cls_dict(detect_config["category_num"]) #vis = BBoxVisualization(cls_dict) h, w = get_input_shape(detect_config["model"]) trt_yolo = TrtYOLO(detect_config["model"], (h, w), detect_config["category_num"], detect_config["letter_box"]) ret, frame = cap.read() boxes, confs, clss = perform_detection(frame=frame, trt_yolo=trt_yolo, conf_th=0.3, vis=None)
def main(): args = parse_args() if args.category_num <= 0: raise SystemExit('ERROR: bad category_num (%d)!' % args.category_num) if not os.path.isfile('yolo/%s.trt' % args.model): raise SystemExit('ERROR: file (yolo/%s.trt) not found!' % args.model) cam = Camera(args) if not cam.isOpened(): raise SystemExit('ERROR: failed to open camera!') cls_dict = get_cls_dict(args.category_num) yolo_dim = args.model.split('-')[-1] if 'x' in yolo_dim: dim_split = yolo_dim.split('x') if len(dim_split) != 2: raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim) w, h = int(dim_split[0]), int(dim_split[1]) else: h = w = int(yolo_dim) if h % 32 != 0 or w % 32 != 0: raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim) trt_yolo = TrtYOLO(args.model, (h, w), args.category_num) open_window(WINDOW_NAME, 'Camera TensorRT YOLO Demo', 640, 480) vis = BBoxVisualization(cls_dict) loop_and_detect(cam, trt_yolo, conf_th=0.3, vis=vis) cam.release() cv2.destroyAllWindows()
def main(): args = parse_args() check_args(args) if args.category_num <= 0: raise SystemExit('ERROR: bad category_num (%d)!' % args.category_num) if not os.path.isfile('yolo/%s.trt' % args.model): raise SystemExit('ERROR: file (yolo/%s.trt) not found!' % args.model) results_file = 'yolo/results_%s.json' % args.model trt_yolo = TrtYOLO(args.model, args.category_num, args.letter_box) jpgs = [j for j in os.listdir(args.imgs_dir) if j.endswith('.jpg')] generate_results(trt_yolo, args.imgs_dir, jpgs, results_file, non_coco=args.non_coco) # Run COCO mAP evaluation # Reference: https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb cocoGt = COCO(args.annotations) cocoDt = cocoGt.loadRes(results_file) imgIds = sorted(cocoGt.getImgIds()) cocoEval = COCOeval(cocoGt, cocoDt, 'bbox') cocoEval.params.imgIds = imgIds cocoEval.evaluate() cocoEval.accumulate() cocoEval.summarize()
def main(): args = parse_args() if args.category_num <= 0: raise SystemExit('ERROR: bad category_num (%d)!' % args.category_num) if not os.path.isfile('yolo/%s.trt' % args.model): raise SystemExit('ERROR: file (yolo/%s.trt) not found!' % args.model) cam = Camera(args) if not cam.isOpened(): raise SystemExit('ERROR: failed to open camera!') cls_dict = get_cls_dict(args.category_num) vis = BBoxVisualization(cls_dict) h, w = get_input_shape(args.model) trt_yolo = TrtYOLO(args.model, (h, w), args.category_num, args.letter_box) open_window(WINDOW_NAME, 'Camera TensorRT YOLO Demo', cam.img_width, cam.img_height) msg_queue = Queue(maxsize=100) # msg_queue.put("0,0,0,-1".encode()) Thread(target=serArd, args=(msg_queue, )).start() loop_and_detect(cam, trt_yolo, msg_queue, conf_th=0.7, vis=vis) while True: pass cam.release() cv2.destroyAllWindows()
def main(): args = parse_args() if args.category_num <= 0: raise SystemExit('ERROR: bad category_num (%d)!' % args.category_num) if not os.path.isfile('yolo/%s.trt' % args.model): raise SystemExit('ERROR: file (yolo/%s.trt) not found!' % args.model) cap = cv2.VideoCapture(args.video_name) if (cap.isOpened() == False): print("unable to read read source video feed") cls_dict = get_cls_dict(args.category_num) yolo_dim = args.model.split('-')[-1] if 'x' in yolo_dim: dim_split = yolo_dim.split('x') if len(dim_split) != 2: raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim) w, h = int(dim_split[0]), int(dim_split[1]) else: h = w = int(yolo_dim) if h % 32 != 0 or w % 32 != 0: raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim) trt_yolo = TrtYOLO(args.model, (h, w), args.category_num) vis = BBoxVisualization(cls_dict) loop_and_detect(cap, trt_yolo, args.result_video, conf_th=0.3, vis=vis) cv2.destroyAllWindows()
def main(): args = parse_args() check_args(args) results_file = 'yolo/results_%s.json' % args.model yolo_dim = args.model.split('-')[-1] if 'x' in yolo_dim: dim_split = yolo_dim.split('x') if len(dim_split) != 2: raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim) w, h = int(dim_split[0]), int(dim_split[1]) else: h = w = int(yolo_dim) if h % 32 != 0 or w % 32 != 0: raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim) trt_yolo = TrtYOLO(args.model, (h, w), args.category_num, args.letter_box) jpgs = [j for j in os.listdir(args.imgs_dir) if j.endswith('.jpg')] generate_results(trt_yolo, args.imgs_dir, jpgs, results_file, non_coco=args.non_coco) # Run COCO mAP evaluation # Reference: https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb cocoGt = COCO(args.annotations) cocoDt = cocoGt.loadRes(results_file) imgIds = sorted(cocoGt.getImgIds()) cocoEval = COCOeval(cocoGt, cocoDt, 'bbox') cocoEval.params.imgIds = imgIds cocoEval.evaluate() cocoEval.accumulate() cocoEval.summarize()
def main(): args = parse_args() if args.category_num <= 0: raise SystemExit('ERROR: bad category_num (%d)!' % args.category_num) if not os.path.isfile('yolo/%s.trt' % args.model): raise SystemExit('ERROR: file (yolo/%s.trt) not found!' % args.model) cam = Camera(args) if not cam.isOpened(): raise SystemExit('ERROR: failed to open camera!') cls_dict = get_cls_dict(args.category_num) vis = BBoxVisualization(cls_dict) h, w = get_input_shape(args.model) trt_yolo = TrtYOLO(args.model, (h, w), args.category_num, args.letter_box) mjpeg_server = MjpegServer(port=args.mjpeg_port) print('MJPEG server started...') try: loop_and_detect(cam, trt_yolo, conf_th=0.3, vis=vis, mjpeg_server=mjpeg_server) except Exception as e: print(e) finally: mjpeg_server.shutdown() cam.release()
def main(): args = parse_args() if args.category_num <= 0: raise SystemExit(f'ERROR: bad category_num ({args.category_num})!') if not os.path.isfile(args.model): raise SystemExit(f'ERROR: file {args.model} not found!') # Process valid coco json file process_valid_json(args.valid_coco) if args.write_images: if not os.path.exists(args.image_output): os.mkdir(args.image_output) # Create camera for video/image input cam = Camera(args) if not cam.get_is_opened(): raise SystemExit('ERROR: failed to open camera!') class_dict = get_cls_dict(args.category_num) yolo_dim = (args.model.replace(".trt", "")).split('-')[-1] if 'x' in yolo_dim: dim_split = yolo_dim.split('x') if len(dim_split) != 2: raise SystemExit(f'ERROR: bad yolo_dim ({yolo_dim})!') w, h = int(dim_split[0]), int(dim_split[1]) else: h = w = int(yolo_dim) if h % 32 != 0 or w % 32 != 0: raise SystemExit(f'ERROR: bad yolo_dim ({yolo_dim})!') # Create yolo trt_yolo = TrtYOLO(args.model, (h, w), args.category_num) if args.activate_display: open_window(WINDOW_NAME, 'Camera TensorRT YOLO Demo', cam.img_width, cam.img_height) visual = BBoxVisualization(class_dict) # Run detection loop_and_detect(cam, trt_yolo, args, confidence_thresh=args.confidence_threshold, visual=visual) # Clean up cam.release() if args.activate_display: cv2.destroyAllWindows()
def yolo_detection(): # dev = cuda.Device(0) # ctx = dev.make_context() args = parse_args() print(args) """ config assert """ if args.category_num <= 0: raise SystemExit('ERROR: bad category_num (%d)!' % args.category_num) if not os.path.isfile('yolo/darknet/%s.trt' % args.model): raise SystemExit('ERROR: file (yolo/darknet/%s.trt) not found!' % args.model) cls_dict = get_cls_dict(args.category_num) yolo_dim = args.model.split('-')[-1] if 'x' in yolo_dim: dim_split = yolo_dim.split('x') if len(dim_split) != 2: raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim) w, h = int(dim_split[0]), int(dim_split[1]) else: h = w = int(yolo_dim) if h % 32 != 0 or w % 32 != 0: raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim) """ capture the image """ cam = Camera(args) if not cam.isOpened(): raise SystemExit('ERROR: failed to open camera!') """ deploy the yolo model """ trt_yolo = TrtYOLO(args.model, (h, w), args.category_num) # open_window( # WINDOW_NAME, 'Camera TensorRT YOLO Demo', # cam.img_width, cam.img_height) """ detect the insulator using model """ vis = BBoxVisualization(cls_dict) loop_and_detect(cam, trt_yolo, conf_th=0.3, vis=vis) """ release the image """ cam.release()
def main(): args = parse_args() if args.category_num <= 0: raise SystemExit('ERROR: bad category_num (%d)!' % args.category_num) if not os.path.isfile('yolo/%s.trt' % args.model): raise SystemExit('ERROR: file (yolo/%s.trt) not found!' % args.model) client = init_mqtt(args.host, args.port) cam = Camera(args) if not cam.isOpened(): raise SystemExit('ERROR: failed to open camera!') cls_dict = get_cls_dict(args.category_num) print("cls_dict:", cls_dict) #print(cls_dict[3]) yolo_dim = args.model.split('-')[-1] print("yolo_dim:", yolo_dim) if 'x' in yolo_dim: dim_split = yolo_dim.split('x') if len(dim_split) != 2: raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim) w, h = int(dim_split[0]), int(dim_split[1]) else: h = w = int(yolo_dim) print('w:{0}, h:{1}'.format(w, h)) if h % 32 != 0 or w % 32 != 0: raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim) trt_yolo = TrtYOLO(args.model, (h, w), args.category_num, args.letter_box) open_window(WINDOW_NAME, 'Camera TensorRT YOLO Demo', cam.img_width, cam.img_height) vis = BBoxVisualization(cls_dict) loop_and_detect(cam, trt_yolo, conf_th=0.3, vis=vis, \ cls_dict=cls_dict, client=client, topic=args.topic) cam.release() cv2.destroyAllWindows() client.disclose()
def main(): args = parse_args() if args.category_num <= 0: raise SystemExit('ERROR: bad category_num (%d)!' % args.category_num) if not os.path.isfile('yolo/%s.trt' % args.model): raise SystemExit('ERROR: file (yolo/%s.trt) not found!' % args.model) cam = Camera(args) if not cam.isOpened(): raise SystemExit('ERROR: failed to open camera!') cls_dict = get_cls_dict(args.category_num) vis = BBoxVisualization(cls_dict) trt_yolo = TrtYOLO(args.model, args.category_num, args.letter_box) open_window(WINDOW_NAME, 'Camera TensorRT YOLO Demo', cam.img_width, cam.img_height) loop_and_detect(cam, trt_yolo, conf_th=0.3, vis=vis) cam.release() cv2.destroyAllWindows()
def main(): args = parse_args() if args.category_num <= 0: raise SystemExit('ERROR: bad category_num (%d)!' % args.category_num) if not os.path.isfile('yolo/%s.trt' % args.model): raise SystemExit('ERROR: file (yolo/%s.trt) not found!' % args.model) cap = cv2.VideoCapture(args.video) if not cap.isOpened(): raise SystemExit('ERROR: failed to open the input video file!') frame_width, frame_height = int(cap.get(3)), int(cap.get(4)) writer = cv2.VideoWriter(args.output, cv2.VideoWriter_fourcc(*'mp4v'), 30, (frame_width, frame_height)) cls_dict = get_cls_dict(args.category_num) vis = BBoxVisualization(cls_dict) trt_yolo = TrtYOLO(args.model, args.category_num, args.letter_box) loop_and_detect(cap, trt_yolo, conf_th=0.3, vis=vis, writer=writer) writer.release() cap.release()
def main(): args = parse_args() if args.category_num <= 0: raise SystemExit('ERROR: bad category_num (%d)!' % args.category_num) if not os.path.isfile('yolo/%s.trt' % args.model): raise SystemExit('ERROR: file (yolo/%s.trt) not found!' % args.model) cam = Camera(args) if not cam.isOpened(): raise SystemExit('ERROR: failed to open camera!') cls_dict = get_cls_dict(args.category_num) yolo_dim = args.model.split('-')[-1] if 'x' in yolo_dim: dim_split = yolo_dim.split('x') if len(dim_split) != 2: raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim) w, h = int(dim_split[0]), int(dim_split[1]) else: h = w = int(yolo_dim) if h % 32 != 0 or w % 32 != 0: raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim) trt_yolo = TrtYOLO(args.model, (h, w), args.category_num) vis = BBoxVisualization(cls_dict) mjpeg_server = MjpegServer(port=args.mjpeg_port) print('MJPEG server started...') try: loop_and_detect(cam, trt_yolo, conf_th=0.3, vis=vis, mjpeg_server=mjpeg_server) except Exception as e: print(e) finally: mjpeg_server.shutdown() cam.release()
def main(): args = parse_args() if args.category_num <= 0: raise SystemExit('ERROR: bad category_num (%d)!' % args.category_num) if not os.path.isfile('yolo/%s.trt' % args.model): raise SystemExit('ERROR: file (yolo/%s.trt) not found!' % args.model) cam = Camera(args) if not cam.isOpened(): raise SystemExit('ERROR: failed to open camera!') cls_dict = get_cls_dict(args.category_num) yolo_dim = args.model.split('-')[-1] if 'x' in yolo_dim: dim_split = yolo_dim.split('x') if len(dim_split) != 2: raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim) w, h = int(dim_split[0]), int(dim_split[1]) else: h = w = int(yolo_dim) if h % 32 != 0 or w % 32 != 0: raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim) load_weight_start = time.time() trt_yolo = TrtYOLO(args.model, (h, w), args.category_num) load_weights_time = datetime.timedelta(seconds=time.time() - load_weight_start) print('Load weights Time: %s' % (load_weights_time)) open_window(WINDOW_NAME, 'Camera TensorRT YOLO Demo', cam.img_width, cam.img_height) vis = BBoxVisualization(cls_dict) loop_and_detect(cam, trt_yolo, conf_th=0.3, vis=vis) reporter = MemReporter() reporter.report() cam.release() cv2.destroyAllWindows()
class YOLOv4: def __init__(self, frame_info_pool=None, detection_result_pool=None, final_result_pool=None, model='yolov4-416', dataset='obstacle'): """ :param model: model name :param category_num: """ self.results = dict() self.model_name = model self.dataset = dataset self.frame_info_pool = frame_info_pool self.detection_result_pool = detection_result_pool self.final_result_pool = final_result_pool if dataset == "coco": category_num = 80 else: category_num = 15 self.cls_dict = get_cls_dict(category_num) yolo_dim = model.split('-')[-1] if 'x' in yolo_dim: dim_split = yolo_dim.split('x') if len(dim_split) != 2: raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim) w, h = int(dim_split[0]), int(dim_split[1]) else: h = w = int(yolo_dim) if h % 32 != 0 or w % 32 != 0: raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim) self.model = TrtYOLO(model, (h, w), category_num) PrintLog.i("Object detection model is loaded - {}\t{}".format( model, dataset)) def inference_by_image(self, image, confidence_threshold=0.1): """ :param image: input image :param confidence_threshold: confidence/score threshold for object detection. :return: bounding box(x1, y1, x2, y2), scores, classes """ boxes, scores, classes = self.model.detect(image, confidence_threshold) results = [] for box, score, cls in zip(boxes, scores, classes): results.append({ "label": [{ "description": self.cls_dict.get(int(cls), 'CLS{}'.format(int(cls))), "score": float(score), "class_idx": int(cls) }], "position": { "x": int(box[0]), "y": int(box[1]), "w": int(box[2] - box[0]), "h": int(box[3] - box[1]) } }) return results def run(self): PrintLog.i("Frame Number\tTimestamp") while True: if len(self.frame_info_pool) > 0: frame_info = self.frame_info_pool.pop(0) result = self.inference_by_image(frame_info["frame"]) detection_result = dict() detection_result["cam_address"] = frame_info["cam_address"] detection_result["timestamp"] = frame_info["timestamp"] detection_result["frame_number"] = frame_info["frame_number"] detection_result["results"] = [] detection_result["results"].append( {"detection_result": result}) self.detection_result_pool.append(detection_result)
import cv2 from time import sleep import time import subprocess as sp from utils.yolo_classes import get_cls_dict from utils.visualization import BBoxVisualization from utils.yolo_with_plugins import TrtYOLO import pycuda.autoinit cap = cv2.VideoCapture('rtsp://localhost:8554/stream_input') cls_dict = get_cls_dict(80) h = w = 288 model = 'yolov4-tiny-288' trt_yolo = TrtYOLO(model, (h, w)) vis = BBoxVisualization(cls_dict) rtsp_server_output = 'rtsp://localhost:8554/stream_output' command = ['ffmpeg', '-re', # '-s', sizeStr, # '-r', str(fps), # rtsp fps (from input server) # '-f', 'v4l2', '-i', '-', # You can change ffmpeg parameter after this item. # '-pix_fmt', 'yuv420p', # '-r', '30', # output fps # '-g', '50', # '-c:v', 'libx264', # '-b:v', '2M', # '-bufsize', '64M', # '-maxrate', "4M",
def main(): # initialize camera class cam = Camera() if not cam.is_opened(): raise SystemExit('EROR: failed to open camera!') cls_dict = alprClassNames() # Yolo dimensions (416x416) yolo_dim = 416 h = w = int(yolo_dim) # Initialized model and tools cwd = os.getcwd() model_yolo = str(cwd) + '/weights/yolov4-tiny-416.trt' model_crnn = str(cwd) + '/weights/crnn.pth' trt_yolo = TrtYOLO(model_yolo, (h,w), category_num=1) # category number is number of classes crnn = alpr.AutoLPR(decoder='bestPath', normalise=True) crnn.load(crnn_path=model_crnn) open_window(WINDOW_NAME, TITLE, cam.img_width, cam.img_height) vis = BBoxVisualization(cls_dict) # Loop and detect full_scrn = False fps = 0.0 tic = time.time() while True: if cv2.getWindowProperty(WINDOW_NAME, 0) < 0: break img = cam.read() if img is None: break # Detect car plate boxes confs, clss = trt_yolo.detect(img, conf_th=0.5) # Crop and preprocess car plate cropped = vis.crop_plate(img, boxes, confs, clss) # Recognize car plate lp_plate = '' fileLocate = str(cwd) + '/detections/detection1.jpg' if os.path.exists(fileLocate): lp_plate = lpr.predict(fileLocate) # Draw boxes and fps img = vis.draw_bboxes(img, boxes, confs, clss, lp=lp_plate) img = show_fps(img, fps) # Show image cv2.imshow(WINDOW_NAME, img) # Calculate fps toc = time.time() curr_fps = 1.0 / (toc - tic) fps = curr_fps if fps == 0.0 else (fps*0.95 + curr_fps*0.05) tic = toc # Exit key key = cv2.waitKey(1) if key == 27: # ESC key: quit program break # Release capture and destroy all windows cam.release() cv2.destroyAllWindows()
def detect(config): COLOR_AROUND_DOOR = (48, 58, 221) COLOR_DOOR = (23, 158, 21) COLOR_LINE = (214, 4, 54) sent_videos = set() video_name = "" fpeses = [] fps = 0 # door_array = select_object() # door_array = [475, 69, 557, 258] global flag, vid_writer, lost_ids # initial parameters door_array = [611, 70, 663, 310] around_door_array = [507, 24, 724, 374] low_border = 225 high_border = 342 # door_c = find_centroid(door_array) rect_door = Rectangle(door_array[0], door_array[1], door_array[2], door_array[3]) rect_around_door = Rectangle(around_door_array[0], around_door_array[1], around_door_array[2], around_door_array[3]) # socket HOST = "localhost" PORT = 8083 # camera info save_img = True imgsz = (416, 416) if ONNX_EXPORT else config[ "img_size"] # (320, 192) or (416, 256) or (608, 352) for (height, width) out, source, weights, half, view_img = config["output"], config["source"], config["weights"], \ config["half"], config["view_img"] webcam = source == '0' or source.startswith('rtsp') or source.startswith( 'http') or source.endswith('.txt') # initialize deepsort cfg = get_config() cfg.merge_from_file(config["config_deepsort"]) # initial objects of classes counter = Counter() VideoHandler = Writer() deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT, max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE, nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE, max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET, use_cuda=True) # Initialize device, weights etc. device = torch_utils.select_device( device='cpu' if ONNX_EXPORT else config["device"]) if os.path.exists(out): shutil.rmtree(out) # delete output folder os.makedirs(out) # make new output folder # Initialize colors names = load_classes(config["names"]) colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))] if config["category_num"] <= 0: raise SystemExit('ERROR: bad category_num (%d)!' % config["category_num"]) if not os.path.isfile('yolo/%s.trt' % config["model"]): raise SystemExit('ERROR: file (yolo/%s.trt) not found!' % config["model"]) # cap = cv2.VideoCapture(config["source"]) # if not cap.isOpened(): # raise SystemExit('ERROR: failed to open the input video file!') # frame_width, frame_height = int(cap.get(3)), int(cap.get(4)) webcam = source == '0' or source.startswith('rtsp') or source.startswith( 'http') or source.endswith('.txt') if webcam: torch.backends.cudnn.benchmark = True # set True to speed up constant image size inference dataset = LoadStreams(source, img_size=imgsz) else: save_img = True dataset = LoadImages(source, img_size=imgsz) img = torch.zeros((3, imgsz, imgsz), device=device) # init img cls_dict = get_cls_dict(config["category_num"]) #vis = BBoxVisualization(cls_dict) vis = None h, w = get_input_shape(config["model"]) trt_yolo = TrtYOLO(config["model"], (h, w), config["category_num"], config["letter_box"]) with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock: sock.connect((HOST, PORT)) img_shape = (256, 256) for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset): t0 = time.time() flag_move = False flag_anyone_in_door = False ratio_detection = 0 # Process detections lost_ids = counter.return_lost_ids() if webcam: # batch_size >= 1 p, s, im0 = path[0], '%g: ' % 0, im0s[0].copy( ) # TODO mb needed in loop for detection else: p, s, im0 = path, '', im0s preds, confs, clss = perform_detection( frame=im0, trt_yolo=trt_yolo, conf_th=config["conf_thres"], vis=vis) scaled_pred = [] scaled_conf = [] detections = torch.Tensor() for i, (det, conf, cls) in enumerate(zip(preds, confs, clss)): if det is not None and len(det): if names[int(cls)] not in config["needed_classes"]: continue det = xyxy_to_xywh(det) # det = scale_coords(img_shape, det, im0.shape) scaled_pred.append(det) scaled_conf.append(conf) detections = torch.Tensor(scaled_pred) confidences = torch.Tensor(scaled_conf) # Pass detections to deepsort if len(detections) != 0: outputs = deepsort.update(detections, confidences, im0) # draw boxes for visualization if len(outputs) > 0: bbox_xyxy = outputs[:, :4] identities = outputs[:, -1] draw_boxes(im0, bbox_xyxy, identities) # print('bbox_xywh ', bbox_xywh, 'id', identities) counter.update_identities(identities) for bbox_tracked, id_tracked in zip(bbox_xyxy, identities): ratio_initial = find_ratio_ofbboxes( bbox=bbox_tracked, rect_compare=rect_around_door) ratio_door = find_ratio_ofbboxes( bbox=bbox_tracked, rect_compare=rect_door) # чел первый раз в контуре двери if ratio_initial > 0.2: if VideoHandler.counter_frames_indoor == 0: # флаг о начале записи VideoHandler.start_video(id_tracked) flag_anyone_in_door = True elif ratio_initial > 0.2 and id_tracked not in VideoHandler.id_inside_door_detected: VideoHandler.continue_opened_video(id=id_tracked, seconds=3) flag_anyone_in_door = True if id_tracked not in counter.people_init or counter.people_init[ id_tracked] == 0: counter.obj_initialized(id_tracked) if ratio_door >= 0.2 and low_border < bbox_tracked[ 3] < high_border: # was initialized in door, probably going out of office counter.people_init[id_tracked] = 2 elif ratio_door < 0.2: # initialized in the corridor, mb going in counter.people_init[id_tracked] = 1 # else: # # res is None, means that object is not in door contour # counter.people_init[id_tracked] = 1 counter.frame_age_counter[id_tracked] = 0 counter.people_bbox[id_tracked] = bbox_tracked counter.cur_bbox[id_tracked] = bbox_tracked else: deepsort.increment_ages() if counter.need_to_clear(): counter.clear_all() # Stream results vals_to_del = [] for val in counter.people_init.keys(): # check bbox also cur_c = find_centroid(counter.cur_bbox[val]) centroid_distance = np.sum( np.array([(door_c[i] - cur_c[i])**2 for i in range(len(door_c))])) ratio = find_ratio_ofbboxes(bbox=counter.cur_bbox[val], rect_compare=rect_door) if val in lost_ids and counter.people_init[val] != -1: # if vector_person < 0 then current coord is less than initialized, it means that man is going # in the exit direction if counter.people_init[val] == 2 \ and ratio < 0.4 and centroid_distance > 5000: print('ratio out: {}\n centroids: {}\n'.format( ratio, centroid_distance)) counter.get_out() counter.people_init[val] = -1 VideoHandler.stop_recording( action_occured="вышел из кабинета") vals_to_del.append(val) elif counter.people_init[val] == 1 \ and ratio >= 0.4 and centroid_distance < 5000: print('ratio in: {}\n centroids: {}\n'.format( ratio, centroid_distance)) counter.get_in() counter.people_init[val] = -1 VideoHandler.stop_recording( action_occured="зашел внутрь") vals_to_del.append(val) lost_ids.remove(val) # TODO maybe delete this condition elif counter.frame_age_counter.get(val, 0) >= counter.max_frame_age_counter \ and counter.people_init[val] == 2: if ratio < 0.2 and centroid_distance > 10000: counter.get_out() print('ratio out max frames: ', ratio) counter.people_init[val] = -1 VideoHandler.stop_recording(action_occured="вышел") vals_to_del.append(val) counter.age_counter[val] = 0 counter.clear_lost_ids() for valtodel in vals_to_del: counter.delete_person_data(track_id=valtodel) ins, outs = counter.show_counter() cv2.rectangle(im0, (0, 0), (250, 50), (0, 0, 0), -1, 8) cv2.rectangle(im0, (int(door_array[0]), int(door_array[1])), (int(door_array[2]), int(door_array[3])), COLOR_DOOR, 3) cv2.rectangle( im0, (int(around_door_array[0]), int(around_door_array[1])), (int(around_door_array[2]), int(around_door_array[3])), COLOR_AROUND_DOOR, 3) cv2.putText(im0, "in: {}, out: {} ".format(ins, outs), (10, 35), 0, 1e-3 * im0.shape[0], (255, 255, 255), 3) cv2.line(im0, (door_array[0], low_border), (680, low_border), COLOR_LINE, 4) cv2.line(im0, (door_array[0], high_border), (680, high_border), COLOR_LINE, 4) if VideoHandler.stop_writing(im0): # send_new_posts(video_name, action_occured) sock.sendall( bytes( VideoHandler.video_name + ":" + VideoHandler.action_occured, "utf-8")) data = sock.recv(100) print('Received', repr(data.decode("utf-8"))) sent_videos.add(VideoHandler.video_name) with open('data_files/logs2.txt', 'a', encoding="utf-8-sig") as wr: wr.write( 'video {}, action: {}, centroid: {}, ratio_init: {}, ratio_door: {}, ratio: {} \n' .format(VideoHandler.video_name, VideoHandler.action_occured, centroid_distance, ratio_initial, ratio_door, ratio)) print('_________________video was sent _________________') VideoHandler = Writer() VideoHandler.set_fps(fps) else: VideoHandler.continue_writing(im0, flag_anyone_in_door) if view_img is True: cv2.imshow('image', im0) cv2.waitKey(1) if cv2.waitKey(1) == ord('q'): # q to quit raise StopIteration delta_time = (time.time() - t0) # t2_ds = time.time() # print('%s Torch:. (%.3fs)' % (s, t2 - t1)) # print('Full pipe. (%.3fs)' % (t2_ds - t0_ds)) if len(fpeses) < 15: fpeses.append(round(1 / delta_time)) print(delta_time) elif len(fpeses) == 15: # fps = round(np.median(np.array(fpeses))) median_fps = float(np.median(np.array(fpeses))) fps = round(median_fps, 2) print('max fps: ', fps) fps = 20 VideoHandler.set_fps(fps) counter.set_fps(fps) fpeses.append(fps) motion_detection = True else: if VideoHandler.flag_writing_video: print('\writing video ') if VideoHandler.flag_stop_writing: print('stop writing') if flag_anyone_in_door: print('anyone in door') if VideoHandler.counter_frames_indoor: print('counter frames indoor: {}'.format( VideoHandler.counter_frames_indoor))
def detect(config): sent_videos = set() video_name = "" fpeses = [] fps = 0 # door_array = select_object() # door_array = [475, 69, 557, 258] global flag, vid_writer, lost_ids # initial parameters # door_array = [528, 21, 581, 315] # door_array = [596, 76, 650, 295] # 18 stream door_array = [611, 70, 663, 310] # around_door_array = [572, 79, 694, 306] # # around_door_array = [470, 34, 722, 391] around_door_array = [507, 24, 724, 374] low_border = 225 # door_c = find_centroid(door_array) rect_door = Rectangle(door_array[0], door_array[1], door_array[2], door_array[3]) rect_around_door = Rectangle(around_door_array[0], around_door_array[1], around_door_array[2], around_door_array[3]) # socket HOST = "localhost" PORT = 8084 # camera info save_img = True imgsz = (416, 416) if ONNX_EXPORT else config[ "img_size"] # (320, 192) or (416, 256) or (608, 352) for (height, width) out, source, weights, half, view_img = config["output"], config["source"], config["weights"], \ config["half"], config["view_img"] webcam = source == '0' or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt') # initialize deepsort cfg = get_config() cfg.merge_from_file(config["config_deepsort"]) # initial objects of classes counter = Counter(counter_in=0, counter_out=0, track_id=0) VideoHandler = Writer() deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT, max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE, nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE, max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET, use_cuda=True) # Initialize device, weights etc. if os.path.exists(out): shutil.rmtree(out) # delete output folder os.makedirs(out) # make new output folder # Initialize colors names = load_classes(config["names"]) colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))] if config["category_num"] <= 0: raise SystemExit('ERROR: bad category_num (%d)!' % config["category_num"]) if not os.path.isfile('yolo/%s.trt' % config["model"]): raise SystemExit('ERROR: file (yolo/%s.trt) not found!' % config["model"]) cap = cv2.VideoCapture(config["source"]) if not cap.isOpened(): raise SystemExit('ERROR: failed to open the input video file!') frame_width, frame_height = int(cap.get(3)), int(cap.get(4)) cls_dict = get_cls_dict(config["category_num"]) vis = BBoxVisualization(cls_dict) h, w = get_input_shape(config["model"]) trt_yolo = TrtYOLO(config["model"], (h, w), config["category_num"], config["letter_box"]) with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock: sock.connect((HOST, PORT)) img_shape = (288, 288) # for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset): while True: ret, im0 = cap.read() if not ret: break preds, confs, clss = perform_detection(frame=im0, trt_yolo=trt_yolo, conf_th=config["conf_thres"], vis=vis) flag_move = False flag_anyone_in_door = False t0 = time.time() ratio_detection = 0 # Process detections lost_ids = counter.return_lost_ids() for i, (det, conf, cls) in enumerate(zip( preds, confs, clss)): if det is not None and len(det): # Rescale boxes from imgsz to im0 size # det = scale_coords(img_shape, det, im0.shape).round() if names[int(cls)] not in config["needed_classes"]: continue # bbox_xywh = [] # confs = [] # Write results if save_img or view_img: # Add bbox to image label = '%s %.2f' % (names[int(cls)], conf) plot_one_box(det, im0, label=label, color=colors[int(cls)]) detections = torch.Tensor(preds) confidences = torch.Tensor(confs) # Pass detections to deepsort if len(detections) == 0: continue outputs = deepsort.update(detections, confidences, im0) print('detections ', detections) print('outputs ', outputs) # draw boxes for visualization if len(outputs) > 0: bbox_xyxy = outputs[:, :4] identities = outputs[:, -1] draw_boxes(im0, bbox_xyxy, identities) print('bbox_xyxy ', bbox_xyxy) counter.update_identities(identities) for bbox_tracked, id_tracked in zip(bbox_xyxy, identities): rect_detection = Rectangle(bbox_tracked[0], bbox_tracked[1], bbox_tracked[2], bbox_tracked[3]) inter_detection = rect_detection & rect_around_door if inter_detection: inter_square_detection = rect_square(*inter_detection) cur_square_detection = rect_square(*rect_detection) try: ratio_detection = inter_square_detection / cur_square_detection except ZeroDivisionError: ratio_detection = 0 # чел первый раз в контуре двери if ratio_detection > 0.2: if VideoHandler.counter_frames_indoor == 0: # флаг о начале записи VideoHandler.start_video(id_tracked) flag_anyone_in_door = True elif ratio_detection > 0.2 and id_tracked not in VideoHandler.id_inside_door_detected: VideoHandler.continue_opened_video(id=id_tracked, seconds=3) flag_anyone_in_door = True # elif ratio_detection > 0.6 and counter.people_init.get(id_tracked) == 1: # VideoHandler.continue_opened_video(id=id_tracked, seconds=0.005) if id_tracked not in counter.people_init or counter.people_init[id_tracked] == 0: counter.obj_initialized(id_tracked) rect_head = Rectangle(bbox_tracked[0], bbox_tracked[1], bbox_tracked[2], bbox_tracked[3]) intersection = rect_head & rect_door if intersection: intersection_square = rect_square(*intersection) head_square = rect_square(*rect_head) rat = intersection_square / head_square if rat >= 0.4 and bbox_tracked[3] > low_border : # was initialized in door, probably going out of office counter.people_init[id_tracked] = 2 elif rat < 0.4: # initialized in the corridor, mb going in counter.people_init[id_tracked] = 1 else: # res is None, means that object is not in door contour counter.people_init[id_tracked] = 1 counter.frame_age_counter[id_tracked] = 0 counter.people_bbox[id_tracked] = bbox_tracked counter.cur_bbox[id_tracked] = bbox_tracked else: deepsort.increment_ages() # Print time (inference + NMS) t2 = torch_utils.time_synchronized() # Stream results vals_to_del = [] for val in counter.people_init.keys(): # check bbox also inter = 0 cur_square = 0 ratio = 0 cur_c = find_centroid(counter.cur_bbox[val]) centroid_distance = np.sum(np.array([(door_c[i] - cur_c[i]) ** 2 for i in range(len(door_c))])) # init_c = find_centroid(counter.people_bbox[val]) # vector_person = (cur_c[0] - init_c[0], # cur_c[1] - init_c[1]) rect_cur = Rectangle(counter.cur_bbox[val][0], counter.cur_bbox[val][1], counter.cur_bbox[val][2], counter.cur_bbox[val][3]) inter = rect_cur & rect_door if val in lost_ids and counter.people_init[val] != -1: if inter: inter_square = rect_square(*inter) cur_square = rect_square(*rect_cur) try: ratio = inter_square / cur_square except ZeroDivisionError: ratio = 0 # if vector_person < 0 then current coord is less than initialized, it means that man is going # in the exit direction if counter.people_init[val] == 2 \ and ratio < 0.4 and centroid_distance > 5000: print('ratio out: {}\n centroids: {}\n'.format(ratio, centroid_distance)) counter.get_out() counter.people_init[val] = -1 VideoHandler.stop_recording(action_occured="вышел из кабинета") vals_to_del.append(val) elif counter.people_init[val] == 1 \ and ratio >= 0.4 and centroid_distance < 1000: print('ratio in: {}\n centroids: {}\n'.format(ratio, centroid_distance)) counter.get_in() counter.people_init[val] = -1 VideoHandler.stop_recording(action_occured="зашел внутрь") vals_to_del.append(val) lost_ids.remove(val) # TODO maybe delete this condition elif counter.frame_age_counter.get(val, 0) >= counter.max_frame_age_counter \ and counter.people_init[val] == 2: if inter: inter_square = rect_square(*inter) cur_square = rect_square(*rect_cur) try: ratio = inter_square / cur_square except ZeroDivisionError: ratio = 0 if ratio < 0.2 and centroid_distance > 10000: counter.get_out() print('ratio out max frames: ', ratio) counter.people_init[val] = -1 VideoHandler.stop_recording(action_occured="вышел") vals_to_del.append(val) counter.age_counter[val] = 0 counter.clear_lost_ids() for valtodel in vals_to_del: counter.delete_person_data(track_id=valtodel) ins, outs = counter.show_counter() cv2.rectangle(im0, (0, 0), (250, 50), (0, 0, 0), -1, 8) cv2.rectangle(im0, (int(door_array[0]), int(door_array[1])), (int(door_array[2]), int(door_array[3])), (23, 158, 21), 3) cv2.rectangle(im0, (int(around_door_array[0]), int(around_door_array[1])), (int(around_door_array[2]), int(around_door_array[3])), (48, 58, 221), 3) cv2.putText(im0, "in: {}, out: {} ".format(ins, outs), (10, 35), 0, 1e-3 * im0.shape[0], (255, 255, 255), 3) cv2.line(im0, (door_array[0], low_border), (880, low_border), (214, 4, 54), 4) if VideoHandler.stop_writing(im0): # send_new_posts(video_name, action_occured) sock.sendall(bytes(VideoHandler.video_name + ":" + VideoHandler.action_occured, "utf-8")) data = sock.recv(100) print('Received', repr(data.decode("utf-8"))) sent_videos.add(VideoHandler.video_name) with open('../data_files/logs2.txt', 'a', encoding="utf-8-sig") as wr: wr.write('video {}, man {}, centroid {} '.format(VideoHandler.video_name, VideoHandler.action_occured, centroid_distance)) VideoHandler = Writer() VideoHandler.set_fps(fps) else: VideoHandler.continue_writing(im0, flag_anyone_in_door) if view_img: cv2.imshow('image', im0) if cv2.waitKey(1) == ord('q'): # q to quit raise StopIteration delta_time = (time.time() - t0) # t2_ds = time.time() # print('%s Torch:. (%.3fs)' % (s, t2 - t1)) # print('Full pipe. (%.3fs)' % (t2_ds - t0_ds)) if len (fpeses) < 30: fpeses.append(round(1 / delta_time)) elif len(fpeses) == 30: # fps = round(np.median(np.array(fpeses))) fps = np.median(np.array(fpeses)) # fps = 3 print('fps set: ', fps) VideoHandler.set_fps(fps) counter.set_fps(fps) fpeses.append(fps) motion_detection = True else: print('\nflag writing video: ', VideoHandler.flag_writing_video) print('flag stop writing: ', VideoHandler.flag_stop_writing) print('flag anyone in door: ', flag_anyone_in_door) print('counter frames indoor: ', VideoHandler.counter_frames_indoor)
class yolov4(object): def __init__(self): """ Constructor """ self.bridge = CvBridge() self.init_params() self.init_yolo() self.cuda_ctx = cuda.Device(0).make_context() self.trt_yolo = TrtYOLO((self.model_path + self.model), (self.h, self.w), self.category_num) def __del__(self): """ Destructor """ self.cuda_ctx.pop() del self.trt_yolo del self.cuda_ctx def clean_up(self): """ Backup destructor: Release cuda memory """ if self.trt_yolo is not None: self.cuda_ctx.pop() del self.trt_yolo del self.cuda_ctx def init_params(self): """ Initializes ros parameters """ rospack = rospkg.RosPack() package_path = rospack.get_path("yolov4_trt_ros") self.video_topic = rospy.get_param("/video_topic", "/video_source/raw") self.model = rospy.get_param("/model", "yolov4") self.model_path = rospy.get_param("/model_path", package_path + "/yolo/") self.input_shape = rospy.get_param("/input_shape", "416") self.category_num = rospy.get_param("/category_number", 80) self.conf_th = rospy.get_param("/confidence_threshold", 0.5) self.show_img = rospy.get_param("/show_image", True) self.image_sub = rospy.Subscriber(self.video_topic, Image, self.img_callback, queue_size=1, buff_size=1920 * 1080 * 3) self.detection_pub = rospy.Publisher("detections", Detector2DArray, queue_size=1) self.overlay_pub = rospy.Publisher("/result/overlay", Image, queue_size=1) def init_yolo(self): """ Initialises yolo parameters required for trt engine """ if self.model.find('-') == -1: self.model = self.model + "-" + self.input_shape yolo_dim = self.model.split('-')[-1] if 'x' in yolo_dim: dim_split = yolo_dim.split('x') if len(dim_split) != 2: raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim) self.w, self.h = int(dim_split[0]), int(dim_split[1]) else: self.h = self.w = int(yolo_dim) if self.h % 32 != 0 or self.w % 32 != 0: raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim) cls_dict = get_cls_dict(self.category_num) self.vis = BBoxVisualization(cls_dict) def img_callback(self, ros_img): """Continuously capture images from camera and do object detection """ tic = time.time() # converts from ros_img to cv_img for processing try: cv_img = self.bridge.imgmsg_to_cv2(ros_img, desired_encoding="bgr8") rospy.logdebug("ROS Image converted for processing") except CvBridgeError as e: rospy.loginfo("Failed to convert image %s", str(e)) if cv_img is not None: boxes, confs, clss = self.trt_yolo.detect(cv_img, self.conf_th) cv_img = self.vis.draw_bboxes(cv_img, boxes, confs, clss) toc = time.time() fps = 1.0 / (toc - tic) self.publisher(boxes, confs, clss) if self.show_img: cv_img = show_fps(cv_img, fps) cv2.imshow("YOLOv4 DETECTION RESULTS", cv_img) cv2.waitKey(1) # converts back to ros_img type for publishing try: overlay_img = self.bridge.cv2_to_imgmsg(cv_img, encoding="passthrough") rospy.logdebug("CV Image converted for publishing") self.overlay_pub.publish(overlay_img) except CvBridgeError as e: rospy.loginfo("Failed to convert image %s", str(e)) def publisher(self, boxes, confs, clss): """ Publishes to detector_msgs Parameters: boxes (List(List(int))) : Bounding boxes of all objects confs (List(double)) : Probability scores of all objects clss (List(int)) : Class ID of all classes """ detection2d = Detector2DArray() detection = Detector2D() for i in range(len(boxes)): # boxes : xmin, ymin, xmax, ymax for _ in boxes: detection.header.stamp = rospy.Time.now() detection.header.frame_id = "camera" # change accordingly detection.results.id = clss[i] detection.results.score = confs[i] detection.bbox.center.x = (boxes[i][2] - boxes[i][0]) / 2 detection.bbox.center.y = (boxes[i][3] - boxes[i][1]) / 2 detection.bbox.center.theta = 0.0 # change if required detection.bbox.size_x = abs(boxes[i][0] - boxes[i][2]) detection.bbox.size_y = abs(boxes[i][1] - boxes[i][3]) detection2d.detections.append(detection) self.detection_pub.publish(detection2d)
elif 'nvarguscamerasrc' in gst_elements: gst_str = ('nvarguscamerasrc ! ' 'video/x-raw(memory:NVMM), ' 'width=(int)1920, height=(int)1080, ' 'format=(string)NV12, framerate=(fraction)30/1 ! ' 'nvvidconv flip-method=2 ! ' 'video/x-raw, width=(int){}, height=(int){}, ' 'format=(string)BGRx ! ' 'videoconvert ! appsink').format(width, height) else: raise RuntimeError('onboard camera source not found!') cap = VideoCapture(gst_str) print("Start to load YoloV4 model") trt_yolo = TrtYOLO('yolov4_my-416', (416, 416), 4) print("YoloV4 model is loaded.") cls_dict = get_cls_dict(4) vis = BBoxVisualization(cls_dict) def detect_center(bboxes): center_x = (bboxes[0][0] / 416 + bboxes[0][2] / 416) / 2.0 - 0.5 center_y = (bboxes[0][1] / 416 + bboxes[0][3] / 416) / 2.0 - 0.5 return (center_x, center_y) speed = 0.5 turn_gain = 0.3
# Added by Adriano Santos import cv2 import time import pycuda.autoinit # This is needed for initializing CUDA driver from utils.yolo_with_plugins import TrtYOLO from utils.yolo_classes import get_class_name # Parameters to test conf_th = 0.5 nms_threshold = 0.5 opt_device = '0,1' color = (255, 0, 0) # Create a model instance trt_yolo = TrtYOLO("yolov3-spp3-416", (416, 416), 1) def main(video): # Start the fake camera camera = cv2.VideoCapture(video) while camera.isOpened(): # Get a frame (status, frame) = camera.read() # Stop conditions key = cv2.waitKey(1) & 0xFF if (status == False or key == ord("q")): break