Example #1
    def __init__(self, frame_info_pool=None, detection_result_pool=None, final_result_pool=None, model='yolov4-416', dataset='obstacle', score_threshold=0.5, nms_threshold=0.3):
        """
        :param model: model name
        :param category_num:
        """
        self.results = dict()
        self.model_name = model
        self.dataset = dataset
        self.frame_info_pool = frame_info_pool
        self.detection_result_pool = detection_result_pool
        self.final_result_pool = final_result_pool
        self.score_threshold = score_threshold
        self.nms_threshold = nms_threshold
        if dataset == "coco":
            category_num = 80
        else:
            category_num = 15
        self.cls_dict = get_cls_dict(category_num)

        yolo_dim = model.split('-')[-1]
        if 'x' in yolo_dim:
            dim_split = yolo_dim.split('x')
            if len(dim_split) != 2:
                raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)
            w, h = int(dim_split[0]), int(dim_split[1])
        else:
            h = w = int(yolo_dim)
        if h % 32 != 0 or w % 32 != 0:
            raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)

        self.model = TrtYOLO(model, (h, w), category_num)

        PrintLog.i("Object detection model is loaded - {}\t{}".format(model, dataset))
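The model-name parsing above (split on '-', require multiples of 32) recurs in most of the examples below. As a consolidated sketch of just that logic, with the helper name parse_yolo_dim being ours rather than from any of these sources:

def parse_yolo_dim(model_name):
    """Return (h, w) parsed from names like 'yolov4-416' or 'yolov4-608x352'."""
    yolo_dim = model_name.split('-')[-1]
    if 'x' in yolo_dim:
        dim_split = yolo_dim.split('x')
        if len(dim_split) != 2:
            raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)
        w, h = int(dim_split[0]), int(dim_split[1])
    else:
        h = w = int(yolo_dim)
    if h % 32 != 0 or w % 32 != 0:
        # YOLO input dimensions must be multiples of 32
        raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)
    return h, w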
Example #2
    def __init__(self):
        """ Constructor """

        self.bridge = CvBridge()
        self.init_params()
        self.init_yolo()
        self.cuda_ctx = cuda.Device(0).make_context()
        self.trt_yolo = TrtYOLO((self.model_path + self.model),
                                (self.h, self.w), self.category_num)
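This constructor pushes a CUDA context but the snippet ends before any teardown; Example #25 below shows the full class, whose destructor pops the context before releasing the engine. A minimal sketch of that pairing, assuming the same attribute names:

    def __del__(self):
        """Destructor: pop the CUDA context before freeing the engine."""
        self.cuda_ctx.pop()
        del self.trt_yolo
        del self.cuda_ctx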
Example #3
def main():
    args = parse_args()
    check_args(args)

    results_file = 'yolo/results_%s.json' % args.model
    yolo_dim = int(args.model.split('-')[-1])  # 288, 416 or 608
    trt_yolo = TrtYOLO(args.model, (yolo_dim, yolo_dim), args.category_num)

    jpgs = [j for j in os.listdir(args.imgs_dir) if j.endswith('.jpg')]
    generate_results(trt_yolo,
                     args.imgs_dir,
                     jpgs,
                     results_file,
                     non_coco=args.non_coco)

    # Run COCO mAP evaluation
    # Reference: https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
    cocoGt = COCO(args.annotations)
    cocoDt = cocoGt.loadRes(results_file)
    imgIds = sorted(cocoGt.getImgIds())
    cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
    cocoEval.params.imgIds = imgIds
    cocoEval.evaluate()
    cocoEval.accumulate()
    cocoEval.summarize()  # prints the table itself and returns None
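cocoGt.loadRes expects results_file to hold standard COCO detection records; generate_results (defined elsewhere in these repos) is assumed to write a JSON list of entries shaped roughly like this, with illustrative values:

{
    "image_id": 581929,                   # parsed from the jpg file name
    "category_id": 1,                     # COCO category id
    "bbox": [389.9, 72.1, 110.4, 262.9],  # [x, y, width, height] in pixels
    "score": 0.87                         # detection confidence
}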
Example #4
    def __init__(self, input_stream: cv.VideoCapture, camera_gps: np.ndarray):
        # todo: add verbosity/display mode flag
        """Constructor for ClassName"""
        self.input_stream = input_stream
        self.camera_gps = camera_gps
        self.normalized_gps_df = pd.DataFrame(dtype=np.float64)
        self.sampling_time = 0.05  # todo: change to use FPS

        # initialize constants
        self.SCALE_FACTOR = 1000.0

        # yolo model specific constants
        self.target_name_dict = {0: 'TargetShip'}
        self.vis = BBoxVisualization(self.target_name_dict)
        self.conf_th = 0.5
        self.display_mode = False  # verbosity flag
        self.model = 'yolov4-416'
        self.model_height = 416
        self.model_width = 416
        self.model_shape = (self.model_height, self.model_width)
        self.category_number = 80
        self.trt_yolo = TrtYOLO(self.model, self.model_shape,
                                self.category_number)

        self.a_lim = 720.0
        self.b_lim = 1280.0 / 2.0
        # todo: load the models here and save them as constants
        super(AitasInterface, self).__init__()
Example #5
def main():
    print(f"{datetime.datetime.now().isoformat()} start!", flush=True)
    args = parse_args()
    if args.category_num <= 0:
        raise SystemExit('ERROR: bad category_num (%d)!' % args.category_num)
    if not os.path.isfile('yolo/%s.trt' % args.model):
        raise SystemExit('ERROR: file (yolo/%s.trt) not found!' % args.model)

    cam = Camera(args)
    if not cam.isOpened():
        raise SystemExit('ERROR: failed to open camera!')

    cls_dict = get_cls_dict(args.category_num)
    yolo_dim = args.model.split('-')[-1]
    if 'x' in yolo_dim:
        dim_split = yolo_dim.split('x')
        if len(dim_split) != 2:
            raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)
        w, h = int(dim_split[0]), int(dim_split[1])
    else:
        h = w = int(yolo_dim)
    if h % 32 != 0 or w % 32 != 0:
        raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)

    trt_yolo = TrtYOLO(args.model, (h, w), args.category_num)

    # open_window(
    #     WINDOW_NAME, 'Camera TensorRT YOLO Demo',
    #     cam.img_width, cam.img_height)
    vis = BBoxVisualization(cls_dict)
    loop_and_detect(cam, trt_yolo, conf_th=0.3, vis=vis)

    cam.release()
Example #6
def run_detection():
    # args = parse_args()
    with open("cfg/detection_tracker_cfg.json") as detection_config:
        detect_config = json.load(detection_config)

    if detect_config["category_num"] <= 0:
        raise SystemExit('ERROR: bad category_num (%d)!' %
                         detect_config["category_num"])
    if not os.path.isfile('yolo/%s.trt' % detect_config["model"]):
        raise SystemExit('ERROR: file (yolo/%s.trt) not found!' %
                         detect_config["model"])

    cap = cv2.VideoCapture(detect_config["source"])
    if not cap.isOpened():
        raise SystemExit('ERROR: failed to open the input video file!')
    frame_width, frame_height = int(cap.get(3)), int(cap.get(4))

    cls_dict = get_cls_dict(detect_config["category_num"])
    #vis = BBoxVisualization(cls_dict)
    h, w = get_input_shape(detect_config["model"])
    trt_yolo = TrtYOLO(detect_config["model"], (h, w),
                       detect_config["category_num"],
                       detect_config["letter_box"])
    ret, frame = cap.read()
    boxes, confs, clss = perform_detection(frame=frame,
                                           trt_yolo=trt_yolo,
                                           conf_th=0.3,
                                           vis=None)
Example #7
def main():
    args = parse_args()
    if args.category_num <= 0:
        raise SystemExit('ERROR: bad category_num (%d)!' % args.category_num)
    if not os.path.isfile('yolo/%s.trt' % args.model):
        raise SystemExit('ERROR: file (yolo/%s.trt) not found!' % args.model)

    cam = Camera(args)
    if not cam.isOpened():
        raise SystemExit('ERROR: failed to open camera!')

    cls_dict = get_cls_dict(args.category_num)
    yolo_dim = args.model.split('-')[-1]
    if 'x' in yolo_dim:
        dim_split = yolo_dim.split('x')
        if len(dim_split) != 2:
            raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)
        w, h = int(dim_split[0]), int(dim_split[1])
    else:
        h = w = int(yolo_dim)
    if h % 32 != 0 or w % 32 != 0:
        raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)

    trt_yolo = TrtYOLO(args.model, (h, w), args.category_num)

    open_window(WINDOW_NAME, 'Camera TensorRT YOLO Demo', 640, 480)

    vis = BBoxVisualization(cls_dict)
    loop_and_detect(cam, trt_yolo, conf_th=0.3, vis=vis)

    cam.release()
    cv2.destroyAllWindows()
Example #8
def main():
    args = parse_args()
    check_args(args)
    if args.category_num <= 0:
        raise SystemExit('ERROR: bad category_num (%d)!' % args.category_num)
    if not os.path.isfile('yolo/%s.trt' % args.model):
        raise SystemExit('ERROR: file (yolo/%s.trt) not found!' % args.model)

    results_file = 'yolo/results_%s.json' % args.model

    trt_yolo = TrtYOLO(args.model, args.category_num, args.letter_box)

    jpgs = [j for j in os.listdir(args.imgs_dir) if j.endswith('.jpg')]
    generate_results(trt_yolo,
                     args.imgs_dir,
                     jpgs,
                     results_file,
                     non_coco=args.non_coco)

    # Run COCO mAP evaluation
    # Reference: https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
    cocoGt = COCO(args.annotations)
    cocoDt = cocoGt.loadRes(results_file)
    imgIds = sorted(cocoGt.getImgIds())
    cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
    cocoEval.params.imgIds = imgIds
    cocoEval.evaluate()
    cocoEval.accumulate()
    cocoEval.summarize()
Example #9
def main():
    args = parse_args()
    if args.category_num <= 0:
        raise SystemExit('ERROR: bad category_num (%d)!' % args.category_num)
    if not os.path.isfile('yolo/%s.trt' % args.model):
        raise SystemExit('ERROR: file (yolo/%s.trt) not found!' % args.model)

    cam = Camera(args)
    if not cam.isOpened():
        raise SystemExit('ERROR: failed to open camera!')

    cls_dict = get_cls_dict(args.category_num)
    vis = BBoxVisualization(cls_dict)
    h, w = get_input_shape(args.model)
    trt_yolo = TrtYOLO(args.model, (h, w), args.category_num, args.letter_box)

    open_window(WINDOW_NAME, 'Camera TensorRT YOLO Demo', cam.img_width,
                cam.img_height)

    msg_queue = Queue(maxsize=100)

    # msg_queue.put("0,0,0,-1".encode())
    Thread(target=serArd, args=(msg_queue, )).start()
    loop_and_detect(cam, trt_yolo, msg_queue, conf_th=0.7, vis=vis)
    while True:
        pass  # keep the main thread alive for the serial-port thread

    cam.release()
    cv2.destroyAllWindows()
Example #10
def main():
    args = parse_args()
    if args.category_num <= 0:
        raise SystemExit('ERROR: bad category_num (%d)!' % args.category_num)
    if not os.path.isfile('yolo/%s.trt' % args.model):
        raise SystemExit('ERROR: file (yolo/%s.trt) not found!' % args.model)

    cap = cv2.VideoCapture(args.video_name)
    if not cap.isOpened():
        print("unable to read source video feed")

    cls_dict = get_cls_dict(args.category_num)
    yolo_dim = args.model.split('-')[-1]
    if 'x' in yolo_dim:
        dim_split = yolo_dim.split('x')
        if len(dim_split) != 2:
            raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)
        w, h = int(dim_split[0]), int(dim_split[1])
    else:
        h = w = int(yolo_dim)
    if h % 32 != 0 or w % 32 != 0:
        raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)

    trt_yolo = TrtYOLO(args.model, (h, w), args.category_num)
    vis = BBoxVisualization(cls_dict)
    loop_and_detect(cap, trt_yolo, args.result_video, conf_th=0.3, vis=vis)

    cv2.destroyAllWindows()
Example #11
def main():
    args = parse_args()
    check_args(args)

    results_file = 'yolo/results_%s.json' % args.model

    yolo_dim = args.model.split('-')[-1]
    if 'x' in yolo_dim:
        dim_split = yolo_dim.split('x')
        if len(dim_split) != 2:
            raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)
        w, h = int(dim_split[0]), int(dim_split[1])
    else:
        h = w = int(yolo_dim)
    if h % 32 != 0 or w % 32 != 0:
        raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)

    trt_yolo = TrtYOLO(args.model, (h, w), args.category_num, args.letter_box)

    jpgs = [j for j in os.listdir(args.imgs_dir) if j.endswith('.jpg')]
    generate_results(trt_yolo, args.imgs_dir, jpgs, results_file,
                     non_coco=args.non_coco)

    # Run COCO mAP evaluation
    # Reference: https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
    cocoGt = COCO(args.annotations)
    cocoDt = cocoGt.loadRes(results_file)
    imgIds = sorted(cocoGt.getImgIds())
    cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
    cocoEval.params.imgIds = imgIds
    cocoEval.evaluate()
    cocoEval.accumulate()
    cocoEval.summarize()
Example #12
def main():
    args = parse_args()
    if args.category_num <= 0:
        raise SystemExit('ERROR: bad category_num (%d)!' % args.category_num)
    if not os.path.isfile('yolo/%s.trt' % args.model):
        raise SystemExit('ERROR: file (yolo/%s.trt) not found!' % args.model)

    cam = Camera(args)
    if not cam.isOpened():
        raise SystemExit('ERROR: failed to open camera!')

    cls_dict = get_cls_dict(args.category_num)
    vis = BBoxVisualization(cls_dict)
    h, w = get_input_shape(args.model)
    trt_yolo = TrtYOLO(args.model, (h, w), args.category_num, args.letter_box)

    mjpeg_server = MjpegServer(port=args.mjpeg_port)
    print('MJPEG server started...')
    try:
        loop_and_detect(cam,
                        trt_yolo,
                        conf_th=0.3,
                        vis=vis,
                        mjpeg_server=mjpeg_server)
    except Exception as e:
        print(e)
    finally:
        mjpeg_server.shutdown()
        cam.release()
Example #13
def main():
    args = parse_args()
    if args.category_num <= 0:
        raise SystemExit(f'ERROR: bad category_num ({args.category_num})!')
    if not os.path.isfile(args.model):
        raise SystemExit(f'ERROR: file {args.model} not found!')

    # Process valid coco json file
    process_valid_json(args.valid_coco)

    if args.write_images:
        if not os.path.exists(args.image_output): os.mkdir(args.image_output)

    # Create camera for video/image input
    cam = Camera(args)
    if not cam.get_is_opened():
        raise SystemExit('ERROR: failed to open camera!')

    class_dict = get_cls_dict(args.category_num)
    yolo_dim = (args.model.replace(".trt", "")).split('-')[-1]
    if 'x' in yolo_dim:
        dim_split = yolo_dim.split('x')
        if len(dim_split) != 2:
            raise SystemExit(f'ERROR: bad yolo_dim ({yolo_dim})!')
        w, h = int(dim_split[0]), int(dim_split[1])
    else:
        h = w = int(yolo_dim)
    if h % 32 != 0 or w % 32 != 0:
        raise SystemExit(f'ERROR: bad yolo_dim ({yolo_dim})!')

    # Create yolo
    trt_yolo = TrtYOLO(args.model, (h, w), args.category_num)

    if args.activate_display:
        open_window(WINDOW_NAME, 'Camera TensorRT YOLO Demo', cam.img_width,
                    cam.img_height)
    visual = BBoxVisualization(class_dict)

    # Run detection
    loop_and_detect(cam,
                    trt_yolo,
                    args,
                    confidence_thresh=args.confidence_threshold,
                    visual=visual)

    # Clean up
    cam.release()
    if args.activate_display:
        cv2.destroyAllWindows()
Example #14
def yolo_detection():
    # dev = cuda.Device(0)
    # ctx = dev.make_context()
    args = parse_args()
    print(args)
    """
    config  assert
    """
    if args.category_num <= 0:
        raise SystemExit('ERROR: bad category_num (%d)!' % args.category_num)
    if not os.path.isfile('yolo/darknet/%s.trt' % args.model):
        raise SystemExit('ERROR: file (yolo/darknet/%s.trt) not found!' %
                         args.model)
    cls_dict = get_cls_dict(args.category_num)
    yolo_dim = args.model.split('-')[-1]

    if 'x' in yolo_dim:
        dim_split = yolo_dim.split('x')
        if len(dim_split) != 2:
            raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)
        w, h = int(dim_split[0]), int(dim_split[1])
    else:
        h = w = int(yolo_dim)
    if h % 32 != 0 or w % 32 != 0:
        raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)
    """
    capture the image
    """
    cam = Camera(args)
    if not cam.isOpened():
        raise SystemExit('ERROR: failed to open camera!')
    """
    deploy the yolo model
    """
    trt_yolo = TrtYOLO(args.model, (h, w), args.category_num)

    # open_window(
    #     WINDOW_NAME, 'Camera TensorRT YOLO Demo',
    #     cam.img_width, cam.img_height)
    """
    detect the insulator using model
    """
    vis = BBoxVisualization(cls_dict)
    loop_and_detect(cam, trt_yolo, conf_th=0.3, vis=vis)
    """
    release the image
    """

    cam.release()
Example #15
def main():
    args = parse_args()
    if args.category_num <= 0:
        raise SystemExit('ERROR: bad category_num (%d)!' % args.category_num)
    if not os.path.isfile('yolo/%s.trt' % args.model):
        raise SystemExit('ERROR: file (yolo/%s.trt) not found!' % args.model)

    client = init_mqtt(args.host, args.port)

    cam = Camera(args)
    if not cam.isOpened():
        raise SystemExit('ERROR: failed to open camera!')

    cls_dict = get_cls_dict(args.category_num)
    print("cls_dict:", cls_dict)
    #print(cls_dict[3])
    yolo_dim = args.model.split('-')[-1]
    print("yolo_dim:", yolo_dim)
    if 'x' in yolo_dim:
        dim_split = yolo_dim.split('x')
        if len(dim_split) != 2:
            raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)
        w, h = int(dim_split[0]), int(dim_split[1])
    else:
        h = w = int(yolo_dim)
        print('w:{0}, h:{1}'.format(w, h))
    if h % 32 != 0 or w % 32 != 0:
        raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)

    trt_yolo = TrtYOLO(args.model, (h, w), args.category_num, args.letter_box)

    open_window(WINDOW_NAME, 'Camera TensorRT YOLO Demo', cam.img_width,
                cam.img_height)
    vis = BBoxVisualization(cls_dict)
    loop_and_detect(cam, trt_yolo, conf_th=0.3, vis=vis, \
        cls_dict=cls_dict, client=client, topic=args.topic)

    cam.release()
    cv2.destroyAllWindows()

    client.disconnect()  # assuming a paho-style client: disconnect(), not disclose()
Example #16
def main():
    args = parse_args()
    if args.category_num <= 0:
        raise SystemExit('ERROR: bad category_num (%d)!' % args.category_num)
    if not os.path.isfile('yolo/%s.trt' % args.model):
        raise SystemExit('ERROR: file (yolo/%s.trt) not found!' % args.model)

    cam = Camera(args)
    if not cam.isOpened():
        raise SystemExit('ERROR: failed to open camera!')

    cls_dict = get_cls_dict(args.category_num)
    vis = BBoxVisualization(cls_dict)
    trt_yolo = TrtYOLO(args.model, args.category_num, args.letter_box)

    open_window(WINDOW_NAME, 'Camera TensorRT YOLO Demo', cam.img_width,
                cam.img_height)
    loop_and_detect(cam, trt_yolo, conf_th=0.3, vis=vis)

    cam.release()
    cv2.destroyAllWindows()
Example #17
def main():
    args = parse_args()
    if args.category_num <= 0:
        raise SystemExit('ERROR: bad category_num (%d)!' % args.category_num)
    if not os.path.isfile('yolo/%s.trt' % args.model):
        raise SystemExit('ERROR: file (yolo/%s.trt) not found!' % args.model)

    cap = cv2.VideoCapture(args.video)
    if not cap.isOpened():
        raise SystemExit('ERROR: failed to open the input video file!')
    frame_width, frame_height = int(cap.get(3)), int(cap.get(4))
    writer = cv2.VideoWriter(args.output, cv2.VideoWriter_fourcc(*'mp4v'), 30,
                             (frame_width, frame_height))

    cls_dict = get_cls_dict(args.category_num)
    vis = BBoxVisualization(cls_dict)
    trt_yolo = TrtYOLO(args.model, args.category_num, args.letter_box)

    loop_and_detect(cap, trt_yolo, conf_th=0.3, vis=vis, writer=writer)

    writer.release()
    cap.release()
Example #18
def main():
    args = parse_args()
    if args.category_num <= 0:
        raise SystemExit('ERROR: bad category_num (%d)!' % args.category_num)
    if not os.path.isfile('yolo/%s.trt' % args.model):
        raise SystemExit('ERROR: file (yolo/%s.trt) not found!' % args.model)

    cam = Camera(args)
    if not cam.isOpened():
        raise SystemExit('ERROR: failed to open camera!')

    cls_dict = get_cls_dict(args.category_num)
    yolo_dim = args.model.split('-')[-1]
    if 'x' in yolo_dim:
        dim_split = yolo_dim.split('x')
        if len(dim_split) != 2:
            raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)
        w, h = int(dim_split[0]), int(dim_split[1])
    else:
        h = w = int(yolo_dim)
    if h % 32 != 0 or w % 32 != 0:
        raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)

    trt_yolo = TrtYOLO(args.model, (h, w), args.category_num)

    vis = BBoxVisualization(cls_dict)
    mjpeg_server = MjpegServer(port=args.mjpeg_port)
    print('MJPEG server started...')
    try:
        loop_and_detect(cam,
                        trt_yolo,
                        conf_th=0.3,
                        vis=vis,
                        mjpeg_server=mjpeg_server)
    except Exception as e:
        print(e)
    finally:
        mjpeg_server.shutdown()
        cam.release()
Example #19
def main():
    args = parse_args()
    if args.category_num <= 0:
        raise SystemExit('ERROR: bad category_num (%d)!' % args.category_num)
    if not os.path.isfile('yolo/%s.trt' % args.model):
        raise SystemExit('ERROR: file (yolo/%s.trt) not found!' % args.model)

    cam = Camera(args)
    if not cam.isOpened():
        raise SystemExit('ERROR: failed to open camera!')

    cls_dict = get_cls_dict(args.category_num)
    yolo_dim = args.model.split('-')[-1]
    if 'x' in yolo_dim:
        dim_split = yolo_dim.split('x')
        if len(dim_split) != 2:
            raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)
        w, h = int(dim_split[0]), int(dim_split[1])
    else:
        h = w = int(yolo_dim)
    if h % 32 != 0 or w % 32 != 0:
        raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)

    load_weight_start = time.time()
    trt_yolo = TrtYOLO(args.model, (h, w), args.category_num)
    load_weights_time = datetime.timedelta(seconds=time.time() -
                                           load_weight_start)
    print('Load weights Time: %s' % (load_weights_time))
    open_window(WINDOW_NAME, 'Camera TensorRT YOLO Demo', cam.img_width,
                cam.img_height)
    vis = BBoxVisualization(cls_dict)

    loop_and_detect(cam, trt_yolo, conf_th=0.3, vis=vis)
    reporter = MemReporter()
    reporter.report()
    cam.release()
    cv2.destroyAllWindows()
Example #20
class YOLOv4:
    def __init__(self,
                 frame_info_pool=None,
                 detection_result_pool=None,
                 final_result_pool=None,
                 model='yolov4-416',
                 dataset='obstacle'):
        """
        :param model: model name
        :param category_num:
        """
        self.results = dict()
        self.model_name = model
        self.dataset = dataset
        self.frame_info_pool = frame_info_pool
        self.detection_result_pool = detection_result_pool
        self.final_result_pool = final_result_pool
        if dataset == "coco":
            category_num = 80
        else:
            category_num = 15
        self.cls_dict = get_cls_dict(category_num)

        yolo_dim = model.split('-')[-1]
        if 'x' in yolo_dim:
            dim_split = yolo_dim.split('x')
            if len(dim_split) != 2:
                raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)
            w, h = int(dim_split[0]), int(dim_split[1])
        else:
            h = w = int(yolo_dim)
        if h % 32 != 0 or w % 32 != 0:
            raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)

        self.model = TrtYOLO(model, (h, w), category_num)

        PrintLog.i("Object detection model is loaded - {}\t{}".format(
            model, dataset))

    def inference_by_image(self, image, confidence_threshold=0.1):
        """
        :param image: input image
        :param confidence_threshold: confidence/score threshold for object detection.
        :return: bounding box(x1, y1, x2, y2), scores, classes
        """
        boxes, scores, classes = self.model.detect(image, confidence_threshold)
        results = []
        for box, score, cls in zip(boxes, scores, classes):
            results.append({
                "label": [{
                    "description":
                    self.cls_dict.get(int(cls), 'CLS{}'.format(int(cls))),
                    "score":
                    float(score),
                    "class_idx":
                    int(cls)
                }],
                "position": {
                    "x": int(box[0]),
                    "y": int(box[1]),
                    "w": int(box[2] - box[0]),
                    "h": int(box[3] - box[1])
                }
            })

        return results

    def run(self):
        PrintLog.i("Frame Number\tTimestamp")
        while True:
            if len(self.frame_info_pool) > 0:
                frame_info = self.frame_info_pool.pop(0)
                result = self.inference_by_image(frame_info["frame"])
                detection_result = dict()
                detection_result["cam_address"] = frame_info["cam_address"]
                detection_result["timestamp"] = frame_info["timestamp"]
                detection_result["frame_number"] = frame_info["frame_number"]
                detection_result["results"] = []
                detection_result["results"].append(
                    {"detection_result": result})
                self.detection_result_pool.append(detection_result)
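A hypothetical wiring of the pools used by this class (everything beyond the class itself is our assumption): the pools are plain shared lists, a capture loop appends frame dicts with the keys run() reads, and run() consumes them on its own thread:

frame_info_pool, detection_result_pool = [], []
detector = YOLOv4(frame_info_pool=frame_info_pool,
                  detection_result_pool=detection_result_pool,
                  model='yolov4-416', dataset='coco')
# producer side: frame is a BGR ndarray, e.g. from cv2.VideoCapture.read()
frame_info_pool.append({"cam_address": "rtsp://...", "timestamp": 0.0,
                        "frame_number": 0, "frame": frame})
detector.run()  # loops forever; normally started in a separate thread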
Example #21
import cv2
from time import sleep
import time
import subprocess as sp
from utils.yolo_classes import get_cls_dict
from utils.visualization import BBoxVisualization
from utils.yolo_with_plugins import TrtYOLO
import pycuda.autoinit

cap = cv2.VideoCapture('rtsp://localhost:8554/stream_input')
cls_dict = get_cls_dict(80)
h = w = 288
model = 'yolov4-tiny-288'
trt_yolo = TrtYOLO(model, (h, w))
vis = BBoxVisualization(cls_dict)
rtsp_server_output = 'rtsp://localhost:8554/stream_output'
command = ['ffmpeg',
               '-re',
               # '-s', sizeStr,
               # '-r', str(fps),  # rtsp fps (from input server)
               # '-f', 'v4l2',
               '-i', '-',

               # You can change ffmpeg parameter after this item.
               # '-pix_fmt', 'yuv420p',
               # '-r', '30',  # output fps
               # '-g', '50',
               # '-c:v', 'libx264',
               # '-b:v', '2M',
               # '-bufsize', '64M',
               # '-maxrate', "4M",
Example #22
def main():
    # initialize camera class
    cam = Camera()
    if not cam.is_opened():
        raise SystemExit('ERROR: failed to open camera!')
    cls_dict = alprClassNames()
    
    # Yolo dimensions (416x416)
    yolo_dim = 416
    h = w = int(yolo_dim)

    # Initialized model and tools
    cwd = os.getcwd()
    model_yolo = str(cwd) + '/weights/yolov4-tiny-416.trt'
    model_crnn = str(cwd) + '/weights/crnn.pth'
    trt_yolo = TrtYOLO(model_yolo, (h,w), category_num=1) # category number is number of classes
    crnn = alpr.AutoLPR(decoder='bestPath', normalise=True)
    crnn.load(crnn_path=model_crnn)
    open_window(WINDOW_NAME, TITLE, cam.img_width, cam.img_height)
    vis = BBoxVisualization(cls_dict)

    # Loop and detect
    full_scrn = False
    fps = 0.0 
    tic = time.time()
    while True:
        if cv2.getWindowProperty(WINDOW_NAME, 0) < 0:
            break
        img = cam.read()
        if img is None:
            break
        # Detect car plate
        boxes, confs, clss = trt_yolo.detect(img, conf_th=0.5)
        # Crop and preprocess car plate 
        cropped = vis.crop_plate(img, boxes, confs, clss)
        # Recognize car plate
        lp_plate = ''
        fileLocate = str(cwd) + '/detections/detection1.jpg'
        if os.path.exists(fileLocate):
            lp_plate = crnn.predict(fileLocate)  # 'lpr' was undefined; crnn is the AutoLPR loaded above

        # Draw boxes and fps
        img = vis.draw_bboxes(img, boxes, confs, clss, lp=lp_plate)
        img = show_fps(img, fps)

        # Show image
        cv2.imshow(WINDOW_NAME, img)

        # Calculate fps
        toc = time.time()
        curr_fps = 1.0 / (toc - tic)
        fps = curr_fps if fps == 0.0 else (fps*0.95 + curr_fps*0.05)
        tic = toc

        # Exit key
        key = cv2.waitKey(1)
        if key == 27:  # ESC key: quit program
            break

    # Release capture and destroy all windows
    cam.release()
    cv2.destroyAllWindows()
Example #23
def detect(config):
    COLOR_AROUND_DOOR = (48, 58, 221)
    COLOR_DOOR = (23, 158, 21)
    COLOR_LINE = (214, 4, 54)
    sent_videos = set()
    video_name = ""
    fpeses = []
    fps = 0

    # door_array = select_object()
    # door_array = [475, 69, 557, 258]
    global flag, vid_writer, lost_ids
    # initial parameters
    door_array = [611, 70, 663, 310]
    around_door_array = [507, 24, 724, 374]
    low_border = 225
    high_border = 342
    #
    door_c = find_centroid(door_array)
    rect_door = Rectangle(door_array[0], door_array[1], door_array[2],
                          door_array[3])
    rect_around_door = Rectangle(around_door_array[0], around_door_array[1],
                                 around_door_array[2], around_door_array[3])
    # socket
    HOST = "localhost"
    PORT = 8083
    # camera info
    save_img = True
    imgsz = (416, 416) if ONNX_EXPORT else config[
        "img_size"]  # (320, 192) or (416, 256) or (608, 352) for (height, width)
    out, source, weights, half, view_img = config["output"], config["source"], config["weights"], \
                                           config["half"], config["view_img"]
    webcam = source == '0' or source.startswith('rtsp') or source.startswith(
        'http') or source.endswith('.txt')
    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(config["config_deepsort"])
    # initial objects of classes
    counter = Counter()
    VideoHandler = Writer()
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)
    # Initialize device, weights etc.
    device = torch_utils.select_device(
        device='cpu' if ONNX_EXPORT else config["device"])
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    # Initialize colors
    names = load_classes(config["names"])
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    if config["category_num"] <= 0:
        raise SystemExit('ERROR: bad category_num (%d)!' %
                         config["category_num"])
    if not os.path.isfile('yolo/%s.trt' % config["model"]):
        raise SystemExit('ERROR: file (yolo/%s.trt) not found!' %
                         config["model"])

    # cap = cv2.VideoCapture(config["source"])
    # if not cap.isOpened():
    #     raise SystemExit('ERROR: failed to open the input video file!')
    # frame_width, frame_height = int(cap.get(3)), int(cap.get(4))
    webcam = source == '0' or source.startswith('rtsp') or source.startswith(
        'http') or source.endswith('.txt')
    if webcam:
        torch.backends.cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)
    img = torch.zeros((3, imgsz, imgsz), device=device)  # init img

    cls_dict = get_cls_dict(config["category_num"])
    #vis = BBoxVisualization(cls_dict)
    vis = None
    h, w = get_input_shape(config["model"])
    trt_yolo = TrtYOLO(config["model"], (h, w), config["category_num"],
                       config["letter_box"])

    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
        sock.connect((HOST, PORT))
        img_shape = (256, 256)
        for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
            t0 = time.time()

            flag_move = False
            flag_anyone_in_door = False

            ratio_detection = 0
            # Process detections
            lost_ids = counter.return_lost_ids()
            if webcam:  # batch_size >= 1
                p, s, im0 = path[0], '%g: ' % 0, im0s[0].copy(
                )  # TODO mb needed in loop for detection
            else:
                p, s, im0 = path, '', im0s
            preds, confs, clss = perform_detection(
                frame=im0,
                trt_yolo=trt_yolo,
                conf_th=config["conf_thres"],
                vis=vis)
            scaled_pred = []
            scaled_conf = []
            detections = torch.Tensor()
            for i, (det, conf, cls) in enumerate(zip(preds, confs, clss)):
                if det is not None and len(det):
                    if names[int(cls)] not in config["needed_classes"]:
                        continue
                    det = xyxy_to_xywh(det)
                    # det = scale_coords(img_shape, det, im0.shape)
                    scaled_pred.append(det)
                    scaled_conf.append(conf)
                detections = torch.Tensor(scaled_pred)
                confidences = torch.Tensor(scaled_conf)
                # Pass detections to deepsort
            if len(detections) != 0:
                outputs = deepsort.update(detections, confidences, im0)
                # draw boxes for visualization
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, -1]
                    draw_boxes(im0, bbox_xyxy, identities)
                    # print('bbox_xywh ', bbox_xywh, 'id', identities)
                    counter.update_identities(identities)
                    for bbox_tracked, id_tracked in zip(bbox_xyxy, identities):
                        ratio_initial = find_ratio_ofbboxes(
                            bbox=bbox_tracked, rect_compare=rect_around_door)
                        ratio_door = find_ratio_ofbboxes(
                            bbox=bbox_tracked, rect_compare=rect_door)
                        # person enters the door contour for the first time
                        if ratio_initial > 0.2:
                            if VideoHandler.counter_frames_indoor == 0:
                                # flag: start recording
                                VideoHandler.start_video(id_tracked)
                            flag_anyone_in_door = True
                        elif ratio_initial > 0.2 and id_tracked not in VideoHandler.id_inside_door_detected:
                            VideoHandler.continue_opened_video(id=id_tracked,
                                                               seconds=3)
                            flag_anyone_in_door = True
                        if id_tracked not in counter.people_init or counter.people_init[
                                id_tracked] == 0:
                            counter.obj_initialized(id_tracked)
                            if ratio_door >= 0.2 and low_border < bbox_tracked[
                                    3] < high_border:
                                #     was initialized in door, probably going out of office
                                counter.people_init[id_tracked] = 2
                            elif ratio_door < 0.2:
                                #     initialized in the corridor, mb going in
                                counter.people_init[id_tracked] = 1
                            # else:
                            #     # res is None, means that object is not in door contour
                            #     counter.people_init[id_tracked] = 1
                            counter.frame_age_counter[id_tracked] = 0
                            counter.people_bbox[id_tracked] = bbox_tracked
                        counter.cur_bbox[id_tracked] = bbox_tracked
            else:
                deepsort.increment_ages()
                if counter.need_to_clear():
                    counter.clear_all()
            # Stream results
            vals_to_del = []
            for val in counter.people_init.keys():
                # check bbox also
                cur_c = find_centroid(counter.cur_bbox[val])
                centroid_distance = np.sum(
                    np.array([(door_c[i] - cur_c[i])**2
                              for i in range(len(door_c))]))
                ratio = find_ratio_ofbboxes(bbox=counter.cur_bbox[val],
                                            rect_compare=rect_door)
                if val in lost_ids and counter.people_init[val] != -1:
                    # if vector_person < 0 then current coord is less than initialized, it means that man is going
                    # in the exit direction
                    if counter.people_init[val] == 2 \
                            and ratio < 0.4 and centroid_distance > 5000:
                        print('ratio out: {}\n centroids: {}\n'.format(
                            ratio, centroid_distance))
                        counter.get_out()
                        counter.people_init[val] = -1
                        VideoHandler.stop_recording(
                            action_occured="вышел из кабинета")  # "left the office"
                        vals_to_del.append(val)

                    elif counter.people_init[val] == 1 \
                            and ratio >= 0.4 and centroid_distance < 5000:
                        print('ratio in: {}\n centroids: {}\n'.format(
                            ratio, centroid_distance))
                        counter.get_in()
                        counter.people_init[val] = -1
                        VideoHandler.stop_recording(
                            action_occured="зашел внутрь")  # "went inside"
                        vals_to_del.append(val)
                    lost_ids.remove(val)

                # TODO maybe delete this condition
                elif counter.frame_age_counter.get(val, 0) >= counter.max_frame_age_counter \
                        and counter.people_init[val] == 2:

                    if ratio < 0.2 and centroid_distance > 10000:
                        counter.get_out()
                        print('ratio out max frames: ', ratio)
                        counter.people_init[val] = -1
                        VideoHandler.stop_recording(action_occured="вышел")  # "went out"
                        vals_to_del.append(val)
                    counter.age_counter[val] = 0

                counter.clear_lost_ids()

            for valtodel in vals_to_del:
                counter.delete_person_data(track_id=valtodel)

            ins, outs = counter.show_counter()
            cv2.rectangle(im0, (0, 0), (250, 50), (0, 0, 0), -1, 8)

            cv2.rectangle(im0, (int(door_array[0]), int(door_array[1])),
                          (int(door_array[2]), int(door_array[3])), COLOR_DOOR,
                          3)

            cv2.rectangle(
                im0, (int(around_door_array[0]), int(around_door_array[1])),
                (int(around_door_array[2]), int(around_door_array[3])),
                COLOR_AROUND_DOOR, 3)

            cv2.putText(im0, "in: {}, out: {} ".format(ins, outs), (10, 35), 0,
                        1e-3 * im0.shape[0], (255, 255, 255), 3)

            cv2.line(im0, (door_array[0], low_border), (680, low_border),
                     COLOR_LINE, 4)
            cv2.line(im0, (door_array[0], high_border), (680, high_border),
                     COLOR_LINE, 4)

            if VideoHandler.stop_writing(im0):
                # send_new_posts(video_name, action_occured)
                sock.sendall(
                    bytes(
                        VideoHandler.video_name + ":" +
                        VideoHandler.action_occured, "utf-8"))
                data = sock.recv(100)
                print('Received', repr(data.decode("utf-8")))
                sent_videos.add(VideoHandler.video_name)
                with open('data_files/logs2.txt', 'a',
                          encoding="utf-8-sig") as wr:
                    wr.write(
                        'video {}, action: {}, centroid: {}, ratio_init: {}, ratio_door: {}, ratio: {} \n'
                        .format(VideoHandler.video_name,
                                VideoHandler.action_occured, centroid_distance,
                                ratio_initial, ratio_door, ratio))

                print('_________________video was sent _________________')

                VideoHandler = Writer()
                VideoHandler.set_fps(fps)

            else:
                VideoHandler.continue_writing(im0, flag_anyone_in_door)
            if view_img:
                cv2.imshow('image', im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            delta_time = (time.time() - t0)
            # t2_ds = time.time()
            # print('%s Torch:. (%.3fs)' % (s, t2 - t1))
            # print('Full pipe. (%.3fs)' % (t2_ds - t0_ds))
            if len(fpeses) < 15:
                fpeses.append(round(1 / delta_time))
                print(delta_time)
            elif len(fpeses) == 15:
                # fps = round(np.median(np.array(fpeses)))
                median_fps = float(np.median(np.array(fpeses)))
                fps = round(median_fps, 2)
                print('max fps: ', fps)
                fps = 20
                VideoHandler.set_fps(fps)
                counter.set_fps(fps)
                fpeses.append(fps)
                motion_detection = True
            else:
                if VideoHandler.flag_writing_video:
                    print('\nwriting video ')
                if VideoHandler.flag_stop_writing:
                    print('stop writing')
                if flag_anyone_in_door:
                    print('anyone in door')
                if VideoHandler.counter_frames_indoor:
                    print('counter frames indoor: {}'.format(
                        VideoHandler.counter_frames_indoor))
Example #24
def detect(config):
    sent_videos = set()
    video_name = ""
    fpeses = []
    fps = 0

    # door_array = select_object()
    # door_array = [475, 69, 557, 258]
    global flag, vid_writer, lost_ids
    # initial parameters
    # door_array = [528, 21, 581, 315]
    # door_array = [596, 76, 650, 295]  #  18 stream
    door_array = [611, 70, 663, 310]
    # around_door_array = [572, 79, 694, 306]  #
    # around_door_array = [470, 34, 722, 391]
    around_door_array = [507, 24, 724, 374]
    low_border = 225
    #
    door_c = find_centroid(door_array)
    rect_door = Rectangle(door_array[0], door_array[1], door_array[2], door_array[3])
    rect_around_door = Rectangle(around_door_array[0], around_door_array[1], around_door_array[2], around_door_array[3])
    # socket
    HOST = "localhost"
    PORT = 8084
    # camera info
    save_img = True
    imgsz = (416, 416) if ONNX_EXPORT else config[
        "img_size"]  # (320, 192) or (416, 256) or (608, 352) for (height, width)
    out, source, weights, half, view_img = config["output"], config["source"], config["weights"], \
                                           config["half"], config["view_img"]
    webcam = source == '0' or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt')
    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(config["config_deepsort"])
    # initial objects of classes
    counter = Counter(counter_in=0, counter_out=0, track_id=0)
    VideoHandler = Writer()
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)
    # Initialize device, weights etc.
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    # Initialize colors
    names = load_classes(config["names"])
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]

    if config["category_num"] <= 0:
        raise SystemExit('ERROR: bad category_num (%d)!' % config["category_num"])
    if not os.path.isfile('yolo/%s.trt' % config["model"]):
        raise SystemExit('ERROR: file (yolo/%s.trt) not found!' % config["model"])

    cap = cv2.VideoCapture(config["source"])
    if not cap.isOpened():
        raise SystemExit('ERROR: failed to open the input video file!')
    frame_width, frame_height = int(cap.get(3)), int(cap.get(4))

    cls_dict = get_cls_dict(config["category_num"])
    vis = BBoxVisualization(cls_dict)
    h, w = get_input_shape(config["model"])
    trt_yolo = TrtYOLO(config["model"], (h, w), config["category_num"], config["letter_box"])


    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
        sock.connect((HOST, PORT))
        img_shape = (288, 288)
        # for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
        while True:
            ret, im0 = cap.read()
            if not ret:
                break
 
            preds, confs, clss = perform_detection(frame=im0, trt_yolo=trt_yolo, conf_th=config["conf_thres"], vis=vis)

            flag_move = False
            flag_anyone_in_door = False
            t0 = time.time()
            ratio_detection = 0

            # Process detections
            lost_ids = counter.return_lost_ids()
            for i, (det, conf, cls) in enumerate(zip(preds, confs, clss)):

                if det is not None and len(det):
                    # Rescale boxes from imgsz to im0 size
                    # det = scale_coords(img_shape, det, im0.shape).round()
                    if names[int(cls)] not in config["needed_classes"]:
                        continue
                    # bbox_xywh = []
                    # confs = []
                    # Write results
                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(det, im0, label=label, color=colors[int(cls)])

            detections = torch.Tensor(preds)
            confidences = torch.Tensor(confs)

            # Pass detections to deepsort
            if len(detections) == 0:
                continue
            outputs = deepsort.update(detections, confidences, im0)
            print('detections ', detections)
            print('outputs ', outputs)          

            # draw boxes for visualization
            if len(outputs) > 0:
                bbox_xyxy = outputs[:, :4]
                identities = outputs[:, -1]
                draw_boxes(im0, bbox_xyxy, identities)
                print('bbox_xyxy ', bbox_xyxy)
                counter.update_identities(identities)

                for bbox_tracked, id_tracked in zip(bbox_xyxy, identities):

                    rect_detection = Rectangle(bbox_tracked[0], bbox_tracked[1],
                                               bbox_tracked[2], bbox_tracked[3])
                    inter_detection = rect_detection & rect_around_door
                    if inter_detection:
                        inter_square_detection = rect_square(*inter_detection)
                        cur_square_detection = rect_square(*rect_detection)
                        try:
                            ratio_detection = inter_square_detection / cur_square_detection
                        except ZeroDivisionError:
                            ratio_detection = 0
                        # person enters the door contour for the first time
                    if ratio_detection > 0.2:
                        if VideoHandler.counter_frames_indoor == 0:
                            # flag: start recording
                            VideoHandler.start_video(id_tracked)
                        flag_anyone_in_door = True

                    elif ratio_detection > 0.2 and id_tracked not in VideoHandler.id_inside_door_detected:
                        VideoHandler.continue_opened_video(id=id_tracked, seconds=3)
                        flag_anyone_in_door = True

                    # elif ratio_detection > 0.6 and counter.people_init.get(id_tracked) == 1:
                    #     VideoHandler.continue_opened_video(id=id_tracked, seconds=0.005)

                    if id_tracked not in counter.people_init or counter.people_init[id_tracked] == 0:
                        counter.obj_initialized(id_tracked)
                        rect_head = Rectangle(bbox_tracked[0], bbox_tracked[1], bbox_tracked[2],
                                              bbox_tracked[3])
                        intersection = rect_head & rect_door
                        if intersection:
                            intersection_square = rect_square(*intersection)
                            head_square = rect_square(*rect_head)
                            rat = intersection_square / head_square
                            if rat >= 0.4 and bbox_tracked[3] > low_border:
                                #     was initialized in door, probably going out of office
                                counter.people_init[id_tracked] = 2
                            elif rat < 0.4:
                                #     initialized in the corridor, mb going in
                                counter.people_init[id_tracked] = 1
                        else:
                            # res is None, means that object is not in door contour
                            counter.people_init[id_tracked] = 1
                        counter.frame_age_counter[id_tracked] = 0

                        counter.people_bbox[id_tracked] = bbox_tracked

                    counter.cur_bbox[id_tracked] = bbox_tracked
            else:  # no confirmed tracks this frame
                deepsort.increment_ages()
            # Print time (inference + NMS)
            t2 = torch_utils.time_synchronized()

            # Stream results
            vals_to_del = []
            for val in counter.people_init.keys():
                # check bbox also
                inter = 0
                cur_square = 0
                ratio = 0
                cur_c = find_centroid(counter.cur_bbox[val])
                centroid_distance = np.sum(np.array([(door_c[i] - cur_c[i]) ** 2 for i in range(len(door_c))]))

                # init_c = find_centroid(counter.people_bbox[val])
                # vector_person = (cur_c[0] - init_c[0],
                #                  cur_c[1] - init_c[1])

                rect_cur = Rectangle(counter.cur_bbox[val][0], counter.cur_bbox[val][1],
                                     counter.cur_bbox[val][2], counter.cur_bbox[val][3])
                inter = rect_cur & rect_door

                if val in lost_ids and counter.people_init[val] != -1:

                    if inter:
                        inter_square = rect_square(*inter)
                        cur_square = rect_square(*rect_cur)
                        try:
                            ratio = inter_square / cur_square

                        except ZeroDivisionError:
                            ratio = 0
                    # if vector_person < 0 then current coord is less than initialized, it means that man is going
                    # in the exit direction

                    if counter.people_init[val] == 2 \
                            and ratio < 0.4 and centroid_distance > 5000:
                        print('ratio out: {}\n centroids: {}\n'.format(ratio, centroid_distance))
                        counter.get_out()
                        counter.people_init[val] = -1
                        VideoHandler.stop_recording(action_occured="вышел из кабинета")  # "left the office"

                        vals_to_del.append(val)

                    elif counter.people_init[val] == 1 \
                            and ratio >= 0.4 and centroid_distance < 1000:
                        print('ratio in: {}\n centroids: {}\n'.format(ratio, centroid_distance))
                        counter.get_in()
                        counter.people_init[val] = -1
                        VideoHandler.stop_recording(action_occured="зашел внутрь")  # "went inside"
                        vals_to_del.append(val)

                    lost_ids.remove(val)

                # TODO maybe delete this condition
                elif counter.frame_age_counter.get(val, 0) >= counter.max_frame_age_counter \
                        and counter.people_init[val] == 2:
                    if inter:
                        inter_square = rect_square(*inter)
                        cur_square = rect_square(*rect_cur)
                        try:
                            ratio = inter_square / cur_square
                        except ZeroDivisionError:
                            ratio = 0

                    if ratio < 0.2 and centroid_distance > 10000:
                        counter.get_out()
                        print('ratio out max frames: ', ratio)
                        counter.people_init[val] = -1
                        VideoHandler.stop_recording(action_occured="вышел")  # "went out"
                        vals_to_del.append(val)
                    counter.age_counter[val] = 0

                counter.clear_lost_ids()

            for valtodel in vals_to_del:
                counter.delete_person_data(track_id=valtodel)

            ins, outs = counter.show_counter()
            cv2.rectangle(im0, (0, 0), (250, 50),
                          (0, 0, 0), -1, 8)

            cv2.rectangle(im0, (int(door_array[0]), int(door_array[1])),
                          (int(door_array[2]), int(door_array[3])),
                          (23, 158, 21), 3)

            cv2.rectangle(im0, (int(around_door_array[0]), int(around_door_array[1])),
                          (int(around_door_array[2]), int(around_door_array[3])),
                          (48, 58, 221), 3)

            cv2.putText(im0, "in: {}, out: {} ".format(ins, outs), (10, 35), 0,
                        1e-3 * im0.shape[0], (255, 255, 255), 3)

            cv2.line(im0, (door_array[0], low_border), (880, low_border), (214, 4, 54), 4)

            if VideoHandler.stop_writing(im0):
                # send_new_posts(video_name, action_occured)
                sock.sendall(bytes(VideoHandler.video_name + ":" + VideoHandler.action_occured, "utf-8"))
                data = sock.recv(100)
                print('Received', repr(data.decode("utf-8")))
                sent_videos.add(VideoHandler.video_name)
                with open('../data_files/logs2.txt', 'a', encoding="utf-8-sig") as wr:
                    wr.write('video {}, man {}, centroid {} '.format(VideoHandler.video_name, VideoHandler.action_occured, centroid_distance))

                VideoHandler = Writer()
                VideoHandler.set_fps(fps)

            else:
                VideoHandler.continue_writing(im0, flag_anyone_in_door)

            if view_img:
                cv2.imshow('image', im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            delta_time = (time.time() - t0)
            # t2_ds = time.time()
            # print('%s Torch:. (%.3fs)' % (s, t2 - t1))
            # print('Full pipe. (%.3fs)' % (t2_ds - t0_ds))
            if len(fpeses) < 30:
                fpeses.append(round(1 / delta_time))
            elif len(fpeses) == 30:
                # fps = round(np.median(np.array(fpeses)))
                fps = np.median(np.array(fpeses))
                # fps = 3
                print('fps set: ', fps)
                VideoHandler.set_fps(fps)
                counter.set_fps(fps)
                fpeses.append(fps)
                motion_detection = True
            else:
                print('\nflag writing video: ', VideoHandler.flag_writing_video)
                print('flag stop writing: ', VideoHandler.flag_stop_writing)
                print('flag anyone in door: ', flag_anyone_in_door)
                print('counter frames indoor: ', VideoHandler.counter_frames_indoor)
Example #25
class yolov4(object):
    def __init__(self):
        """ Constructor """

        self.bridge = CvBridge()
        self.init_params()
        self.init_yolo()
        self.cuda_ctx = cuda.Device(0).make_context()
        self.trt_yolo = TrtYOLO((self.model_path + self.model),
                                (self.h, self.w), self.category_num)

    def __del__(self):
        """ Destructor """

        self.cuda_ctx.pop()
        del self.trt_yolo
        del self.cuda_ctx

    def clean_up(self):
        """ Backup destructor: Release cuda memory """

        if self.trt_yolo is not None:
            self.cuda_ctx.pop()
            del self.trt_yolo
            del self.cuda_ctx

    def init_params(self):
        """ Initializes ros parameters """

        rospack = rospkg.RosPack()
        package_path = rospack.get_path("yolov4_trt_ros")
        self.video_topic = rospy.get_param("/video_topic", "/video_source/raw")
        self.model = rospy.get_param("/model", "yolov4")
        self.model_path = rospy.get_param("/model_path",
                                          package_path + "/yolo/")
        self.input_shape = rospy.get_param("/input_shape", "416")
        self.category_num = rospy.get_param("/category_number", 80)
        self.conf_th = rospy.get_param("/confidence_threshold", 0.5)
        self.show_img = rospy.get_param("/show_image", True)
        self.image_sub = rospy.Subscriber(self.video_topic,
                                          Image,
                                          self.img_callback,
                                          queue_size=1,
                                          buff_size=1920 * 1080 * 3)
        self.detection_pub = rospy.Publisher("detections",
                                             Detector2DArray,
                                             queue_size=1)
        self.overlay_pub = rospy.Publisher("/result/overlay",
                                           Image,
                                           queue_size=1)

    def init_yolo(self):
        """ Initialises yolo parameters required for trt engine """

        if self.model.find('-') == -1:
            self.model = self.model + "-" + self.input_shape
        yolo_dim = self.model.split('-')[-1]  # must run unconditionally, else NameError below

        if 'x' in yolo_dim:
            dim_split = yolo_dim.split('x')
            if len(dim_split) != 2:
                raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)
            self.w, self.h = int(dim_split[0]), int(dim_split[1])
        else:
            self.h = self.w = int(yolo_dim)
        if self.h % 32 != 0 or self.w % 32 != 0:
            raise SystemExit('ERROR: bad yolo_dim (%s)!' % yolo_dim)

        cls_dict = get_cls_dict(self.category_num)
        self.vis = BBoxVisualization(cls_dict)

    def img_callback(self, ros_img):
        """Continuously capture images from camera and do object detection """

        tic = time.time()

        # converts from ros_img to cv_img for processing
        try:
            cv_img = self.bridge.imgmsg_to_cv2(ros_img,
                                               desired_encoding="bgr8")
            rospy.logdebug("ROS Image converted for processing")
        except CvBridgeError as e:
            rospy.loginfo("Failed to convert image %s", str(e))
            return  # skip this frame; cv_img and boxes would be undefined below

        boxes, confs, clss = self.trt_yolo.detect(cv_img, self.conf_th)

        cv_img = self.vis.draw_bboxes(cv_img, boxes, confs, clss)
        toc = time.time()
        fps = 1.0 / (toc - tic)

        self.publisher(boxes, confs, clss)

        if self.show_img:
            cv_img = show_fps(cv_img, fps)
            cv2.imshow("YOLOv4 DETECTION RESULTS", cv_img)
            cv2.waitKey(1)

        # converts back to ros_img type for publishing
        try:
            overlay_img = self.bridge.cv2_to_imgmsg(cv_img,
                                                    encoding="passthrough")
            rospy.logdebug("CV Image converted for publishing")
            self.overlay_pub.publish(overlay_img)
        except CvBridgeError as e:
            rospy.loginfo("Failed to convert image %s", str(e))

    def publisher(self, boxes, confs, clss):
        """ Publishes to detector_msgs

        Parameters:
        boxes (List(List(int))) : Bounding boxes of all objects
        confs (List(double))	: Probability scores of all objects
        clss  (List(int))	: Class ID of all classes
        """
        detection2d = Detector2DArray()

        for i in range(len(boxes)):
            # boxes : xmin, ymin, xmax, ymax
            # fresh message per box; reusing one object would append the
            # same reference for every detection
            detection = Detector2D()
            detection.header.stamp = rospy.Time.now()
            detection.header.frame_id = "camera"  # change accordingly
            detection.results.id = clss[i]
            detection.results.score = confs[i]

            # midpoint of the box, not half its extent
            detection.bbox.center.x = (boxes[i][0] + boxes[i][2]) / 2
            detection.bbox.center.y = (boxes[i][1] + boxes[i][3]) / 2
            detection.bbox.center.theta = 0.0  # change if required

            detection.bbox.size_x = abs(boxes[i][0] - boxes[i][2])
            detection.bbox.size_y = abs(boxes[i][1] - boxes[i][3])

            detection2d.detections.append(detection)

        self.detection_pub.publish(detection2d)
Example #26
elif 'nvarguscamerasrc' in gst_elements:
    gst_str = ('nvarguscamerasrc ! '
               'video/x-raw(memory:NVMM), '
               'width=(int)1920, height=(int)1080, '
               'format=(string)NV12, framerate=(fraction)30/1 ! '
               'nvvidconv flip-method=2 ! '
               'video/x-raw, width=(int){}, height=(int){}, '
               'format=(string)BGRx ! '
               'videoconvert ! appsink').format(width, height)
else:
    raise RuntimeError('onboard camera source not found!')

cap = VideoCapture(gst_str)

print("Start to load YoloV4 model")
trt_yolo = TrtYOLO('yolov4_my-416', (416, 416), 4)

print("YoloV4 model is loaded.")

cls_dict = get_cls_dict(4)
vis = BBoxVisualization(cls_dict)


def detect_center(bboxes):
    center_x = (bboxes[0][0] / 416 + bboxes[0][2] / 416) / 2.0 - 0.5
    center_y = (bboxes[0][1] / 416 + bboxes[0][3] / 416) / 2.0 - 0.5
    return (center_x, center_y)
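detect_center divides by 416, so it assumes boxes measured on a 416-pixel frame, and it only looks at the first detection. A hedged sketch of how the steering snippet below might consume it (the steering line is our assumption, using the turn_gain defined just below):

ret, img = cap.read()
if ret:
    boxes, confs, clss = trt_yolo.detect(img, conf_th=0.5)
    if len(boxes) > 0:
        # offsets in roughly [-0.5, 0.5]; positive x means target right of center
        center_x, center_y = detect_center(boxes)
        steering = turn_gain * center_x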


speed = 0.5
turn_gain = 0.3
Example #27
# Added by Adriano Santos

import cv2
import time
import pycuda.autoinit  # This is needed for initializing CUDA driver
from utils.yolo_with_plugins import TrtYOLO
from utils.yolo_classes import get_class_name

# Parameters to test
conf_th = 0.5
nms_threshold = 0.5
opt_device = '0,1'
color = (255, 0, 0)

# Create a model instance
trt_yolo = TrtYOLO("yolov3-spp3-416", (416, 416), 1)


def main(video):
    # Start the fake camera
    camera = cv2.VideoCapture(video)

    while camera.isOpened():
        # Get a frame
        (status, frame) = camera.read()

        # Stop conditions
        key = cv2.waitKey(1) & 0xFF
        if not status or key == ord("q"):
            break