Example #1
def build_tracker(cfg, use_cuda):
    return DeepSort(cfg.DEEPSORT.REID_CKPT,
                    max_dist=cfg.DEEPSORT.MAX_DIST,
                    min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                    nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                    max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                    max_age=cfg.DEEPSORT.MAX_AGE,
                    n_init=cfg.DEEPSORT.N_INIT,
                    nn_budget=cfg.DEEPSORT.NN_BUDGET,
                    use_cuda=use_cuda)
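
A minimal usage sketch (not part of the original listing) wiring build_tracker() to the config helper the other examples use; the import path follows Example #9 and the YAML path follows Example #6, both assumptions for this snippet.

from deep_sort.configs.parser import get_config  # import path as in Example #9

cfg = get_config()
cfg.merge_from_file("deep_sort/configs/deep_sort.yaml")  # assumed config location
tracker = build_tracker(cfg, use_cuda=True)
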
Example #2
 def __init__(self,
              filter_class=None,
              model='yolox-s',
              ckpt='weights/yolox_s.pth',
              LineMapping=None):
     self.LineMapping = LineMapping
     self.detector = Predictor(model, ckpt)
     cfg = get_config()
     cfg.merge_from_file("deep_sort/configs/deep_sort.yaml")
     self.deepsort = DeepSort(
         cfg.DEEPSORT.REID_CKPT,
         max_dist=cfg.DEEPSORT.MAX_DIST,
         min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
         nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
         max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
         max_age=cfg.DEEPSORT.MAX_AGE,
         n_init=cfg.DEEPSORT.N_INIT,
         nn_budget=cfg.DEEPSORT.NN_BUDGET,
         use_cuda=True)
     self.filter_class = COCO_CLASSES
Example #3
def load_info(config_deepsort, device, imgsz, out, weights):

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Initialize
    device = select_device(device)

    if not os.path.exists(out):
        os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = torch.load(weights,
                       map_location=device)['model'].float()  # load to FP32
    model.to(device).eval()
    if half:
        model.half()  # to FP16

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img
              ) if device.type != 'cpu' else None  # run once

    return t0, half, device, model, names, deepsort
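
A hedged call sketch for load_info() above; every argument value here (config path, weights file, output folder, image size) is an assumption rather than something taken from the example.

t0, half, device, model, names, deepsort = load_info(
    config_deepsort="deep_sort/configs/deep_sort.yaml",  # assumed, as in other examples
    device="",                # empty string lets select_device() pick CUDA if available
    imgsz=640,                # assumed YOLOv5 input size
    out="inference/output",   # assumed output folder
    weights="yolov5s.pt")     # assumed detector checkpoint
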
Example #4
def detect(opt, save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith('rtsp') or source.startswith(
        'http') or source.endswith('.txt')
    array_detected_object = []

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Initialize
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = torch.load(weights,
                       map_location=device)['model'].float()  # load to FP32
    model.to(device).eval()
    if half:
        model.half()  # to FP16

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        view_img = True
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img
              ) if device.type != 'cpu' else None  # run once

    save_path = str(Path(out))
    txt_path = str(Path(out)) + '/results.txt'

    peopleIn = 0
    peopleOut = 0
    detectedIds = []

    for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            s += '%gx%g ' % img.shape[2:]  # print string
            save_path = str(Path(out) / Path(p).name)

            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                bbox_xywh = []
                confs = []

                # Adapt detections to deep sort input format
                for *xyxy, conf, cls in det:
                    img_h, img_w, _ = im0.shape
                    x_c, y_c, bbox_w, bbox_h = bbox_rel(img_w, img_h, *xyxy)
                    obj = [x_c, y_c, bbox_w, bbox_h]
                    bbox_xywh.append(obj)
                    confs.append([conf.item()])

                xywhs = torch.Tensor(bbox_xywh)
                confss = torch.Tensor(confs)

                # Pass detections to deepsort
                outputs = deepsort.update(xywhs, confss, im0)

                # draw boxes for visualization
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, -1]
                    draw_boxes(im0, bbox_xyxy, identities)

                # Write MOT compliant results to file
                if save_txt and len(outputs) != 0:
                    for j, output in enumerate(outputs):
                        bbox_left = output[0]
                        bbox_top = output[1]
                        bbox_w = output[2]
                        bbox_h = output[3]
                        identity = output[-1]
                        array_detected_object.append(identity)
                        array_detected_object = list(
                            dict.fromkeys(array_detected_object))

                        xas = 0
                        yas = 0

                        if identity >= 0:
                            xas = bbox_xyxy[0][0]
                            yas = bbox_xyxy[0][1]

                        if identity not in detectedIds and int(bbox_top) >= 10:
                            detectedIds.append(identity)
                            if int(bbox_top) >= 500 or (int(bbox_left) >= 800
                                                        and
                                                        int(bbox_top) >= 80):
                                peopleOut += 1
                            if int(bbox_top) <= 100:
                                peopleIn += 1

                        # with open(txt_path, 'a') as f:
                        #     f.write(('%g ' * 10 + '\n') % (frame_idx, identity, bbox_left,
                        #             bbox_top, bbox_w, bbox_h, -1, -1, -1, -1))  # label format
                        # f.write(('%g ' * 3 + '\n') % (identity, bbox_left, bbox_top))  # label format
                        # resultText = str(identity) + '-' + str(bbox_top)
                        # f.write(resultText + '\n')  # label format
                        # f.write(('%g ' * 4 + '\n') % (-1, frame_idx, -1, -1, str(xas), str(yas)))  # label format
                        # f.write(str(identity))
                        # f.write(('%g ' * 1 + '\n') % (identity))
                        # f.write('\n')
                        # f.write(str(bbox_xyxy))
                        # f.write("Number people counted: " + str(len(array_detected_object)))
                        # with open(txt_path, 'r') as fp:
                        #     line = fp.readline()
                        #     cnt = 1
                        #     while line:
                        #         identity = line.split("-")[0]
                        #         infoCheck = line.split("-")[1]
                        #         if identity not in detectedIds:
                        #             detectedIds.append(identity)
                        #             if int(infoCheck) > 680 :
                        #                 peopleOut += 1
                        #             else:
                        #                 peopleIn +=1

                        # print("Line {}: {}".format(cnt, line.strip().split("-")[0]))
                        # line = fp.readline()

                        # cnt += 1
                        # print("All people counted: " + str(peopleIn + peopleOut))
                        # print("Number people in: " + str(peopleIn))
                        # print("Number people out: " + str(peopleOut))

            font = cv2.FONT_HERSHEY_DUPLEX
            cv2.putText(im0,
                        "People in out counted: " + str(peopleIn + peopleOut),
                        (50, 100), font, 0.8, (0, 255, 255), 2, cv2.LINE_AA)
            cv2.putText(im0, "Number people in: " + str(peopleIn), (50, 135),
                        font, 0.8, (0, 255, 255), 2, cv2.LINE_AA)
            cv2.putText(im0, "Number people out: " + str(peopleOut), (50, 170),
                        font, 0.8, (0, 255, 255), 2, cv2.LINE_AA)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    txt_result = str(Path(out)) + '/result-counted.txt'

                    print("All people counted: " + str(peopleIn + peopleOut))
                    print("Number people in: " + str(peopleIn))
                    print("Number people out: " + str(peopleOut))
                    with open(txt_path, 'a') as f:
                        f.write("All people counted: " +
                                str(peopleIn + peopleOut))
                        f.write("Number people in: " + str(peopleIn))
                        f.write("Number people out: " + str(peopleOut))

                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                print('saving img!')
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    print('saving video!')
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer

                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*opt.fourcc),
                            fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        if platform == 'darwin':  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
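
The example relies on a bbox_rel() helper that is not shown. A plausible sketch, assuming it converts a YOLO (x1, y1, x2, y2) box into the centre-based (x_c, y_c, w, h) layout that deepsort.update() expects; the image-size arguments mirror the call sites even though this sketch does not need them.

def bbox_rel(image_width, image_height, *xyxy):
    # Convert corner coordinates to centre x/y plus width/height (pixels).
    x1, y1, x2, y2 = (float(v) for v in xyxy)
    bbox_w = x2 - x1
    bbox_h = y2 - y1
    x_c = x1 + bbox_w / 2
    y_c = y1 + bbox_h / 2
    return x_c, y_c, bbox_w, bbox_h
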
Example #5
def rtsp_to_mongodb():

    with open("/home/asyed/airflow/dags/parameters.json") as f:

        parms = json.load(f)

    agnostic_nms = parms["agnostic_nms"]
    augment = parms["augment"]
    classes = parms["classes"]
    conf_thres = parms["conf_thres"]
    config_deepsort = parms["config_deepsort"]
    deep_sort_model = parms["deep_sort_model"]
    device = parms["device"]
    dnn = False
    evaluate = parms["evaluate"]
    exist_ok = parms["exist_ok"]
    fourcc = parms["fourcc"]
    half = False
    print(device)
    imgsz = parms["imgsz"]
    iou_thres = parms["iou_thres"]
    max_det = parms["max_det"]
    name = parms["name"]
    # save_vid = parms["save_vid"]
    #show_vid = parms["show_vid"]
    source = parms["source"]
    visualize = parms["visualize"]
    yolo_model = parms["yolo_model"]
    webcam = parms["webcam"]
    save_txt = parms["save_txt"]
    homography = np.array(parms["homography"])

    url = "mongodb://localhost:27017"
    client = MongoClient(url)
    db = client.trajectory_database

    today_date = date.today().strftime("%m-%d-%y")
    new = "file_image_coordinates_" + today_date
    collection = db[new]

    cfg = get_config()
    cfg.merge_from_file(config_deepsort)

    deepsort = DeepSort(deep_sort_model,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    device = select_device(device)
    half &= device.type != 'cpu'  # half precision only supported on CUDA

    # The MOT16 evaluation runs multiple inference streams in parallel, each one writing to
    # its own .txt file. Hence, in that case, the output folder is not restored
    # make new output folder

    # Load model
    device = select_device(device)
    model = DetectMultiBackend(yolo_model, device=device, dnn=dnn)
    stride, names, pt, jit, _ = model.stride, model.names, model.pt, model.jit, model.onnx
    imgsz = check_img_size(imgsz, s=stride)  # check image size

    # Half
    half &= pt and device.type != 'cpu'  # half precision only supported by PyTorch on CUDA
    if pt:
        model.model.half() if half else model.model.float()

    # Set Dataloader
    vid_path, vid_writer = None, None
    # Check if environment supports image displays

    cudnn.benchmark = True  # set True to speed up constant image size inference

    dataset = LoadStreams(source,
                          img_size=imgsz,
                          stride=stride,
                          auto=pt and not jit)

    bs = len(dataset)  # batch_size

    vid_path, vid_writer = [None] * bs, [None] * bs

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names

    if pt and device.type != 'cpu':
        model(
            torch.zeros(1, 3, *imgsz).to(device).type_as(
                next(model.model.parameters())))  # warmup
        # global framess_im2

        dt, seen = [0.0, 0.0, 0.0, 0.0], 0
        # arr = None
        past = []
        for frame_idx, (path, img, im0s, vid_cap, s) in enumerate(dataset):

            t1 = time_sync()
            img = torch.from_numpy(img).to(device)
            # print("raw_frame",img.shape)
            img = img.half() if half else img.float()  # uint8 to fp16/32
            img /= 255.0  # 0 - 255 to 0.0 - 1.0
            if img.ndimension() == 3:
                img = img.unsqueeze(0)
            t2 = time_sync()
            dt[0] += t2 - t1

            pred = model(img, augment=augment, visualize=visualize)
            t3 = time_sync()
            dt[1] += t3 - t2

            pred = non_max_suppression(pred,
                                       conf_thres,
                                       iou_thres,
                                       classes,
                                       agnostic_nms,
                                       max_det=max_det)
            dt[2] += time_sync() - t3

            # Process detections

            # dets_per_img = []
            for i, det in enumerate(pred):  # detections per image
                seen += 1
                if webcam:  # batch_size >= 1
                    p, im0, _ = path[i], im0s[i].copy(), dataset.count

                    s += f'{i}: '
                else:
                    p, im0, _ = path, im0s.copy(), getattr(dataset, 'frame', 0)

                annotator = Annotator(im0, line_width=2, pil=not ascii)

                if det is not None and len(det):
                    # Rescale boxes from img_size to im0 size
                    det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                              im0.shape).round()

                    # Print results
                    for c in det[:, -1].unique():
                        n = (det[:, -1] == c).sum()  # detections per class
                        s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                    xywhs = xyxy2xywh(det[:, 0:4])
                    confs = det[:, 4]
                    clss = det[:, 5]

                    # pass detections to deepsort
                    t4 = time_sync()
                    outputs = deepsort.update(xywhs.cpu(), confs.cpu(),
                                              clss.cpu(), im0)
                    t5 = time_sync()
                    dt[3] += t5 - t4

                    if len(outputs) > 0:
                        for j, (output, conf) in enumerate(zip(outputs,
                                                               confs)):
                            bboxes = output[0:4]
                            id = output[4]
                            cls = output[5]

                            c = int(cls)  # integer class
                            label = f'{id} {names[c]} {conf:.2f}'
                            annotator.box_label(bboxes,
                                                label,
                                                color=colors(c, True))

                            if save_txt:
                                # to MOT format
                                bbox_left = output[0]
                                bbox_top = output[1]
                                bbox_w = output[2] - output[0]
                                bbox_h = output[3] - output[1]
                                # bbox_left = bbox_left + bbox_h
                                bbox_top = bbox_top + bbox_h

                                agent_data = {
                                    'frame': int(frame_idx + 1),
                                    'agent_id': int(id),
                                    "labels": str(names[c]),
                                    "x": int(bbox_left),
                                    "y": int(bbox_top)
                                }

                                print("agent", agent_data)

                                collection.insert_one(agent_data)

                                #db.object_detection.insert_one(agent_data)
                                #db.pedestrian_detection_15_june.insert_one(agent_data)
                                #db.test_21_july.insert_one(agent_data)

                    LOGGER.info(
                        f'{s}Done. YOLO:({t3 - t2:.3f}s), DeepSort:({t5 - t4:.3f}s)'
                    )

                else:
                    deepsort.increment_ages()
                    LOGGER.info('No detections')

                im0 = annotator.result()
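
The homography loaded from parameters.json is never used in the excerpt above. Purely as a hedged follow-up sketch, it could map the stored image coordinates onto ground-plane coordinates with OpenCV's perspective transform.

import cv2
import numpy as np

def image_to_world(points_xy, homography):
    # points_xy: iterable of (x, y) pixel coordinates; homography: 3x3 matrix.
    pts = np.asarray(points_xy, dtype=np.float32).reshape(-1, 1, 2)
    world = cv2.perspectiveTransform(pts, homography.astype(np.float32))
    return world.reshape(-1, 2)
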
Example #6
def main(yolo5_config):
    print("=> main task started: {}".format(
        datetime.now().strftime('%H:%M:%S')))

    # * load model
    a = time.time()
    Model = torch.load(
        yolo5_config.weights,
        map_location=lambda storage, loc: storage.cuda(int(
            yolo5_config.device)))['model'].float().fuse().eval()
    class_names = Model.module.names if hasattr(Model,
                                                'module') else Model.names
    print("==> class names: ", class_names)
    class_colors = [[random.randint(0, 255) for _ in range(3)]
                    for _ in range(len(class_names))]
    b = time.time()
    print("=> load model, cost:{:.2f}s".format(b - a))

    # * clean output folder
    sys_cmd = "rm -rf {}".format(yolo5_config.output)
    child = subprocess.Popen(sys_cmd, shell=True)
    child.wait()
    os.makedirs(yolo5_config.output, exist_ok=True)
    c = time.time()
    print("=> clean the output path, cost:{:.2f}s".format(c - b))

    # * multi process
    if yolo5_config.pools > 1:
        myP = Pool(yolo5_config.pools)
        print("=> using process pool")
    else:
        myP = None
        print("=> using single process")

    # * init deepsort tracker
    if yolo5_config.task in ['track', 'count', 'vector_field']:
        cfg = get_config()
        cfg.merge_from_file("deep_sort/configs/deep_sort.yaml")
        deepsort_tracker = DeepSort(
            cfg.DEEPSORT.REID_CKPT,
            max_dist=cfg.DEEPSORT.MAX_DIST,
            min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
            nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
            max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
            max_age=cfg.DEEPSORT.MAX_AGE,
            n_init=cfg.DEEPSORT.N_INIT,
            nn_budget=cfg.DEEPSORT.NN_BUDGET,
            use_cuda=True,
            use_appearence=True)

    # * load image and process
    mycap = Image_Capture(yolo5_config.input)
    if yolo5_config.task == 'count':
        theLine = Count_Line([220, 240], [220, 640])
        class_list = yolo5_config.classes if yolo5_config.classes is not None else [
            0, 1, 2, 3
        ]
        Obj_Counter = Object_Counter([class_names[key] for key in class_list])
    elif yolo5_config.task == 'vector_field':
        Field = Vector_Field()
    elif yolo5_config.task == 'bg_model':
        bg_model = cv2.createBackgroundSubtractorMOG2(125, 20, False)
    else:
        cameArea = Area_Restrict(
            yolo5_config.area,
            [mycap.get_height(), mycap.get_width()])
    total_num = mycap.get_length()
    while mycap.ifcontinue():
        ret, img, img_name = mycap.read()
        if ret:
            save_path = os.path.join(yolo5_config.output, img_name)
            if yolo5_config.task == 'detect':
                if myP is not None:
                    myP.apply_async(Detection_Processing,
                                    args=(
                                        img,
                                        save_path,
                                        yolo5_config,
                                        Model,
                                        class_names,
                                        cameArea,
                                        class_colors,
                                    ))
                else:
                    Detection_Processing(img, save_path, yolo5_config, Model,
                                         class_names, cameArea, class_colors)
            elif yolo5_config.task == 'dense':
                if myP is not None:
                    myP.apply_async(Denseing_Processing,
                                    args=(
                                        img,
                                        save_path,
                                        yolo5_config,
                                        Model,
                                        class_names,
                                        cameArea,
                                        class_colors,
                                    ))
                else:
                    Denseing_Processing(img, save_path, yolo5_config, Model,
                                        class_names, cameArea, class_colors)
            elif yolo5_config.task == 'track':
                Tracking_Processing(myP, img, save_path, yolo5_config, Model,
                                    class_names, cameArea, deepsort_tracker,
                                    class_colors)
            elif yolo5_config.task == 'count':
                Counting_Processing(myP, img, save_path, yolo5_config, Model,
                                    class_names, theLine, deepsort_tracker,
                                    Obj_Counter, class_colors)
            elif yolo5_config.task == 'vector_field':
                Vector_Field_Processing(myP, img, save_path, yolo5_config,
                                        Model, class_names, Field,
                                        deepsort_tracker, class_colors)
            elif yolo5_config.task == 'bg_model':
                Background_Modeling(myP, img, save_path, bg_model)
            elif yolo5_config.task == 'empty':
                cv2.imwrite(save_path, img)
                time.sleep(0.04)
        sys.stdout.write("\r=> processing at %d; total: %d" %
                         (mycap.get_index(), total_num))
        sys.stdout.flush()

    if yolo5_config.pools > 1:
        myP.close()
        myP.join()
    mycap.release()
    print("\n=> process done {}/{} images, total cost: {:.2f}s [{:.2f} fps]".
          format(len(os.listdir(yolo5_config.output)), total_num,
                 time.time() - c,
                 len(os.listdir(yolo5_config.output)) / (time.time() - c)))

    # * merge video
    if yolo5_config.video:
        print("=> generating video, may take some times")
        merge_video(yolo5_config.output)

    print("=> main task finished: {}".format(
        datetime.now().strftime('%H:%M:%S')))
Example #7
def detect(opt, device, save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith('rtsp') or source.startswith(
        'http') or source.endswith('.txt')

    colorOrder = ['red', 'purple', 'blue', 'green', 'yellow', 'orange']
    frame_num = 0
    framestr = 'Frame {frame}'
    fpses = []
    frame_catch_pairs = []
    ball_person_pairs = {}

    for color in colorDict:
        ball_person_pairs[color] = 0

    # Read Class Name Yaml
    with open(opt.data) as f:
        data_dict = yaml.load(f, Loader=yaml.FullLoader)
    names = data_dict['names']

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Initialize
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Run inference
    if device.type != 'cpu':
        model(
            torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(
                next(model.parameters())))  # run once
    t0 = time.time()
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out) / Path(p).stem) + (
                '_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                bbox_xywh = []
                confs = []
                clses = []

                # Write results
                for *xyxy, conf, cls in det:

                    img_h, img_w, _ = im0.shape  # get image shape
                    x_c, y_c, bbox_w, bbox_h = bbox_rel(img_w, img_h, *xyxy)
                    obj = [x_c, y_c, bbox_w, bbox_h]
                    bbox_xywh.append(obj)
                    confs.append([conf.item()])
                    clses.append([cls.item()])

                xywhs = torch.Tensor(bbox_xywh)
                confss = torch.Tensor(confs)
                clses = torch.Tensor(clses)
                # Pass detections to deepsort
                outputs = []
                global groundtruths_path
                if not 'disable' in groundtruths_path:
                    # print('\nenabled', groundtruths_path)
                    groundtruths = solution.load_labels(
                        groundtruths_path, img_w, img_h, frame_num)
                    if (groundtruths.shape[0] == 0):
                        outputs = deepsort.update(xywhs, confss, clses, im0)
                    else:
                        # print(groundtruths)
                        xywhs = groundtruths[:, 2:]
                        tensor = torch.tensor((), dtype=torch.int32)
                        confss = tensor.new_ones((groundtruths.shape[0], 1))
                        clses = groundtruths[:, 0:1]
                        outputs = deepsort.update(xywhs, confss, clses, im0)

                    if frame_num >= 2:
                        for real_ID in groundtruths[:, 1:].tolist():
                            for DS_ID in xyxy2xywh(outputs[:, :5]):
                                if (abs(DS_ID[0] - real_ID[1]) / img_w < 0.005
                                    ) and (abs(DS_ID[1] - real_ID[2]) / img_h <
                                           0.005) and (
                                               abs(DS_ID[2] - real_ID[3]) /
                                               img_w < 0.005) and (
                                                   abs(DS_ID[3] - real_ID[4]) /
                                                   img_w < 0.005):
                                    id_mapping[DS_ID[4]] = int(real_ID[0])
                else:
                    outputs = deepsort.update(xywhs, confss, clses, im0)

                # draw boxes for visualization
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, 4]
                    clses = outputs[:, 5]
                    scores = outputs[:, 6]

                    #Temp solution to get correct id's
                    mapped_id_list = []
                    for ids in identities:
                        if (ids in id_mapping):
                            mapped_id_list.append(int(id_mapping[ids]))
                        else:
                            mapped_id_list.append(ids)

                    ball_detect, frame_catch_pairs, ball_person_pairs = solution.detect_catches(
                        im0, bbox_xyxy, clses, mapped_id_list, frame_num,
                        colorDict, frame_catch_pairs, ball_person_pairs,
                        colorOrder, save_img)

                    t3 = time_synchronized()
                    draw_boxes(im0, bbox_xyxy, [names[i] for i in clses],
                               scores, ball_detect, identities)
                else:
                    t3 = time_synchronized()

            #Draw frame number
            tmp = framestr.format(frame=frame_num)
            t_size = cv2.getTextSize(tmp, cv2.FONT_HERSHEY_PLAIN, 2, 2)[0]
            cv2.putText(im0, tmp, (0, (t_size[1] + 10)),
                        cv2.FONT_HERSHEY_PLAIN, 2, [255, 255, 255], 2)

            #Inference Time
            fps = (1 / (t3 - t1))
            fpses.append(fps)
            print('FPS=%.2f' % fps)

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer

                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*opt.fourcc),
                            fps, (w, h))
                    vid_writer.write(im0)
            frame_num += 1

    #t4 = time_synchronized()
    avgFps = (sum(fpses) / len(fpses))
    print('Average FPS = %.2f' % avgFps)
    #print('Total Runtime = %.2f' % (t4 - t0))

    outpath = os.path.basename(source)
    outpath = outpath[:-4]
    outpath = out + '/' + outpath + '_out.csv'
    solution.write_catches(outpath, frame_catch_pairs, colorOrder)

    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        if platform == 'darwin':  # MacOS
            os.system('open ' + save_path)
Example #8
def main():
    video_name = '1.avi'
    cap = cv2.VideoCapture(f'data/videos/{video_name}')
    fource = cv2.VideoWriter_fourcc(*'mp4v')
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    vid_writer = cv2.VideoWriter(f'runs/track/{video_name}.mp4', fource, 30,
                                 (width, height))
    """ yolov5 目标检测器 """
    yolov5_detector = YOLOv5Detector(weights='weights/yolov5s.pt',
                                     conf_thres=0.6)
    """ deepsort 追踪器 """
    cfg = get_config()
    cfg.merge_from_file("deep_sort/configs/deep_sort.yaml")
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)
    window_name = 'Anti Running Car Tracking'

    # Record target_point_count points per target, used to judge whether it has stalled
    target_point_count = 50
    path_cars = []

    while True:
        state, frame = cap.read()
        if not state:
            break
        prev_time = time.time()
        """ 检测目标 """
        image, bbox_container = yolov5_detector(frame)
        """ 仅保留车信息"""
        bbox_container = cut_bbox_container(bbox_container)
        """ 初始化一些变量 """
        xywh_bboxs = []
        labels = []
        confs = []
        for bbox in bbox_container:
            xywh_bboxs.append(xyxy_to_xywh(bbox['box']))
            labels.append(bbox['class'])
            confs.append(bbox['confidence'])
        """ 检测到目标后才有追踪 """
        if labels:
            """ detections --> deepsort """
            xywhs = torch.Tensor(xywh_bboxs)
            confss = torch.Tensor(confs)
            outputs = deepsort.update(xywhs, confss, labels, frame)
            obj_ids = []
            bbox_draw = []
            num = 0
            if len(outputs) > 0:
                for (x1, y1, x2, y2, label, track_id) in outputs:
                    bbox_draw.append({'class': label, 'box': [x1, y1, x2, y2]})
                    obj_ids.append(track_id)
                    """ 记录所有目标的路径 每个目标记录点数为 target_point_count """
                    while track_id > len(path_cars):
                        path_cars.append([])
                    path_cars[track_id - 1].append(
                        (0.5 * (x1 + x2), 0.5 * (y1 + y2)))
                    """ 超过的点数从首点删除 """
                    while len(path_cars[track_id - 1]) > target_point_count:
                        path_cars[track_id - 1].remove(path_cars[track_id -
                                                                 1][0])
                """ 绘图显示 """
                num = draw_image(frame, bbox_draw, obj_ids, path_cars,
                                 target_point_count)
            """ 输出一些信息 """
            for info in bbox_draw:
                print(info)
            print(obj_ids)
            print('---')
            """ fps"""
            fps = int(1 / (time.time() - prev_time))
            cv2.putText(frame,
                        f'fps={fps} max_id={len(path_cars)}', (10, 40),
                        0,
                        1, [0, 255, 0],
                        thickness=1,
                        lineType=cv2.LINE_AA)
            """ 滞留"""
            cv2.putText(frame,
                        f'stop={num}', (10, 80),
                        0,
                        1, [0, 255, 0],
                        thickness=1,
                        lineType=cv2.LINE_AA)
            """ 拥堵"""
            if num >= 3:
                cv2.putText(frame,
                            f'crowded{num}', (10, 120),
                            0,
                            1, [0, 255, 0],
                            thickness=1,
                            lineType=cv2.LINE_AA)
            else:
                cv2.putText(frame,
                            f'normal', (10, 120),
                            0,
                            1, [0, 255, 0],
                            thickness=1,
                            lineType=cv2.LINE_AA)
            """ 车的总数"""
            cv2.putText(frame,
                        f'total number={len(bbox_container)}', (10, 160),
                        0,
                        1, [0, 255, 0],
                        thickness=1,
                        lineType=cv2.LINE_AA)

        cv2.imshow(window_name, frame)
        vid_writer.write(frame)
        cv2.waitKey(1)
        """ 点 x 退出 """
        if cv2.getWindowProperty(window_name, cv2.WND_PROP_AUTOSIZE) < 1:
            break
    cap.release()
    vid_writer.release()
    cv2.destroyAllWindows()
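
cut_bbox_container() is not shown in the example; a minimal sketch of what it is assumed to do (keep only vehicle detections), based on the dictionary keys the detector output carries and a guessed label set.

VEHICLE_CLASSES = {'car', 'bus', 'truck'}  # assumed label set

def cut_bbox_container(bbox_container):
    # Keep only detections whose class name looks like a vehicle.
    return [bbox for bbox in bbox_container if bbox['class'] in VEHICLE_CLASSES]
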
Example #9
from deep_sort.configs.parser import get_config
from deep_sort.deep_sort import DeepSort
import torch
import cv2
import random

colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(80)]
palette = (2**11 - 1, 2**15 - 1, 2**20 - 1)
cfg = get_config()
cfg.merge_from_file("deep_sort/configs/deep_sort.yaml")
deepsort = DeepSort(
    cfg.DEEPSORT.REID_CKPT,  # DeepSort initialization
    max_dist=cfg.DEEPSORT.MAX_DIST,
    min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
    nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
    max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
    max_age=cfg.DEEPSORT.MAX_AGE,
    n_init=cfg.DEEPSORT.N_INIT,
    nn_budget=cfg.DEEPSORT.NN_BUDGET,
    use_cuda=True)


def plot_bboxes(image, bboxes, line_thickness=None):
    # Plots one bounding box on image img
    img_again = image.copy()
    tl = line_thickness or round(0.002 * (image.shape[0] + image.shape[1]) /
                                 2) + 1  # line/font thickness
    for (x1, y1, x2, y2, cls_id, pos_id) in bboxes:
        if pos_id < 80:
            color = colors[pos_id]
        else:
Example #10
def detect(weights='',
           source='inferences/images',
           output='inferences/output', 
           img_size=640, 
           conf_thres=0.4,
           iou_thres=0.5, 
           device='', 
           view_img=False,
           save_img=False,
           save_txt=False,
           classes=None,
           agnostic_nms=True,
           augment=True,
           update=True,
           fps_count=1,
           line_coordinat = [],  # [[(x1,y1),(x2,y2)], ...]
           polygon=[],  # [[[(x1,y1),(x2,y2),(x3,y3),...], [0(u)/1(r)/2(d)/3(l)]], ...]
           invalid_move = []  # [[0,1], ...]
           ):
    global data
    global stop_is_pressed
    global type_process
    
    out, source, weights, view_img, save_txt, imgsz = \
        output, source, weights, view_img, save_txt, img_size
    webcam = source == '0' or source.startswith(
        'rtsp') or source.startswith('http') or source.endswith('.txt')

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file('./deep_sort/configs/deep_sort.yaml')
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Initialize
    device = torch_utils.select_device(device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    google_utils.attempt_download(weights)
    model = torch.load(weights, map_location=device)[
        'model'].float().eval()  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        # view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
        # dataset = LoadWebcam(source, img_size=imgsz)
    else:
        save_img = True
        # view_img = True
        dataset = LoadImages(source, img_size=imgsz)
        # dataset = LoadStreams(source, img_size=imgsz)
    # save_img = True
    # view_img = True

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)]
                for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    # run once
    _ = model(img.half() if half else img) if device.type != 'cpu' else None
    # print(model)
    k = 0
    limit = 60
    id_limit = 50
    trap_xy = 0
    trap_frame = False
    output_all_frames = {}
    counting_id = []
    invalid_direction_id = []
    invalid_turn_id = []
    for path, img, im0s, vid_cap in dataset:
        # print(stop_is_pressed)
        while(stop_is_pressed):
        #     cv2.imwrite('./static/images/last_im0.jpg', im0)
            yield (b'--frame\r\n'
                b'Content-Type: image/jpeg\r\n\r\n' + frame + b'\r\n\r\n')
            # break
        # else:
        k += 1
        trap_xy += 1
        if k >= fps_count:
            k = 0
            img = torch.from_numpy(img).to(device)
            img = img.half() if half else img.float()  # uint8 to fp16/32
            img /= 255.0  # 0 - 255 to 0.0 - 1.0
            if img.ndimension() == 3:
                img = img.unsqueeze(0)
            # print(model(img, augment=augment)[0])
            # Inference
            t1 = torch_utils.time_synchronized()
            pred = model(img, augment=augment)[0]
            # print('pred b4 nms', pred)
            # Apply NMS
            pred = non_max_suppression(
                pred, conf_thres, iou_thres, classes=classes, agnostic=agnostic_nms)
            t2 = torch_utils.time_synchronized()
            # print('pred', pred)
            # Process detections
            for i, det in enumerate(pred):  # detections per image
                if webcam:  # batch_size >= 1
                    p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
                else:
                    p, s, im0 = path, '', im0s

                save_path = str(Path(out) / Path(p).name)
                txt_path = str(Path(out) / Path(p).stem) + ('_%g' %
                                                            dataset.frame if dataset.mode == 'video' else '')
                s += '%gx%g ' % img.shape[2:]  # print string
                # normalization gain whwh
                gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]
                if det is not None and len(det):
                    # Rescale boxes from img_size to im0 size
                    det[:, :4] = scale_coords(
                        img.shape[2:], det[:, :4], im0.shape).round()

                    # Print results
                    for c in det[:, -1].unique():
                        n = (det[:, -1] == c).sum()  # detections per class
                        s += '%g %ss, ' % (n, names[int(c)])  # add to string
                    
                    bbox_xywh = []
                    confs = []
                    save_from_det = {}
                    for x in range(0, 5):
                        save_from_det[x] = []
                    # Write results
                    for *xyxy, conf, cls in det:
                        for x in range(0, 5):
                            if int(cls) == x:
                                # data[x] += 1
                                save_from_det[x].append(
                                    [int(xyxy[0].item()), int(xyxy[1].item()),
                                     int(xyxy[2].item()), int(xyxy[3].item())])
                                break

                        img_h, img_w, _ = im0.shape
                        x_c, y_c, bbox_w, bbox_h = bbox_rel(img_w, img_h, *xyxy)
                        obj = [x_c, y_c, bbox_w, bbox_h]
                        bbox_xywh.append(obj)
                        confs.append([conf.item()])

                        # if save_txt:  # Write to file
                        #     xywh = (xyxy2xywh(torch.tensor(xyxy).view(
                        #         1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        #     with open(txt_path + '.txt', 'a') as f:
                        #         f.write(('%g ' * 5 + '\n') %
                        #                 (cls, *xywh))  # label format

                        # if save_img or view_img:  # Add bbox to image
                        #     label = '%s %.2f' % (names[int(cls)], conf)
                        #     plot_one_box(xyxy, im0, label=label,
                        #                     color=colors[int(cls)], line_thickness=2)
                    # print('Save from det : ', save_from_det)
                    xywhs = torch.Tensor(bbox_xywh)
                    confss = torch.Tensor(confs)

                    for p, q in line_coordinat:
                        cv2.line(im0, p, q, (100, 255, 100), 2)
                    for x in range(len(polygon)):
                        poly = polygon[x][0]
                        pts = np.array(poly, np.int32)
                        pts = pts.reshape((-1, 1, 2)) 
                        cv2.polylines(im0, [pts], True, (255, 0, 0), 1) 
  
                    
                    # Pass detections to deepsort
                    outputs = deepsort.update(xywhs, confss, im0)
                    # print('Output Deep Sort: ', outputs)
                    # draw boxes for visualization
                    if len(outputs) > 0:
                        bbox_xyxy = outputs[:, :4]
                        identities = outputs[:, -1]
                        draw_boxes(im0, bbox_xyxy, identities)
                        # Save all results to dictionary
                        for i, box in enumerate(bbox_xyxy):
                            x1, y1, x2, y2 = [int(i) for i in box]
                            # print('x1 y1 x2 y2 : ', x1,y1, x2, y2)
                            # print('i : ', i)
                            # print('int(identities[i]) : ', int(identities[i]))
                            ds_class = float('inf')
                            smallest = float('inf')
                            for x in save_from_det:
                                for sx1, sy1, sx2, sy2 in save_from_det[x]:
                                    diff = sum(abs(np.array([x1, y1, x2, y2])
                                            -np.array([sx1, sy1, sx2, sy2])))
                                    if diff < smallest:
                                        smallest = diff
                                        ds_class = x
                            if int(identities[i]) in output_all_frames.keys():
                                # check crossed line
                                if type_process[0]:
                                    (w1, h1) = (x2-x1, y2-y1)
                                    prev_xyxy = output_all_frames[int(identities[i])][0][-1]
                                    # print('prev xyxy', prev_xyxy)
                                    (xp, yp) = (int(prev_xyxy[0]), int(prev_xyxy[1]))
                                    (wp, hp) = (int(prev_xyxy[2]-xp), int(prev_xyxy[3]-yp))
                                    # p1 = (int(x1 + (w1-x1)/2), int(y1 + (h1-y1)/2))
                                    # q1 = (int(xp + (wp-xp)/2), int(yp + (hp-yp)/2))
                                    p1 = (int(x1 + (w1)/2), int(y1 + (h1)/2))
                                    q1 = (int(xp + (wp)/2), int(yp + (hp)/2))
                                    # print('p1 q1 : ', p1, q1)
                                    cv2.line(im0, p1, q1, (10, 255, 10), 3)
                                    pt1 = IPoint(p1[0], p1[1])
                                    qt1 = IPoint(q1[0], q1[1])
                                    for p2, q2 in line_coordinat:
                                        p2 = IPoint(p2[0], p2[1])
                                        q2 = IPoint(q2[0], q2[1])
                                        if doIntersect(pt1, qt1, p2, q2):
                                            if int(identities[i]) not in counting_id:
                                                counting_id.append(int(identities[i]))
                                                data[most_frequent(output_all_frames[int(identities[i])][1])] += 1
                                # check direction
                                # if type_process[1] and len(output_all_frames[int(identities[i])][0]) >= limit:
                                #     #change xyxy to the oldest
                                #     prev_xyxy = output_all_frames[int(identities[i])][0][0]
                                #     (xp, yp) = (int(prev_xyxy[0]), int(prev_xyxy[1]))
                                #     (wp, hp) = (int(prev_xyxy[2]-xp), int(prev_xyxy[3]-yp))
                                #     q1 = (int(xp + (wp)/2), int(yp + (hp)/2))
                                #     minus_x = q1[0] - p1[0]
                                #     minus_y = q1[1] - p1[1]
                                #     # minus_y1 = prev_xyxy[1] - y1 
                                #     # minus_y2 = prev_xyxy[3] - y2
                                #     # minus_x1 = prev_xyxy[0] - x1
                                #     # minus_x2 = prev_xyxy[2] - x2
                                #     # 0=up, 1=right, 2=down, 3=left
                                #     # if minus_y1 > 0 and minus_y2 > 0:
                                #     if minus_y > 0:
                                #         output_all_frames[int(identities[i])][4].append(0)
                                #         label = '^'
                                #         t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1.2, 1)[0]
                                #         cv2.putText(im0, label, (x1, y1 - int(t_size[1]/2)), cv2.FONT_HERSHEY_PLAIN, 1.2, [255, 255, 255], 1)
                                #     # if minus_y1 < 0 and minus_y2 < 0:
                                #     if minus_y < 0:
                                #         output_all_frames[int(identities[i])][4].append(2)
                                #         label = 'v'
                                #         t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1.2, 1)[0]
                                #         cv2.putText(im0, label, (x1, y1 - int(t_size[1]/2)), cv2.FONT_HERSHEY_PLAIN, 1.2, [255, 255, 255], 1)
                                #     # if minus_x1 > 0 and minus_x2 > 0:
                                #     if minus_x > 0:
                                #         output_all_frames[int(identities[i])][4].append(3)
                                #         label = '<'
                                #         t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1.2, 1)[0]
                                #         cv2.putText(im0, label, (x1, y1 - int(t_size[1]/2)), cv2.FONT_HERSHEY_PLAIN, 1.2, [255, 255, 255], 1)
                                #     # if minus_x1 < 0 and minus_x2 < 0:
                                #     if minus_x < 0:
                                #         output_all_frames[int(identities[i])][4].append(1)
                                #         label = '>'
                                #         t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1.2, 1)[0]
                                #         cv2.putText(im0, label, (x1, y1 - int(t_size[1]/2)), cv2.FONT_HERSHEY_PLAIN, 1.2, [255, 255, 255], 1)
                                # check region
                                l_check = 3
                                if type_process[2]:
                                    for n in range(len(polygon)):
                                        path = MPath.Path(polygon[n][0])
                                        # inside2 = path.contains_points([[i[0], i[1]]])
                                        if path.contains_point((x1+int(w1/2), y1+int(h1/2))):
                                            output_all_frames[int(identities[i])][2].append(n+1)
                                            output_all_frames[int(identities[i])][3].append(polygon[n][1])

                                            #check direction
                                            # if type_process[1] and len(output_all_frames[int(identities[i])][0]) >= limit:
                                            if type_process[1] and len(output_all_frames[int(identities[i])][0]) >= 1/3*limit\
                                                    and (trap_xy >= 10 or trap_frame):
                                                trap_xy = 0
                                                trap_frame = True
                                                # change xyxy to the oldest
                                                prev_xyxy = output_all_frames[int(identities[i])][0][0]
                                                (xp, yp) = (int(prev_xyxy[0]), int(prev_xyxy[1]))
                                                (wp, hp) = (int(prev_xyxy[2]-xp), int(prev_xyxy[3]-yp))
                                                q1 = (int(xp + (wp)/2), int(yp + (hp)/2))
                                                minus_x = q1[0] - p1[0]
                                                minus_y = q1[1] - p1[1]
                                                limit_dir = 1/3
                                                if minus_y > 0 and abs(minus_y) > abs(limit_dir*minus_x):
                                                    output_all_frames[int(identities[i])][4].append(0)
                                                    label = '^'
                                                    t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1.2, 1)[0]
                                                    cv2.putText(im0, label, (x1, y1 - int(t_size[1]/2)), cv2.FONT_HERSHEY_PLAIN, 1.2, [255, 255, 255], 1)
                                                if minus_y < 0 and abs(minus_y) > abs(limit_dir*minus_x):
                                                    output_all_frames[int(identities[i])][4].append(2)
                                                    label = 'v'
                                                    t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1.2, 1)[0]
                                                    cv2.putText(im0, label, (x1, y1 - int(t_size[1]/2)), cv2.FONT_HERSHEY_PLAIN, 1.2, [255, 255, 255], 1)
                                                if minus_x > 0 and abs(minus_x) > abs(limit_dir*minus_y):
                                                    output_all_frames[int(identities[i])][4].append(3)
                                                    label = '<'
                                                    t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1.2, 1)[0]
                                                    cv2.putText(im0, label, (x1, y1 - int(t_size[1]/2)), cv2.FONT_HERSHEY_PLAIN, 1.2, [255, 255, 255], 1)
                                                if minus_x < 0 and abs(minus_x) > abs(limit_dir*minus_y):
                                                    output_all_frames[int(identities[i])][4].append(1)
                                                    label = '>'
                                                    t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1.2, 1)[0]
                                                    cv2.putText(im0, label, (x1, y1 - int(t_size[1]/2)), cv2.FONT_HERSHEY_PLAIN, 1.2, [255, 255, 255], 1)
                                            break
                                # check for invalid turn
                                if len(output_all_frames[int(identities[i])][2]) >= int(1/4*limit) and type_process[2]:
                                    # unique, frequency = np.unique(output_all_frames[int(identities[i])][2],
                                    #                                 return_counts=True)
                                    first = True
                                    region_trace = []
                                    for r in output_all_frames[int(identities[i])][2]:
                                        if first:
                                            reg = r
                                            region_trace.append(r)
                                            first = False
                                        if reg != r:
                                            region_trace.append(r)
                                            reg = r
                                    if len(region_trace) > 1:
                                        # reset centroid capture if move to another region
                                        output_all_frames[int(identities[i])][4] = []
                                        for reg1, reg2 in invalid_move:
                                            for k in range(len(region_trace)):
                                                if k+1 >= len(region_trace):
                                                    break
                                                if (region_trace[k], region_trace[k+1]) == (reg1, reg2):
                                                    if int(identities[i]) not in invalid_turn_id:
                                                        invalid_turn_id.append(int(identities[i]))
                                                        data[6] += 1
                                                    label = 'X'
                                                    t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 2, 2)[0]
                                                    cv2.putText(im0, label, (x1 + int(t_size[1]/2), y1), cv2.FONT_HERSHEY_PLAIN, 2, [0, 0, 255], 2)

                                # check for invalid direction
                                if len(output_all_frames[int(identities[i])][3]) >= l_check\
                                        and len(output_all_frames[int(identities[i])][4]) >= l_check\
                                             and type_process[1]:
                                    # if most_frequent(output_all_frames[int(identities[i])][3]) \
                                    #         != most_frequent(output_all_frames[int(identities[i])][4]):
                                    unique, frequency = np.unique(output_all_frames[int(identities[i])][4],
                                                                  return_counts=True)
                                    # true_direction = most_frequent(output_all_frames[int(identities[i])][3])
                                    true_direction = output_all_frames[int(identities[i])][3][-1]
                                    opp_direction = true_direction + 2
                                    if opp_direction > 3:
                                        opp_direction -= 4
                                    
                                    id_opp_in_unique = -1
                                    for x in range(len(unique)):
                                        if opp_direction == unique[x]:
                                            id_opp_in_unique = x
                                            break
                                    # if id_opp_in_unique >= 0 and frequency[id_opp_in_unique] > int(1/4*limit):
                                    if id_opp_in_unique >= 0 and frequency[id_opp_in_unique] > l_check:
                                        print(output_all_frames[int(identities[i])][4], int(identities[i]))
                                        if int(identities[i]) not in invalid_direction_id:
                                            invalid_direction_id.append(int(identities[i]))
                                            data[5] += 1
                                        label = '!'
                                        t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 2, 2)[0]
                                        cv2.putText(im0, label, (x1 + int(t_size[1]/2), y1), cv2.FONT_HERSHEY_PLAIN, 2, [0, 0, 255], 2)
                            
                                    # if true_direction not in unique:
                                    #     if int(identities[i]) not in invalid_direction_id:
                                    #         invalid_direction_id.append(int(identities[i]))
                                    #         data[5] += 1
                                    #     label = '!'
                                    #     t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 2, 2)[0]
                                    #     cv2.putText(im0, label, (x1 + int(t_size[1]/2), y1), cv2.FONT_HERSHEY_PLAIN, 2, [0, 0, 255], 2)
                                    # else:
                                    #     for x in range(len(unique)):
                                    #         if true_direction == unique[x]:
                                    #             id_true_in_unique = x
                                    #             break
                                    #     if frequency[id_true_in_unique] < int(1/3*limit):
                                    #         if int(identities[i]) not in invalid_direction_id:
                                    #             invalid_direction_id.append(int(identities[i]))
                                    #             data[5] += 1
                                    #         label = '!'
                                    #         t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 2, 2)[0]
                                    #         cv2.putText(im0, label, (x1 + int(t_size[1]/2), y1), cv2.FONT_HERSHEY_PLAIN, 2, [0, 0, 255], 2)
                                
                            else:
                                # oaf[ID] = [[in-frame coordinates], [class_type], [region],
                                #            [true_direction], [pred_direction]]
                                output_all_frames[int(identities[i])] = [[], [], [], [], []]

                            output_all_frames[int(identities[i])][0].append((x1, y1, x2, y2))
                            if len(output_all_frames[int(identities[i])][0]) > limit:
                                output_all_frames[int(identities[i])][0] = output_all_frames[int(identities[i])][0][-limit:]
                            output_all_frames[int(identities[i])][1].append(ds_class)
                            if len(output_all_frames[int(identities[i])][1]) > limit:
                                output_all_frames[int(identities[i])][1] = output_all_frames[int(identities[i])][1][-limit:]
                            if len(output_all_frames[int(identities[i])][2]) > limit:
                                output_all_frames[int(identities[i])][2] = output_all_frames[int(identities[i])][2][-limit:]
                            if len(output_all_frames[int(identities[i])][3]) > limit:
                                output_all_frames[int(identities[i])][3] = output_all_frames[int(identities[i])][3][-limit:]
                            if len(output_all_frames[int(identities[i])][4]) > limit:
                                output_all_frames[int(identities[i])][4] = output_all_frames[int(identities[i])][4][-limit:]

                        # delete output_all_frames oldest if more than n number of id
                        if len(output_all_frames) > id_limit:
                            unused = list(set(output_all_frames.keys())
                                     -set(sorted(output_all_frames.keys())[-id_limit:]))
                            for x in unused:
                                del output_all_frames[x]
                        # if len(counting_id) > id_limit:
                        #     counting_id = counting_id[-id_limit:]
                        if len(counting_id) > 3:
                            counting_id = counting_id[-3:]
                        # if len(invalid_direction_id) > id_limit:
                        #     invalid_direction_id = invalid_direction_id[-id_limit:]
                        if len(invalid_direction_id) > 3:
                            invalid_direction_id = invalid_direction_id[-3:]
                        # if len(invalid_turn_id) > id_limit:
                        #     invalid_turn_id = invalid_turn_id[-id_limit:]
                        if len(invalid_turn_id) > 3:
                            invalid_turn_id = invalid_turn_id[-3:]
                        # print('All Frame : ', output_all_frames)

                    # Write MOT compliant results to file
                    if save_txt and len(outputs) != 0:  
                        for j, output in enumerate(outputs):
                            bbox_left = output[0]
                            bbox_top = output[1]
                            bbox_w = output[2]
                            bbox_h = output[3]
                            identity = output[-1]
                            with open(txt_path, 'a') as f:
                                f.write(('%g ' * 10 + '\n') % (frame_idx, identity, bbox_left,
                                        bbox_top, bbox_w, bbox_h, -1, -1, -1, -1))  # label format

                # Print time (inference + NMS)
                print('%sDone. (%.3fs)' % (s, t2 - t1))
                # Stream results
                if view_img:
                    cv2.imshow(p, im0)
                    if cv2.waitKey(1) == ord('q'):  # q to quit
                        raise StopIteration

                # Save results (image with detections)
                if save_img:
                    if dataset.mode == 'images':
                        print('saving img!')
                        cv2.imwrite(save_path, im0)
                    else:
                        print('saving video!')
                        if vid_path != save_path:  # new video
                            vid_path = save_path
                            if isinstance(vid_writer, cv2.VideoWriter):
                                vid_writer.release()  # release previous video writer

                            fourcc = 'mp4v'  # output video codec
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                            vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                        vid_writer.write(im0)
            # invalid direction count reset
            trap_frame = False
            # cv2.imshow(p, im0)
            # if cv2.waitKey(1) == ord('q'):  # q to quit
            #     raise StopIteration
            ret, jpeg = cv2.imencode('.jpg', im0)
            frame = jpeg.tobytes()
            yield (b'--frame\r\n'
                b'Content-Type: image/jpeg\r\n\r\n' + frame + b'\r\n\r\n')

    if save_txt or save_img:
        print('Results saved to %s' % (os.getcwd() + os.sep + out))
        if platform == 'darwin':  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
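The line-crossing count in the example above depends on two helpers that are defined elsewhere in the repo and not shown here: doIntersect and most_frequent. A minimal sketch of both, assuming the usual orientation-based segment-intersection test and a simple majority vote over the stored class ids, might look like this:

from collections import Counter


def _orientation(p, q, r):
    # sign of the cross product: 0 = collinear, 1 = clockwise, 2 = counter-clockwise
    val = (q[1] - p[1]) * (r[0] - q[0]) - (q[0] - p[0]) * (r[1] - q[1])
    if val == 0:
        return 0
    return 1 if val > 0 else 2


def _on_segment(p, q, r):
    # true if q lies on segment p-r (the three points are assumed collinear)
    return (min(p[0], r[0]) <= q[0] <= max(p[0], r[0]) and
            min(p[1], r[1]) <= q[1] <= max(p[1], r[1]))


def doIntersect(p1, q1, p2, q2):
    # does segment p1-q1 intersect segment p2-q2?
    o1 = _orientation(p1, q1, p2)
    o2 = _orientation(p1, q1, q2)
    o3 = _orientation(p2, q2, p1)
    o4 = _orientation(p2, q2, q1)
    if o1 != o2 and o3 != o4:
        return True
    # collinear special cases
    if o1 == 0 and _on_segment(p1, p2, q1):
        return True
    if o2 == 0 and _on_segment(p1, q2, q1):
        return True
    if o3 == 0 and _on_segment(p2, p1, q2):
        return True
    if o4 == 0 and _on_segment(p2, q1, q2):
        return True
    return False


def most_frequent(values):
    # majority vote over the per-track class history
    return Counter(values).most_common(1)[0][0]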
Example #11
0
class Tracker():
    def __init__(self,
                 filter_class=None,
                 model='yolox-s',
                 ckpt='weights/yolox_s.pth',
                 LineMapping=None):
        self.LineMapping = LineMapping
        self.detector = Predictor(model, ckpt)
        cfg = get_config()
        cfg.merge_from_file("deep_sort/configs/deep_sort.yaml")
        self.deepsort = DeepSort(
            cfg.DEEPSORT.REID_CKPT,
            max_dist=cfg.DEEPSORT.MAX_DIST,
            min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
            nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
            max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
            max_age=cfg.DEEPSORT.MAX_AGE,
            n_init=cfg.DEEPSORT.N_INIT,
            nn_budget=cfg.DEEPSORT.NN_BUDGET,
            use_cuda=True)
        self.filter_class = COCO_CLASSES

    def update(self, image, visual=True, logger_=True):
        height, width, _ = image.shape
        _, info = self.detector.inference(image, visual=True, logger_=logger_)
        outputs = []
        data = []
        for dir_, line in (self.LineMapping or {}).items():
            cv2.line(image,
                     line[0],
                     line[1], (46, 162, 112),
                     thickness=8,
                     lineType=cv2.LINE_AA)

        if info['box_nums'] > 0:
            bbox_xywh = []
            scores = []
            objectids = []
            #bbox_xywh = torch.zeros((info['box_nums'], 4))
            for [x1, y1, x2,
                 y2], class_id, score in zip(info['boxes'], info['class_ids'],
                                             info['scores']):
                # if self.filter_class and class_names[int(class_id)] not in self.filter_class:
                #     continue
                # if score < 0.9 and class_names[int(class_id)]  == "bus":
                #     continue
                # color = compute_color_for_labels(int(class_id))
                bbox_xywh.append(
                    [int((x1 + x2) / 2),
                     int((y1 + y2) / 2), x2 - x1, y2 - y1])
                objectids.append(class_id)
                scores.append(score)

            bbox_xywh = torch.Tensor(bbox_xywh)
            outputs = self.deepsort.update(bbox_xywh, scores,
                                           info['class_ids'], image)
            data = []
            if len(outputs) > 0:
                if visual:
                    if len(outputs) > 0:
                        bbox_xyxy = outputs[:, :4]
                        identities = outputs[:, -2]
                        object_id = outputs[:, -1]
                        if self.LineMapping:
                            image, data = draw_boxes(
                                image,
                                bbox_xyxy,
                                object_id,
                                identities,
                                LineMapping=self.LineMapping)
                            image = vis_track(image, outputs)
        return image, outputs, data
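A minimal usage sketch for this Tracker class; the video path, line coordinates and checkpoint path below are placeholders, not values from the source:

import cv2

# hypothetical counting line: name -> (pt1, pt2)
lines = {'gate': ((0, 400), (1280, 400))}
tracker = Tracker(model='yolox-s', ckpt='weights/yolox_s.pth', LineMapping=lines)

cap = cv2.VideoCapture('traffic.mp4')
while True:
    ok, frame = cap.read()
    if not ok:
        break
    frame, outputs, data = tracker.update(frame)
    cv2.imshow('tracking', frame)
    if cv2.waitKey(1) == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()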
Example #12
0
def detect(opt):
    memory = {}
    counter = 0
    out, source, yolo_model, deep_sort_model, show_vid, save_vid, save_txt, imgsz, evaluate, half, project, name, exist_ok= \
        opt.output, opt.source, opt.yolo_model, opt.deep_sort_model, opt.show_vid, opt.save_vid, \
        opt.save_txt, opt.imgsz, opt.evaluate, opt.half, opt.project, opt.name, opt.exist_ok
    webcam = source == '0' or source.startswith('rtsp') or source.startswith(
        'http') or source.endswith('.txt')

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(deep_sort_model,
                        torch.device("cpu"),
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET)

    # Initialize
    device = select_device(opt.device)
    half &= device.type != 'cpu'  # half precision only supported on CUDA

    # The MOT16 evaluation runs multiple inference streams in parallel, each one writing to
    # its own .txt file, so in that case the output folder is left as-is instead of being
    # deleted and recreated
    if not evaluate:
        if os.path.exists(out):
            shutil.rmtree(out)  # delete output folder
        os.makedirs(out)  # make new output folder

    # Directories
    save_dir = increment_path(Path(project) / name,
                              exist_ok=exist_ok)  # increment run
    save_dir.mkdir(parents=True, exist_ok=True)  # make dir

    # Load model
    model = DetectMultiBackend(yolo_model, device=device, dnn=opt.dnn)
    stride, names, pt, jit, _ = model.stride, model.names, model.pt, model.jit, model.onnx
    imgsz = check_img_size(imgsz, s=stride)  # check image size

    # Half
    half &= pt and device.type != 'cpu'  # half precision only supported by PyTorch on CUDA
    if pt:
        model.model.half() if half else model.model.float()

    # Set Dataloader
    vid_path, vid_writer = None, None
    # Check if environment supports image displays
    if show_vid:
        show_vid = check_imshow()

    # Dataloader
    if webcam:
        show_vid = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source,
                              img_size=imgsz,
                              stride=stride,
                              auto=pt and not jit)
        bs = len(dataset)  # batch_size
    else:
        dataset = LoadImages(source,
                             img_size=imgsz,
                             stride=stride,
                             auto=pt and not jit)
        bs = 1  # batch_size
    vid_path, vid_writer = [None] * bs, [None] * bs

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names

    # extract what is in between the last '/' and last '.'
    txt_file_name = source.split('/')[-1].split('.')[0]
    txt_path = str(Path(save_dir)) + '/' + txt_file_name + '.txt'

    if pt and device.type != 'cpu':
        model(
            torch.zeros(1, 3, *imgsz).to(device).type_as(
                next(model.model.parameters())))  # warmup
    dt, seen = [0.0, 0.0, 0.0, 0.0], 0
    regionid = set()
    for frame_idx, (path, img, im0s, vid_cap, s) in enumerate(dataset):
        t1 = time_sync()
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)
        t2 = time_sync()
        dt[0] += t2 - t1

        # Inference
        visualize = increment_path(save_dir / Path(path).stem,
                                   mkdir=True) if opt.visualize else False
        pred = model(img, augment=opt.augment, visualize=visualize)
        t3 = time_sync()
        dt[1] += t3 - t2

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   opt.classes,
                                   opt.agnostic_nms,
                                   max_det=opt.max_det)
        dt[2] += time_sync() - t3
        # Process detections
        for i, det in enumerate(pred):  # detections per image
            seen += 1
            if webcam:  # batch_size >= 1
                p, im0, _ = path[i], im0s[i].copy(), dataset.count
                s += f'{i}: '
            else:
                p, im0, _ = path, im0s.copy(), getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # im.jpg, vid.mp4, ...
            s += '%gx%g ' % img.shape[2:]  # print string

            annotator = Annotator(im0,
                                  line_width=2,
                                  font='Arial.ttf',
                                  pil=not ascii)

            if det is not None and len(det):
                tboxes = []
                indexIDs = []
                previous = memory.copy()
                memory = {}
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                xywhs = xyxy2xywh(det[:, 0:4])
                confs = det[:, 4]
                clss = det[:, 5]

                # pass detections to deepsort
                t4 = time_sync()
                outputs = deepsort.update(xywhs.cpu(), confs.cpu(), clss.cpu(),
                                          im0)
                t5 = time_sync()
                dt[3] += t5 - t4

                # draw boxes for visualization
                if len(outputs) > 0:
                    for j, (output, conf) in enumerate(zip(outputs, confs)):

                        bboxes = output[0:4]
                        id = output[4]
                        cls = output[5]
                        roi = [(0, 0), (640, 0), (640, 380), (0, 380)]

                        (x, y) = (int(bboxes[0]), int(bboxes[1]))
                        (w, h) = (int(bboxes[2]), int(bboxes[3]))
                        inside = cv2.pointPolygonTest(np.array(roi), (x, h),
                                                      False)
                        if inside > 0:
                            regionid.add(id)

                        c = int(cls)  # integer class
                        label = f' {names[c]} {conf:.2f}'
                        cv2.putText(im0, "count =" + str(len(regionid)),
                                    (20, 50), 0, 1, (100, 200, 0), 2)
                        annotator.box_label(bboxes,
                                            label,
                                            color=colors(c, True))
                        if save_txt:
                            # to MOT format
                            bbox_left = output[0]
                            bbox_top = output[1]
                            bbox_w = output[2] - output[0]
                            bbox_h = output[3] - output[1]
                            # Write MOT compliant results to file
                            with open(txt_path, 'a') as f:
                                f.write(('%g ' * 10 + '\n') % (
                                    frame_idx + 1,
                                    id,
                                    bbox_left,  # MOT format
                                    bbox_top,
                                    bbox_w,
                                    bbox_h,
                                    -1,
                                    -1,
                                    -1,
                                    -1))

                LOGGER.info(
                    f'{s}Done. YOLO:({t3 - t2:.3f}s), DeepSort:({t5 - t4:.3f}s)'
                )
                LOGGER.info(f'counter = {len(regionid)}')

            else:
                deepsort.increment_ages()
                LOGGER.info('No detections')

            # Stream results
            im0 = annotator.result()
            if show_vid:
                cv2.imshow(str(p), im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_vid:
                if vid_path != save_path:  # new video
                    vid_path = save_path
                    if isinstance(vid_writer, cv2.VideoWriter):
                        vid_writer.release()  # release previous video writer
                    if vid_cap:  # video
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                    else:  # stream
                        fps, w, h = 30, im0.shape[1], im0.shape[0]

                    vid_writer = cv2.VideoWriter(
                        save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps,
                        (w, h))
                vid_writer.write(im0)

    # Print results
    t = tuple(x / seen * 1E3 for x in dt)  # speeds per image
    LOGGER.info(
        f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS, %.1fms deep sort update \
        per image at shape {(1, 3, *imgsz)}' % t)
    if save_txt or save_vid:
        print('Results saved to %s' % save_path)
        if platform == 'darwin':  # MacOS
            os.system('open ' + save_path)
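The ROI check in the example above uses cv2.pointPolygonTest, which with measureDist=False returns +1.0, 0.0 or -1.0 for inside / on the edge / outside the polygon. A small standalone sketch of the same idea, counting ids whose box bottom falls inside a polygon (the polygon and bbox values here are arbitrary placeholders; note the explicit int32 dtype that pointPolygonTest expects):

import cv2
import numpy as np

roi = np.array([(0, 0), (640, 0), (640, 380), (0, 380)], dtype=np.int32)
counted_ids = set()

def count_if_inside(track_id, xyxy):
    # use the bottom-centre of the box as the reference point
    x1, y1, x2, y2 = map(int, xyxy)
    point = ((x1 + x2) // 2, y2)
    if cv2.pointPolygonTest(roi, point, False) > 0:
        counted_ids.add(track_id)

count_if_inside(7, (100, 50, 180, 300))
print(len(counted_ids))  # -> 1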
Example #13
0
def detect(opt):
    out, source, yolo_model, deep_sort_model, show_vid, save_vid, save_txt, imgsz, evaluate, half, \
        project, exist_ok, update, save_crop = \
        opt.output, opt.source, opt.yolo_model, opt.deep_sort_model, opt.show_vid, opt.save_vid, \
        opt.save_txt, opt.imgsz, opt.evaluate, opt.half, opt.project, opt.exist_ok, opt.update, opt.save_crop
    webcam = source == '0' or source.startswith(
        'rtsp') or source.startswith('http') or source.endswith('.txt')

    # Initialize
    device = select_device(opt.device)
    half &= device.type != 'cpu'  # half precision only supported on CUDA

    # The MOT16 evaluation runs multiple inference streams in parallel, each one writing to
    # its own .txt file, so in that case the output folder is left as-is instead of being
    # deleted and recreated
    if not evaluate:
        if os.path.exists(out):
            shutil.rmtree(out)  # delete output folder
        os.makedirs(out)  # make new output folder

    # Directories
    if type(yolo_model) is str:  # single yolo model
        exp_name = yolo_model.split(".")[0]
    elif type(yolo_model) is list and len(yolo_model) == 1:  # single models after --yolo_model
        exp_name = yolo_model[0].split(".")[0]
    else:  # multiple models after --yolo_model
        exp_name = "ensemble"
    exp_name = exp_name + "_" + deep_sort_model.split('/')[-1].split('.')[0]
    save_dir = increment_path(Path(project) / exp_name, exist_ok=exist_ok)  # increment run if project name exists
    (save_dir / 'tracks' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Load model
    model = DetectMultiBackend(yolo_model, device=device, dnn=opt.dnn)
    stride, names, pt = model.stride, model.names, model.pt
    imgsz = check_img_size(imgsz, s=stride)  # check image size

    # Half
    half &= pt and device.type != 'cpu'  # half precision only supported by PyTorch on CUDA
    if pt:
        model.model.half() if half else model.model.float()

    # Set Dataloader
    vid_path, vid_writer = None, None
    # Check if environment supports image displays
    if show_vid:
        show_vid = check_imshow()

    # Dataloader
    if webcam:
        show_vid = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt)
        nr_sources = len(dataset)
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt)
        nr_sources = 1
    vid_path, vid_writer, txt_path = [None] * nr_sources, [None] * nr_sources, [None] * nr_sources

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)

    # Create as many trackers as there are video sources
    deepsort_list = []
    for i in range(nr_sources):
        deepsort_list.append(
            DeepSort(
                deep_sort_model,
                device,
                max_dist=cfg.DEEPSORT.MAX_DIST,
                max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET,
            )
        )
    outputs = [None] * nr_sources

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names

    # Run tracking
    model.warmup(imgsz=(1 if pt else nr_sources, 3, *imgsz))  # warmup
    dt, seen = [0.0, 0.0, 0.0, 0.0], 0
    for frame_idx, (path, im, im0s, vid_cap, s) in enumerate(dataset):
        t1 = time_sync()
        im = torch.from_numpy(im).to(device)
        im = im.half() if half else im.float()  # uint8 to fp16/32
        im /= 255.0  # 0 - 255 to 0.0 - 1.0
        if len(im.shape) == 3:
            im = im[None]  # expand for batch dim
        t2 = time_sync()
        dt[0] += t2 - t1

        # Inference
        visualize = increment_path(save_dir / Path(path[0]).stem, mkdir=True) if opt.visualize else False
        pred = model(im, augment=opt.augment, visualize=visualize)
        t3 = time_sync()
        dt[1] += t3 - t2

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, opt.classes, opt.agnostic_nms, max_det=opt.max_det)
        dt[2] += time_sync() - t3

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            seen += 1
            if webcam:  # nr_sources >= 1
                p, im0, _ = path[i], im0s[i].copy(), dataset.count
                p = Path(p)  # to Path
                s += f'{i}: '
                txt_file_name = p.name
                save_path = str(save_dir / p.name)  # im.jpg, vid.mp4, ...
            else:
                p, im0, _ = path, im0s.copy(), getattr(dataset, 'frame', 0)
                p = Path(p)  # to Path
                # video file
                if source.endswith(VID_FORMATS):
                    txt_file_name = p.stem
                    save_path = str(save_dir / p.name)  # im.jpg, vid.mp4, ...
                # folder with imgs
                else:
                    txt_file_name = p.parent.name  # get folder name containing current img
                    save_path = str(save_dir / p.parent.name)  # im.jpg, vid.mp4, ...

            txt_path = str(save_dir / 'tracks' / txt_file_name)  # im.txt
            s += '%gx%g ' % im.shape[2:]  # print string
            imc = im0.copy() if save_crop else im0  # for save_crop

            annotator = Annotator(im0, line_width=2, pil=not ascii)

            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                xywhs = xyxy2xywh(det[:, 0:4])
                confs = det[:, 4]
                clss = det[:, 5]

                # pass detections to deepsort
                t4 = time_sync()
                outputs[i] = deepsort_list[i].update(xywhs.cpu(), confs.cpu(), clss.cpu(), im0)
                t5 = time_sync()
                dt[3] += t5 - t4

                # draw boxes for visualization
                if len(outputs[i]) > 0:
                    for j, (output, conf) in enumerate(zip(outputs[i], confs)):

                        bboxes = output[0:4]
                        id = output[4]
                        cls = output[5]

                        if save_txt:
                            # to MOT format
                            bbox_left = output[0]
                            bbox_top = output[1]
                            bbox_w = output[2] - output[0]
                            bbox_h = output[3] - output[1]
                            # Write MOT compliant results to file
                            with open(txt_path + '.txt', 'a') as f:
                                f.write(('%g ' * 10 + '\n') % (frame_idx + 1, id, bbox_left,  # MOT format
                                                               bbox_top, bbox_w, bbox_h, -1, -1, -1, i))

                        if save_vid or save_crop or show_vid:  # Add bbox to image
                            c = int(cls)  # integer class
                            label = f'{id} {names[c]} {conf:.2f}'
                            annotator.box_label(bboxes, label, color=colors(c, True))
                            if save_crop:
                                txt_file_name = txt_file_name if (isinstance(path, list) and len(path) > 1) else ''
                                save_one_box(bboxes, imc, file=save_dir / 'crops' / txt_file_name / names[c] / f'{id}' / f'{p.stem}.jpg', BGR=True)

                LOGGER.info(f'{s}Done. YOLO:({t3 - t2:.3f}s), DeepSort:({t5 - t4:.3f}s)')

            else:
                deepsort_list[i].increment_ages()
                LOGGER.info('No detections')

            # Stream results
            im0 = annotator.result()
            if show_vid:
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_vid:
                if vid_path[i] != save_path:  # new video
                    vid_path[i] = save_path
                    if isinstance(vid_writer[i], cv2.VideoWriter):
                        vid_writer[i].release()  # release previous video writer
                    if vid_cap:  # video
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                    else:  # stream
                        fps, w, h = 30, im0.shape[1], im0.shape[0]
                    save_path = str(Path(save_path).with_suffix('.mp4'))  # force *.mp4 suffix on results videos
                    vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
                vid_writer[i].write(im0)

    # Print results
    t = tuple(x / seen * 1E3 for x in dt)  # speeds per image
    LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS, %.1fms deep sort update \
        per image at shape {(1, 3, *imgsz)}' % t)
    if save_txt or save_vid:
        s = f"\n{len(list(save_dir.glob('tracks/*.txt')))} tracks saved to {save_dir / 'tracks'}" if save_txt else ''
        LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}")
    if update:
        strip_optimizer(yolo_model)  # update model (to fix SourceChangeWarning)
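The tracks files written above follow the MOT challenge text format, one row per tracked box: frame, id, bb_left, bb_top, bb_width, bb_height, then four placeholder fields (written as -1, with the last field reused for the source index in this example). A small sketch for reading such a file back; the path in the commented call is a made-up placeholder:

from collections import defaultdict

def load_mot_tracks(txt_file):
    # returns {frame_idx: [(track_id, left, top, width, height), ...]}
    tracks = defaultdict(list)
    with open(txt_file) as f:
        for line in f:
            vals = line.split()
            if len(vals) < 6:
                continue
            frame, tid = int(float(vals[0])), int(float(vals[1]))
            left, top, w, h = map(float, vals[2:6])
            tracks[frame].append((tid, left, top, w, h))
    return tracks

# tracks = load_mot_tracks('runs/track/exp/tracks/video.txt')  # placeholder path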
Example #14
0
def detect(opt, save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith('rtsp') or source.startswith(
        'http') or source.endswith('.txt')

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Initialize
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    #google_utils.attempt_download(weights)
    model = torch.load(weights,
                       map_location=device)['model'].float()  # load to FP32
    #model = torch.save(torch.load(weights, map_location=device), weights)  # update model if SourceChangeWarning
    # model.fuse()
    model.to(device).eval()
    if half:
        model.half()  # to FP16

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        view_img = True
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img
              ) if device.type != 'cpu' else None  # run once

    save_path = str(Path(out))
    txt_path = str(Path(out)) + '/results.txt'

    for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            s += '%gx%g ' % img.shape[2:]  # print string
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                bbox_xywh = []
                confs = []

                # Adapt detections to deep sort input format
                for *xyxy, conf, cls in det:
                    img_h, img_w, _ = im0.shape
                    x_c, y_c, bbox_w, bbox_h = bbox_rel(img_w, img_h, *xyxy)
                    obj = [x_c, y_c, bbox_w, bbox_h]
                    bbox_xywh.append(obj)
                    confs.append([conf.item()])

                xywhs = torch.Tensor(bbox_xywh)
                confss = torch.Tensor(confs)

                # Pass detections to deepsort
                outputs = deepsort.update(xywhs, confss, im0)

                # draw boxes for visualization
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, -1]
                    draw_boxes(im0, bbox_xyxy, identities)

                # Write MOT compliant results to file
                if save_txt and len(outputs) != 0:
                    for j, output in enumerate(outputs):
                        bbox_left = output[0]
                        bbox_top = output[1]
                        bbox_w = output[2]
                        bbox_h = output[3]
                        identity = output[-1]
                        with open(txt_path, 'a') as f:
                            f.write(('%g ' * 10 + '\n') %
                                    (frame_idx, identity, bbox_left, bbox_top,
                                     bbox_w, bbox_h, -1, -1, -1,
                                     -1))  # label format

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer

                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*opt.fourcc),
                            fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % (os.getcwd() + os.sep + out))
        if platform == 'darwin':  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
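This example (and the two that follow) adapts YOLO xyxy detections to DeepSort's centre-based input through a bbox_rel helper that is not shown. A plausible sketch, assuming it simply converts the corner format to centre-x, centre-y, width, height in pixels (the image size arguments are kept only to match the call signature used above):

def bbox_rel(image_width, image_height, *xyxy):
    # xyxy corners -> (x_center, y_center, width, height) in pixels
    bbox_left = min(float(xyxy[0]), float(xyxy[2]))
    bbox_top = min(float(xyxy[1]), float(xyxy[3]))
    bbox_w = abs(float(xyxy[0]) - float(xyxy[2]))
    bbox_h = abs(float(xyxy[1]) - float(xyxy[3]))
    x_c = bbox_left + bbox_w / 2
    y_c = bbox_top + bbox_h / 2
    return x_c, y_c, bbox_w, bbox_h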
Example #15
0
def detect(opt, save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith('rtsp') or source.startswith(
        'http') or source.endswith('.txt')

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Initialize
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Read Yaml
    with open(opt.data) as f:
        data_dict = yaml.load(f, Loader=yaml.FullLoader)
    names = data_dict['names']
    print(names)

    # Load model
    #google_utils.attempt_download(weights)
    model = torch.load(weights,
                       map_location=device)['model'].float()  # load to FP32
    #model = torch.save(torch.load(weights, map_location=device), weights)  # update model if SourceChangeWarning
    # model.fuse()
    model.to(device).eval()
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = torch_utils.load_classifier(name='resnet101',
                                             n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Run inference
    t_fps = time_synchronized()
    frame_fps = 0
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img
              ) if device.type != 'cpu' else None  # run once
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out) / Path(p).stem) + (
                '_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                #print(det[:, -1].unique())
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                bbox_xywh = []
                confs = []
                #clses = []

                # Write results
                for *xyxy, conf, cls in det:

                    img_h, img_w, _ = im0.shape  # get image shape

                    x_c, y_c, bbox_w, bbox_h = bbox_rel(img_w, img_h, *xyxy)
                    obj = [x_c, y_c, bbox_w, bbox_h]
                    bbox_xywh.append(obj)
                    confs.append([conf.item()])
                    #clses.append([cls.item()])
                    #outputs, clses2 = deepsort.update((torch.Tensor(bbox_xywh)), (torch.Tensor(confs)), (torch.Tensor(clses)), im0)
                    #outputs = deepsort.update((torch.Tensor(bbox_xywh)), (torch.Tensor(confs)), im0)

                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)],
                                     line_thickness=3)

                xywhs = torch.Tensor(bbox_xywh)
                confss = torch.Tensor(confs)
                # Pass detections to deepsort
                outputs = deepsort.update(xywhs, confss, im0)

                # draw boxes for visualization
                if len(outputs) > 0:
                    bbox_tlwh = []
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, -1]
                    ori_im = draw_boxes(im0, bbox_xyxy, identities)

                    # Print time (inference + NMS)
            #print('%sDone. (%.3fs)' % (s, t2 - t1))
            print('%sDone.' % s)

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer

                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*opt.fourcc),
                            fps, (w, h))
                    vid_writer.write(im0)

        frame_fps += 1

        if ((time_synchronized() - t_fps) >= 1):
            print('\n')
            print('FPS=%.2f' % (frame_fps))
            t_fps = time_synchronized()
            frame_fps = 0

        #print('FPS=%.2f' % (1/(time_synchronized() - t1)))

    if save_txt or save_img:
        print('Results saved to %s' % (os.getcwd() + os.sep + out))
        if platform == 'darwin':  # MacOS
            os.system('open ' + save_path)
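Several of these examples render the tracked boxes with a draw_boxes helper from the DeepSort utilities that is not reproduced here (the LineMapping variant in Example #11 additionally returns counting data). A minimal sketch of what such a helper typically does; the colour scheme and label layout are illustrative, not the repo's exact implementation:

import cv2

def draw_boxes(img, bbox_xyxy, identities=None, offset=(0, 0)):
    # draw one rectangle plus track id per box; boxes are (x1, y1, x2, y2)
    for i, box in enumerate(bbox_xyxy):
        x1, y1, x2, y2 = [int(v) for v in box]
        x1 += offset[0]; x2 += offset[0]
        y1 += offset[1]; y2 += offset[1]
        track_id = int(identities[i]) if identities is not None else 0
        color = ((37 * track_id) % 255, (17 * track_id) % 255, (29 * track_id) % 255)
        cv2.rectangle(img, (x1, y1), (x2, y2), color, 2)
        cv2.putText(img, str(track_id), (x1, y1 - 4), cv2.FONT_HERSHEY_PLAIN, 1.5, color, 2)
    return img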
Example #16
0
def detect(opt, save_img=False):
    out, source, weights, view_img, save_txt, imgsz = opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith('rtsp') or source.startswith(
        'http') or source.endswith('.txt')
    global counter
    global features
    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Initialize
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA; enables faster computation at lower precision.

    # Write headers into the csv file
    with open(str(Path(opt.output)) + '/results.csv', 'a') as f:
        f.write("Time,People Count Changed,TotalCount,ActivePerson,\n")

    # Initialize the scheduler to run write_csv every 2 seconds
    scheduler = BackgroundScheduler()
    scheduler.start()
    scheduler.add_job(func=write_csv,
                      args=[opt.output],
                      trigger=IntervalTrigger(seconds=2))

    # Load model
    model = torch.load(weights,
                       map_location=device)['model'].float()  # load to FP32
    model.to(device).eval()
    if half:
        model.half()  # to FP16

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        view_img = True
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img
              ) if device.type != 'cpu' else None  # run once

    save_path = str(Path(out))
    txt_path = str(Path(out)) + '/results.csv'

    for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            s += '%gx%g ' % img.shape[2:]  # print string
            save_path = str(Path(out) / Path(p).name)

            if det is not None and len(det):
                # Rescale boxes from img_size to im0(640) size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                bbox_xywh = []
                confs = []

                # Adapt detections to deep sort input format
                for *xyxy, conf, cls in det:
                    img_h, img_w, _ = im0.shape
                    x_c, y_c, bbox_w, bbox_h = bbox_rel(img_w, img_h, *xyxy)
                    obj = [x_c, y_c, bbox_w, bbox_h]
                    bbox_xywh.append(obj)
                    confs.append([conf.item()])

                xywhs = torch.Tensor(bbox_xywh)
                confss = torch.Tensor(confs)

                # Pass detections to deepsort
                outputs = deepsort.update(xywhs, confss, im0)

                # draw boxes for visualization
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, -1]
                    draw_boxes(im0, bbox_xyxy, identities)
                    features['identities'] = identities
                if save_txt and len(outputs) != 0:
                    for j, output in enumerate(outputs):
                        bbox_left = output[0]
                        bbox_top = output[1]
                        bbox_w = output[2]
                        bbox_h = output[3]
                        identity = output[-1]
                        # with open(txt_path, 'a') as f:
                        # f.write(f"{datetime.now()},{changes if changes != counter else 0},{counter},{len(identities)},\n")  # label format

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1), end='\r')

            # Write Counter on img
            cv2.putText(im0, "Counter : " + str(counter), (10, 20),
                        cv2.FONT_HERSHEY_PLAIN, 2, [1, 190, 200], 2)

            # Stream results
            # if view_img:
            # 	cv2.imshow(p, im0)
            # 	if cv2.waitKey(1) == ord('q'):  # q to quit
            # 		raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    print('saving img!')
                    cv2.imwrite(save_path, im0)
                else:
                    # print('saving video!')
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer. Issues with video writer. Fix later

                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*opt.fourcc),
                            fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
    print('Done. (%.3fs)' % (time.time() - t0))
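The example above schedules a write_csv job every two seconds but never shows the callback itself. Below is a minimal, hypothetical sketch of what that callback could look like, assuming it reads the module-level counter and features globals used in the example and appends one row per tick; the column layout follows the CSV header written above, and the exact original implementation is not shown.

from datetime import datetime
from pathlib import Path

def write_csv(output_dir):
    # Hypothetical sketch: append one row of tracking stats per scheduler tick.
    # Assumes the globals `counter` (running total) and `features` (dict holding
    # the last tracked identities) defined in the example above.
    identities = features.get('identities', [])
    prev = features.get('last_count', 0)
    changed = counter - prev if counter != prev else 0
    features['last_count'] = counter
    with open(str(Path(output_dir)) + '/results.csv', 'a') as f:
        f.write(f"{datetime.now()},{changed},{counter},{len(identities)},\n")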
Exemple #17
0
def detect(opt, save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith('rtsp') or source.startswith(
        'http') or source.endswith('.txt')

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Initialize
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = torch.load(weights,
                       map_location=device)['model'].float()  # load to FP32
    model.to(device).eval()
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = torch_utils.load_classifier(name='resnet101',
                                             n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    flag = 0  # flag used when saving the video
    view_img = True
    save_img = True
    dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img
              ) if device.type != 'cpu' else None  # run once

    idx = -1
    compare_dict = {}

    # create a new figure or activate an existing figure
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='polar')  # 1x1 grid, first subplot, polar projection

    for path, img, im0s, vid_cap in dataset:
        #plt
        # Plot origin (agent's start point) - the origin represents the pedestrian
        ax.plot(0, 0, color='black', marker='o', markersize=20, alpha=0.3)
        # Plot configuration
        ax.set_rticks([])
        ax.set_rmax(1)
        ax.grid(False)
        ax.set_theta_zero_location("S")  # put 0 degrees at the south
        ax.set_theta_direction(-1)  # clockwise polar coordinates

        img = torch.from_numpy(img).to(device)

        # crop the input image frame
        img = img[:, 100:260, :]
        temp = img
        add_img = temp[:, :, :32]
        img = torch.cat((img, add_img), dim=2)  # append the first 32 columns along the width
        # crop the output frame as well (so bounding boxes still match the objects)
        im0s = im0s[200:520, :, :]
        temp = im0s
        add_im0s = temp[:, :64, :]
        print(add_im0s.shape)
        im0s = np.concatenate((im0s, add_im0s), axis=1)

        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        idx += 1
        if idx % 10 != 0:  # keep the video length (run detection only every 10th frame)
            if len(outputs) > 0:
                ori_im = draw_boxes(im0s, bbox_xyxy, identities,
                                    isCloser)  # draw bboxes using the previous frame's info
            vid_writer.write(im0s)
            continue

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            p, s, im0 = path, '', im0s  # we always take this branch - the print statements before the file path should be removed in datasets.py

            save_path = str(Path(out) / Path(p).name)
            # print(dataset.frame)  # frame number
            # s += '%gx%g ' % img.shape[2:]  # print the video size (e.g. 640x320) - can be removed
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  #  normalization gain whwh

            # if any vehicles were detected
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results  # count and class (e.g. 5 cars) - can be removed
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                #s += '%g %ss, ' % (n, names[int(c)])  # add to string

                bbox_xywh = []
                confs = []

                # Adapt detections to deep sort input format
                for *xyxy, conf, cls in det:
                    img_h, img_w, _ = im0.shape  # size of the output frame
                    x_c, y_c, bbox_w, bbox_h = bbox_rel(img_w, img_h,
                                                        *xyxy)  # center coordinates, w, h
                    obj = [x_c, y_c, bbox_w, bbox_h]

                    bbox_xywh.append(obj)
                    confs.append([conf.item()])

                xywhs = torch.Tensor(bbox_xywh)
                confss = torch.Tensor(confs)

                # Pass detections to deepsort
                outputs = []

                if len(bbox_xywh) != 0:  # if anything was detected, pass it to deepsort
                    outputs, coors, frame, bbox_size = deepsort.update(
                        xywhs, confss, im0, compare_dict, dataset.frame)

                # draw boxes for visualization
                if len(outputs) > 0:
                    print("!", outputs)
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, 4]
                    isCloser = outputs[:, -1]
                    print("isCloser:", isCloser)
                    print(compare_dict)
                    ori_im = draw_boxes(im0, bbox_xyxy, identities,
                                        isCloser)  # draw bboxes
                    alert.show_direction(ax, coors, bbox_size,
                                         isCloser)  # display the approach direction

            # Print time (inference + NMS)
            #print('%sDone. (%.3fs)' % (s, t2 - t1))

            plt.show(block=False)
            # save the plot chart
            # if idx is 10, save frames 1 through 9 with the same image as frame 10
            '''
            file = '/Users/wonyeong/Desktop/ewha/project/plotimgs/img%d.png' % idx
            plt.savefig(file)
            for j in range(9):
                file = '/Users/wonyeong/Desktop/ewha/project/plotimgs/img%d.png' % (j + idx + 1)
                plt.savefig(file)

            # draw only when a vehicle is detected..
            '''
            plt.pause(0.01)
            plt.cla()

            # Stream results
            cv2.imshow('frame', im0)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
            print(im0s.shape)

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if (flag == 0):
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        #w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        # h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        w = 1344
                        h = 320
                        flag = 1
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*opt.fourcc),
                            fps, (w, h))
                    else:
                        vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        if platform == 'darwin':  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
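Both detect() examples above feed DeepSort through a bbox_rel helper that converts YOLO's corner-format (x1, y1, x2, y2) boxes into the center-based (x_c, y_c, w, h) format that deepsort.update expects. The helper is not shown in the listings; the sketch below shows the usual conversion, assuming each coordinate is a tensor element (hence .item()), with the width/height arguments accepted for signature compatibility but unused in this version.

def bbox_rel(image_width, image_height, *xyxy):
    # Convert corner coordinates (x1, y1, x2, y2) to center format (x_c, y_c, w, h).
    bbox_left = min([xyxy[0].item(), xyxy[2].item()])
    bbox_top = min([xyxy[1].item(), xyxy[3].item()])
    bbox_w = abs(xyxy[0].item() - xyxy[2].item())
    bbox_h = abs(xyxy[1].item() - xyxy[3].item())
    x_c = bbox_left + bbox_w / 2
    y_c = bbox_top + bbox_h / 2
    return x_c, y_c, bbox_w, bbox_h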
Exemple #18
0
def main():
    video_name = 'car.mp4'
    cap = cv2.VideoCapture(f'data/videos/{video_name}')
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    vid_writer = cv2.VideoWriter(f'runs/track/{video_name}.mp4', fourcc, 30, (width, height))
    """ yolov5 目标检测器 """
    yolov5_detector = YOLOv5Detector()
    """ deepsort 追踪器 """
    cfg = get_config()
    cfg.merge_from_file("deep_sort/configs/deep_sort.yaml")
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)
    window_name = 'Real-time tracking'
    while True:
        state, frame = cap.read()
        if not state:
            break
        """ 检测目标 """
        image, bbox_container = yolov5_detector(frame)
        """ 仅保留人车信息"""
        bbox_container = cut_bbox_container(bbox_container)
        """ 初始化一些变量 """
        xywh_bboxs = []
        labels = []
        confs = []
        for bbox in bbox_container:
            xywh_bboxs.append(xyxy_to_xywh(bbox['box']))
            labels.append(bbox['class'])
            confs.append(bbox['confidence'])
        """ 检测到目标后才有追踪 """
        if labels:
            """ detections --> deepsort """
            xywhs = torch.Tensor(xywh_bboxs)
            confss = torch.Tensor(confs)
            outputs = deepsort.update(xywhs, confss, labels, frame)
            obj_ids = []
            bbox_draw = []
            if len(outputs) > 0:
                for (x1, y1, x2, y2, label, track_id) in outputs:
                    bbox_draw.append({'class': label, 'box': [x1, y1, x2, y2]})
                    obj_ids.append(track_id)
                """ 绘图显示 """
                draw_image(frame, bbox_draw, obj_ids)
            """ 输出一些信息 """
            for info in bbox_draw:
                print(info)
            print(obj_ids)
            print('---')
        cv2.imshow(window_name, frame)
        vid_writer.write(frame)
        cv2.waitKey(1)
        """ 点 x 退出 """
        if cv2.getWindowProperty(window_name, cv2.WND_PROP_AUTOSIZE) < 1:
            break
    cap.release()
    vid_writer.release()
    cv2.destroyAllWindows()
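The main() example above relies on two small helpers that are not shown: xyxy_to_xywh, which converts a corner box to the center format DeepSort expects, and cut_bbox_container, which drops everything except people and vehicles. Below are plausible sketches only, assuming boxes are plain [x1, y1, x2, y2] lists and each container entry is a dict with 'class', 'box' and 'confidence' keys as used above; the exact class list kept by the filter is an assumption.

def xyxy_to_xywh(box):
    # Corner format [x1, y1, x2, y2] -> center format [x_c, y_c, w, h].
    x1, y1, x2, y2 = box
    w, h = abs(x2 - x1), abs(y2 - y1)
    return [x1 + w / 2, y1 + h / 2, w, h]


def cut_bbox_container(bbox_container, keep=('person', 'car', 'bus', 'truck')):
    # Keep only person/vehicle detections; the class names kept here are an assumption.
    return [bbox for bbox in bbox_container if bbox['class'] in keep]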
Exemple #19
0
    py = int(round(sum[1] / sum[2]))
    #     print(px)
    return np.array([px, py])


app = Flask(__name__)

Bootstrap(app)

cfg = get_config()
cfg.merge_from_file(config_deepsort)

deepsort = DeepSort(deep_sort_model,
                    max_dist=cfg.DEEPSORT.MAX_DIST,
                    max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                    max_age=cfg.DEEPSORT.MAX_AGE,
                    n_init=cfg.DEEPSORT.N_INIT,
                    nn_budget=cfg.DEEPSORT.NN_BUDGET,
                    use_cuda=True)

device = select_device(device)
half &= device.type != 'cpu'  # half precision only supported on CUDA

# The MOT16 evaluation runs multiple inference streams in parallel, each one writing to
# its own .txt file. Hence, in that case, the output folder is not restored
# make new output folder

# Load model
device = select_device(device)
model = DetectMultiBackend(yolo_model, device=device, dnn=dnn)
stride, names, pt, jit, _ = model.stride, model.names, model.pt, model.jit, model.onnx