Python LoadStreams Examples

Programming Language: Python

Namespace/Package Name: yolov5.utils.datasets

Class/Type: LoadStreams

Examples at hotexamples.com: 29

Python LoadStreams - 29 examples found. These are the top rated real world Python examples of yolov5.utils.datasets.LoadStreams extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

LoadStreams(29)

Frequently Used Methods

LoadStreams (29)

Example #1

Show file

 def set_dataloader(self):
     """ Set Dataloader """
     if self.webcam:
         self.view_img = True
         cudnn.benchmark = True  # set True to speed up constant image size inference
         dataset = LoadStreams(self.opt.source, img_size=self.imgsz)
     else:
         self.save_img = True
         dataset = LoadImages(self.opt.source, img_size=self.imgsz)
     return dataset

Example #2

Show file

File: gui_track_yolo_deepsort_3.py Project: vbansal1/Gui_Track

def getData(source, imgsz):
    webcam = source == '0' or source == '2' or source.startswith(
        'rtsp') or source.startswith('http') or source.endswith('.txt')

    # Set Dataloader
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)

    else:
        view_img = True
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    data = enumerate(dataset)
    return webcam, dataset, data

Example #3

Show file

File: classy_track1.py Project: jigsaw1801/Detect_Count_HoneyBee

def detect(opt, *args):
    out, source, weights, view_img, save_txt, imgsz, save_img, sort_max_age, sort_min_hits, sort_iou_thresh = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size, opt.save_img, opt.sort_max_age, opt.sort_min_hits, opt.sort_iou_thresh

    webcam = source == '0' or source.startswith('rtsp') or source.startswith(
        'http') or source.endswith('.txt')
    # Initialize SORT
    sort_tracker = Sort(max_age=sort_max_age,
                        min_hits=sort_min_hits,
                        iou_threshold=sort_iou_thresh)  # {plug into parser}

    # Directory and CUDA settings for yolov5
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load yolov5 model
    model = torch.load(weights, map_location=device)['model'].float(
    )  # load to FP32. yolov5s.pt file is a dictionary, so we retrieve the model by indexing its key
    model.to(device).eval()
    if half:
        model.half()  # to FP16

    # Set DataLoader
    vid_path, vid_writer = None, None

    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        dataset = LoadImages(source, img_size=imgsz)

    # get names of object categories from yolov5.pt model
    names = model.module.names if hasattr(model, 'module') else model.names

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img

    # Run once (throwaway)
    _ = model(img.half() if half else img) if device.type != 'cpu' else None

    save_path = str(Path(out))
    txt_path = str(Path(out)) + '/results.txt'
    memory = []
    flying_in = 0
    flying_out = 0
    path = source
    vs = cv2.VideoCapture(source)
    vs1 = cv2.VideoCapture(path1)
    prop = cv2.CAP_PROP_FRAME_COUNT
    total = int(vs.get(prop))
    img_size = 640
    while True:  # for every frame
        current_frame = vs.get(cv2.CAP_PROP_POS_FRAMES)
        print("%d/%d", (current_frame, total))
        vs.set(1, current_frame)
        (grabbed, im0s) = vs.read()
        if not grabbed:
            break
        img = letterbox(im0s, new_shape=img_size)[0]

        # Convert
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
        img = np.ascontiguousarray(img)

        # img_copy = img.copy()
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # unint8 to fp16 or fp32
        img /= 255.0  # normalize to between 0 and 1.
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Process detections
        for i, det in enumerate(pred):  # for each detection in this frame
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            s += f'{img.shape[2:]}'  # print image size and detection report
            save_path = str(Path(out) / Path(p).name)

            # Rescale boxes from img_size (temporarily downscaled size) to im0 (native) size
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                      im0s.shape).round()

            for c in det[:, -1].unique():  # for each unique object category
                n = (det[:, -1] == c).sum()  # number of detections per class
                s += f' - {n} {names[int(c)]}'

            dets_to_sort = np.empty((0, 6))

            # Pass detections to SORT
            # NOTE: We send in detected object class too
            for x1, y1, x2, y2, conf, detclass in det.cpu().detach().numpy():
                dets_to_sort = np.vstack(
                    (dets_to_sort, np.array([x1, y1, x2, y2, conf, detclass])))
            print('\n')
            print('Input into SORT:\n', dets_to_sort, '\n')
            indexIDs = []

            previous = memory.copy()
            memory = []
            id = []
            # Run SORT
            tracked_dets = sort_tracker.update(dets_to_sort)
            print('Output from SORT:\n', tracked_dets, '\n')

            for j, track in enumerate(tracked_dets):
                bbox_x1 = track[0]
                bbox_y1 = track[1]
                bbox_x2 = track[2]
                bbox_y2 = track[3]
                p0 = (bbox_x1, bbox_y1)
                p1 = (bbox_x1, bbox_y2)
                if intersect(p0, p1, line[0], line[1]):
                    memory.append(track[8])
                    print(track[8])
            over_lap = set(memory).intersection(previous)
            curr_frame = current_frame
            if len(over_lap) == len(previous):
                pass
            else:
                a = set(memory).symmetric_difference(previous)
                diffs = a - set(a).intersection(memory)

                k = 0
                while (k < 2):
                    curr_frame = curr_frame + 1
                    vs1.set(cv2.CAP_PROP_POS_FRAMES, curr_frame)
                    ret, im0s1 = vs1.read()
                    img1 = letterbox(im0s1, new_shape=img_size)[0]

                    # Convert
                    img1 = img1[:, :, ::-1].transpose(
                        2, 0, 1)  # BGR to RGB, to 3x416x416
                    img1 = np.ascontiguousarray(img1)

                    # img_copy = img.copy()
                    img1 = torch.from_numpy(img1).to(device)
                    img1 = img1.half() if half else img1.float(
                    )  # unint8 to fp16 or fp32
                    img1 /= 255.0  # normalize to between 0 and 1.
                    if img1.ndimension() == 3:
                        img1 = img1.unsqueeze(0)

                    # Inference
                    t11 = time_synchronized()
                    pred1 = model(img1, augment=opt.augment)[0]

                    # Apply NMS
                    pred1 = non_max_suppression(pred1,
                                                opt.conf_thres,
                                                opt.iou_thres,
                                                classes=opt.classes,
                                                agnostic=opt.agnostic_nms)
                    t12 = time_synchronized()

                    # Process detections
                    for i1, det1 in enumerate(
                            pred1):  # for each detection in this frame
                        if webcam:  # batch_size >= 1
                            p1, s1, im01 = path1[i], '%g: ' % i, im0s1[i].copy(
                            )
                        else:
                            p1, s1, im01 = path1, '', im0s1

                        s1 += f'{img1.shape[2:]}'  # print image size and detection report
                        save_path = str(Path(out) / Path(p).name)

                        # Rescale boxes from img_size (temporarily downscaled size) to im0 (native) size
                        det1[:, :4] = scale_coords(img1.shape[2:], det1[:, :4],
                                                   im0s1.shape).round()

                        for c1 in det1[:, -1].unique(
                        ):  # for each unique object category
                            n1 = (det1[:, -1] == c1
                                  ).sum()  # number of detections per class
                            s1 += f' - {n1} {names[int(c1)]}'

                        dets_to_sort1 = np.empty((0, 6))

                        # Pass detections to SORT
                        # NOTE: We send in detected object class too
                        for x11, y11, x12, y12, conf1, detclass1 in det1.cpu(
                        ).detach().numpy():
                            dets_to_sort1 = np.vstack(
                                (dets_to_sort1,
                                 np.array(
                                     [x11, y11, x12, y12, conf1, detclass1])))

                        # Run SORT
                        tracked_dets1 = sort_tracker.update(dets_to_sort1)

                    k += 1
                for tracked_dets1 in tracked_dets1:
                    id.append(tracked_dets1[8])
                print(id)
                print(diffs)
                # print(set(diffs).intersection(id))
                for diff in diffs:
                    indexIDs.append(diff)
                    if set(indexIDs).intersection(id):
                        flying_out += 1
                    else:
                        flying_in += 1
                    indexIDs = []
            # print('Output from SORT2:\n', tracked_dets, '\n')
            print(flying_out)
            print(flying_in)

            # boxes.append([track[0], track[1], track[2], track[3]])
            # indexIDs.append(track[8])
            # memory[indexIDs[-1]] = boxes[-1]

            # draw boxes for visualization
            if len(tracked_dets) > 0:
                bbox_xyxy = tracked_dets[:, :4]
                identities = tracked_dets[:, 8]
                categories = tracked_dets[:, 4]
                draw_boxes(im0, bbox_xyxy, identities, categories, names)

            # Write detections to file. NOTE: Not MOT-compliant format.
            if save_txt and len(tracked_dets) != 0:
                for j, tracked_dets in enumerate(tracked_dets):
                    bbox_x1 = tracked_dets[0]
                    bbox_y1 = tracked_dets[1]
                    bbox_x2 = tracked_dets[2]
                    bbox_y2 = tracked_dets[3]
                    category = tracked_dets[4]
                    u_overdot = tracked_dets[5]
                    v_overdot = tracked_dets[6]
                    s_overdot = tracked_dets[7]
                    identity = tracked_dets[8]

                    with open(txt_path, 'a') as f:
                        f.write(
                            f'{current_frame},{bbox_x1},{bbox_y1},{bbox_x2},{bbox_y2},{category},{u_overdot},{v_overdot},{s_overdot},{identity}\n'
                        )

                    print(f'{s} Done. ({t2 - t1})')
            # Stream image results(opencv)
            if save_img or view_img:
                cv2.line(im0, line[0], line[1], color=(0, 255, 0), thickness=2)
                label = f'HoneyBee : {n}'
                label1 = f'Flying_in : {flying_in}'
                label2 = f'Flying_out : {flying_out}'
                cv2.putText(im0,
                            label,
                            org=(100, 50),
                            fontFace=cv2.FONT_HERSHEY_PLAIN,
                            fontScale=3,
                            color=(0, 255, 0),
                            thickness=3)
                cv2.putText(im0,
                            label1,
                            org=(100, 100),
                            fontFace=cv2.FONT_HERSHEY_PLAIN,
                            fontScale=3,
                            color=(255, 0, 0),
                            thickness=3)
                cv2.putText(im0,
                            label2,
                            org=(100, 150),
                            fontFace=cv2.FONT_HERSHEY_PLAIN,
                            fontScale=3,
                            color=(0, 0, 255),
                            thickness=3)
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration
            # Save video results
            if save_img:
                print('saving img!')
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    print('saving video!')
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer

                        fps = vs.get(cv2.CAP_PROP_FPS)
                        w = int(vs.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vs.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*opt.fourcc),
                            fps, (w, h))
                    vid_writer.write(im0)
    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        if platform == 'darwin':  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))

Example #4

Show file

File: track.py Project: AbinashCIS/Tracker

def detect(opt, save_img=False):
    out, source, weights, view_img, save_txt, imgsz = opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith('rtsp') or source.startswith(
        'http') or source.endswith('.txt')
    global counter
    global features
    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Initialize
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA. Make faster cmputation with lower precision.

    #Write headers into csv file
    with open(str(Path(args.output)) + '/results.csv', 'a') as f:
        f.write("Time,People Count Changed,TotalCount,ActivePerson,\n")

    #Initialize the scheduler for every 2 secs
    scheduler = BackgroundScheduler()
    scheduler.start()
    scheduler.add_job(func=write_csv,
                      args=[opt.output],
                      trigger=IntervalTrigger(seconds=2))

    # Load model
    model = torch.load(weights,
                       map_location=device)['model'].float()  # load to FP32
    model.to(device).eval()
    if half:
        model.half()  # to FP16

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        view_img = True
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img
              ) if device.type != 'cpu' else None  # run once

    save_path = str(Path(out))
    txt_path = str(Path(out)) + '/results.csv'

    for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            s += '%gx%g ' % img.shape[2:]  # print string
            save_path = str(Path(out) / Path(p).name)

            if det is not None and len(det):
                # Rescale boxes from img_size to im0(640) size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                bbox_xywh = []
                confs = []

                # Adapt detections to deep sort input format
                for *xyxy, conf, cls in det:
                    img_h, img_w, _ = im0.shape
                    x_c, y_c, bbox_w, bbox_h = bbox_rel(img_w, img_h, *xyxy)
                    obj = [x_c, y_c, bbox_w, bbox_h]
                    bbox_xywh.append(obj)
                    confs.append([conf.item()])

                xywhs = torch.Tensor(bbox_xywh)
                confss = torch.Tensor(confs)

                # Pass detections to deepsort
                outputs = deepsort.update(xywhs, confss, im0)

                # draw boxes for visualization
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, -1]
                    draw_boxes(im0, bbox_xyxy, identities)
                    features['identities'] = identities
                if save_txt and len(outputs) != 0:
                    for j, output in enumerate(outputs):
                        bbox_left = output[0]
                        bbox_top = output[1]
                        bbox_w = output[2]
                        bbox_h = output[3]
                        identity = output[-1]
                        # with open(txt_path, 'a') as f:
                        # f.write(f"{datetime.now()},{changes if changes != counter else 0},{counter},{len(identities)},\n")  # label format

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1), end='\r')

            # Write Counter on img
            cv2.putText(im0, "Counter : " + str(counter), (10, 20),
                        cv2.FONT_HERSHEY_PLAIN, 2, [1, 190, 200], 2)

            # Stream results
            # if view_img:
            # 	cv2.imshow(p, im0)
            # 	if cv2.waitKey(1) == ord('q'):  # q to quit
            # 		raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    print('saving img!')
                    cv2.imwrite(save_path, im0)
                else:
                    # print('saving video!')
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer. Issues with video writer. Fix later

                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*opt.fourcc),
                            fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
    print('Done. (%.3fs)' % (time.time() - t0))

Example #5

Show file

File: detect.py Project: yumi2378/yolov5-pip

def detect(
    weights="yolov5s.pt",
    source="yolov5/data/images",
    img_size=640,
    conf_thres=0.75,
    iou_thres=0.45,
    device="",
    view_img=False,
    save_txt=False,
    save_conf=False,
    classes=None,
    agnostic_nms=False,
    augment=False,
    update=False,
    project="runs/detect",
    name="exp",
    exist_ok=False,
    save_img=False,
):
    """
    Args:
        weights: str
            model.pt path(s)
        source: str
            file/folder, 0 for webcam
        img_size: int
            inference size (pixels)
        conf_thres: float
            object confidence threshold
        iou_thres: float
            IOU threshold for NMS
        device: str
            cuda device, i.e. 0 or 0,1,2,3 or cpu
        view_img: bool
            display results
        save_txt: bool
            save results to *.txt
        save_conf: bool
            save confidences in save_txt labels
        classes: int
            filter by class: [0], or [0, 2, 3]
        agnostic-nms: bool
            class-agnostic NMS
        augment: bool
            augmented inference
        update: bool
            update all models
        project: str
            save results to project/name
        name: str
            save results to project/name
        exist_ok: bool
            existing project/name ok, do not increment
    """
    source, weights, view_img, save_txt, imgsz = (
        source,
        weights,
        view_img,
        save_txt,
        img_size,
    )
    webcam = (
        source.isnumeric()
        or source.endswith(".txt")
        or source.lower().startswith(("rtsp://", "rtmp://", "http://"))
    )

    # Directories
    save_dir = Path(
        increment_path(Path(project) / name, exist_ok=exist_ok)
    )  # increment run
    (save_dir / "labels" if save_txt else save_dir).mkdir(
        parents=True, exist_ok=True
    )  # make dir

    # Initialize
    set_logging()
    device = select_device(device)
    half = device.type != "cpu"  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name="resnet101", n=2)  # initialize
        modelc.load_state_dict(
            torch.load("weights/resnet101.pt", map_location=device)["model"]
        ).to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, "module") else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != "cpu" else None  # run once
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=augment)[0]

        # Apply NMS
        pred = non_max_suppression(
            pred,
            conf_thres,
            iou_thres,
            classes=classes,
            agnostic=agnostic_nms,
        )
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], "%g: " % i, im0s[i].copy(), dataset.count
            else:
                p, s, im0, frame = path, "", im0s, getattr(dataset, "frame", 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / "labels" / p.stem) + (
                "" if dataset.mode == "image" else f"_{frame}"
            )  # img.txt
            s += "%gx%g " % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}s, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (
                            (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn)
                            .view(-1)
                            .tolist()
                        )  # normalized xywh
                        line = (
                            (cls, *xywh, conf) if save_conf else (cls, *xywh)
                        )  # label format
                        with open(txt_path + ".txt", "a") as f:
                            f.write(("%g " * len(line)).rstrip() % line + "\n")

                    if save_img or view_img:  # Add bbox to image
                        label = f"{names[int(cls)]} {conf:.2f}"
                        plot_one_box(
                            xyxy,
                            im0,
                            label=label,
                            color=colors[int(cls)],
                            line_thickness=3,
                        )

            # Print time (inference + NMS)
            print(f"{s}Done. ({t2 - t1:.3f}s)")

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)

            # Save results (image with detections)
            if save_img:
                if dataset.mode == "image":
                    cv2.imwrite(save_path, im0)
                else:  # 'video'
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fourcc = "mp4v"  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h)
                        )
                    vid_writer.write(im0)

    if save_txt or save_img:
        s = (
            f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}"
            if save_txt
            else ""
        )
        print(f"Results saved to {save_dir}{s}")

    print(f"Done. ({time.time() - t0:.3f}s)")

Example #6

Show file

def detect(save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.startswith(
        'rtsp') or source.startswith('http') or source.endswith('.txt')

    # Initialize
    set_logging()
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img
              ) if device.type != 'cpu' else None  # run once
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out) / Path(p).stem) + (
                '_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * 5 + '\n') %
                                    (cls, *xywh))  # label format

                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)],
                                     line_thickness=3)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                # cv2.imshow(p, im0)
                cv2.imwrite("C:/Users/lenovo/Desktop/server/output/camera.jpg",
                            im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIterationq

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        # vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                        vid_writer = cv2.VideoWriter(
                            save_path,
                            cv2.VideoWriter_fourcc('X', '2', '6', '4'), fps,
                            (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % Path(out))
        if platform.system() == 'Darwin' and not opt.update:  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))

Example #7

Show file

File: track.py Project: ajcohen1/CoVision-Social-Distance-Violation-Detection

def detect(opt, save_img=False):
    global bird_image
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith('rtsp') or source.startswith(
        'http') or source.endswith('.txt')

    # initialize the ROI frame
    cv2.namedWindow("image")
    cv2.setMouseCallback("image", get_mouse_points)

    # Initialize
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = torch.load(weights,
                       map_location=device)['model'].float()  # load to FP32
    model.to(device).eval()
    if half:
        model.half()  # to FP16

        # initialize deepsort
        cfg = get_config()
        cfg.merge_from_file(opt.config_deepsort)
        deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                            max_dist=cfg.DEEPSORT.MAX_DIST,
                            min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                            nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                            max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                            max_age=cfg.DEEPSORT.MAX_AGE,
                            n_init=cfg.DEEPSORT.N_INIT,
                            nn_budget=cfg.DEEPSORT.NN_BUDGET,
                            use_cuda=True)

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        view_img = True
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names

    #initialize moving average window
    movingAverageUpdater = movingAverage.movingAverage(5)

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img
              ) if device.type != 'cpu' else None  # run once

    save_path = str(Path(out))
    txt_path = str(Path(out)) + '/results.txt'

    d = DynamicUpdate()
    d.on_launch()

    risk_factors = []
    frame_nums = []
    count = 0

    for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
        if (frame_idx == 0):
            while True:
                image = im0s
                cv2.imshow("image", image)
                cv2.waitKey(1)
                if len(mouse_pts) == 7:
                    cv2.destroyWindow("image")
                    break
            four_points = mouse_pts
            # Get perspective, M is the transformation matrix for bird's eye view
            M, Minv = get_camera_perspective(image, four_points[0:4])

            # Last two points in getMousePoints... this will be the threshold distance between points
            threshold_pts = src = np.float32(np.array([four_points[4:]]))

            # Convert distance to bird's eye view
            warped_threshold_pts = cv2.perspectiveTransform(threshold_pts,
                                                            M)[0]

            # Get distance in pixels
            threshold_pixel_dist = np.sqrt(
                (warped_threshold_pts[0][0] - warped_threshold_pts[1][0])**2 +
                (warped_threshold_pts[0][1] - warped_threshold_pts[1][1])**2)

            # Draw the ROI on the output images
            ROI_pts = np.array([
                four_points[0], four_points[1], four_points[3], four_points[2]
            ], np.int32)

            # initialize birdeye view video writer
            frame_h, frame_w, _ = image.shape

            bevw = birdeye_video_writer.birdeye_video_writer(
                frame_h, frame_w, M, threshold_pixel_dist)
        else:
            break
    t = time.time()
    for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
        print("Loop time: ", time.time() - t)
        t = time.time()
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        cv2.polylines(im0s, [ROI_pts], True, (0, 255, 255), thickness=4)

        # Inferenc
        tOther = time.time()
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()
        print("Non max suppression and inference: ", time.time() - tOther)
        print("Pre detection time: ", time.time() - t)
        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            s += '%gx%g ' % img.shape[2:]  # print string
            save_path = str(Path(out) / Path(p).name)

            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                bbox_xywh = []
                bbox_xyxy = []
                confs = []

                ROI_polygon = Polygon(ROI_pts)

                # Adapt detections to deep sort input format
                for *xyxy, conf, cls in det:
                    img_h, img_w, _ = im0.shape
                    x_c, y_c, bbox_w, bbox_h = bbox_rel(img_w, img_h, *xyxy)
                    obj = [x_c, y_c, bbox_w, bbox_h]
                    confs.append([conf.item()])
                    bbox_xyxy.append(xyxy)
                    bbox_xywh.append(obj)

                xywhs = torch.Tensor(bbox_xywh)
                confss = torch.Tensor(confs)

                # Pass detections to deepsort
                deepsortTime = time.time()
                #outputs = deepsort.update(xywhs, confss, im0)
                print("Deepsort function call: ", (time.time() - deepsortTime))
                outputs = bbox_xyxy
                # draw boxes for visualization
                if len(outputs) > 0:
                    # filter deepsort output
                    outputs_in_ROI, ids_in_ROI = remove_points_outside_ROI(
                        bbox_xyxy, ROI_polygon)
                    center_coords_in_ROI = xywh_to_center_coords(
                        outputs_in_ROI)

                    warped_pts = birdeye_transformer.transform_center_coords_to_birdeye(
                        center_coords_in_ROI, M)

                    clusters = DBSCAN(eps=threshold_pixel_dist,
                                      min_samples=1).fit(warped_pts)
                    print(clusters.labels_)
                    draw_boxes(im0, outputs_in_ROI, clusters.labels_)

                    risk_dict = Counter(clusters.labels_)
                    bird_image = bevw.create_birdeye_frame(
                        warped_pts, clusters.labels_, risk_dict)

                    # movingAverageUpdater.updatePoints(warped_pts, ids_in_ROI)
                    #
                    # gettingAvgTime = time.time()
                    # movingAveragePairs = movingAverageUpdater.getCurrentAverage()
                    #
                    # movingAverageIds = [id for id, x_coord, y_coord in movingAveragePairs]
                    # movingAveragePts = [(x_coord, y_coord) for id, x_coord, y_coord in movingAveragePairs]
                    # embded the bird image to the video

                    # otherStuff = time.time()
                    # if(len(movingAveragePairs) > 0):
                    #     movingAvgClusters = DBSCAN(eps=threshold_pixel_dist, min_samples=1).fit(movingAveragePts)
                    #     movingAvgClustersLables = movingAvgClusters.labels_
                    #     risk_dict = Counter(movingAvgClustersLables)
                    #     bird_image = bevw.create_birdeye_frame(movingAveragePts, movingAvgClustersLables, risk_dict)
                    #     bird_image = resize(bird_image, 20)
                    #     bv_height, bv_width, _ = bird_image.shape
                    #     frame_x_center, frame_y_center = frame_w //2, frame_h//2
                    #     x_offset = 20
                    #
                    #     im0[ frame_y_center-bv_height//2:frame_y_center+bv_height//2, \
                    #         x_offset:bv_width+x_offset ] = bird_image
                    # else:
                    #     risk_dict = Counter(clusters.labels_)
                    #     bird_image = bevw.create_birdeye_frame(warped_pts, clusters.labels_, risk_dict)
                    bird_image = resize(bird_image, 20)
                    bv_height, bv_width, _ = bird_image.shape
                    frame_x_center, frame_y_center = frame_w // 2, frame_h // 2
                    x_offset = 20

                    im0[frame_y_center - bv_height // 2:frame_y_center + bv_height // 2, \
                    x_offset:bv_width + x_offset] = bird_image

                    # print("Other stuff: ", time.time() - otherStuff)

                    #write the risk graph

                    risk_factors += [compute_frame_rf(risk_dict)]
                    frame_nums += [frame_idx]
                    graphTime = time.time()

                    if (frame_idx > 100):
                        count += 1
                        frame_nums.pop(0)
                        risk_factors.pop(0)
                    if frame_idx % 10 == 0:
                        d.on_running(frame_nums, risk_factors, count,
                                     count + 100)
                    print("Graph Time: ", time.time() - graphTime)

                # Write MOT compliant results to file
                if save_txt and len(outputs_in_ROI) != 0:
                    for j, output in enumerate(outputs_in_ROI):
                        bbox_left = output[0]
                        bbox_top = output[1]
                        bbox_w = output[2]
                        bbox_h = output[3]
                        identity = output[-1]
                        with open(txt_path, 'a') as f:
                            f.write(('%g ' * 10 + '\n') %
                                    (frame_idx, identity, bbox_left, bbox_top,
                                     bbox_w, bbox_h, -1, -1, -1,
                                     -1))  # label format

            # Stream results
            if view_img:
                # cv2.imshow("bird_image", bird_image)
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:

                if dataset.mode == 'images':
                    cv2.imwrite(save_path, bird_image)
                    cv2.imwrite(save_path, im0)
                else:

                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer

                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*opt.fourcc),
                            fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        if platform == 'darwin':  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))

Example #8

Show file

File: bus1_track.py Project: winston1214/Car_Counting

def detect(opt, save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith(
        'rtsp') or source.startswith('http') or source.endswith('.txt')

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Initialize
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA
    now = datetime.datetime.now().strftime("%Y/%m/%d/%H:%M:%S") # current time

    # Load model
    model = torch.load(weights, map_location=device)[
        'model'].float()  # load to FP32
    model.to(device).eval()
    if half:
        model.half()  # to FP16

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        view_img = False
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    # run once
    _ = model(img.half() if half else img) if device.type != 'cpu' else None

    save_path = str(Path(out))
    txt_path = str(Path(out)) + '/results.txt'
    url = 'sample_url'
    uid = 'bus1'
    os.system('shutdown -r 06:00')
    memory = {}
    people_counter = 0
    car_counter = 0
    in_people = 0
    out_people = 0
    time_sum = 0
    now_time = datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S')
    
    for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(
            pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()
        
        
        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            s += '%gx%g ' % img.shape[2:]  # print string
            save_path = str(Path(out) / Path(p).name)
            img_center_x = int(im0.shape[1]//2)
            # line = [(0,img_center_y),(im0.shape[1],img_center_y)]
            line = [(int(img_center_x + 50),0),(img_center_x+50,int(im0.shape[0]))]
            line2 = [(int(img_center_x + 170),0),(img_center_x+170,int(im0.shape[0]))]
            cv2.line(im0,line[0],line[1],(0,0,255),5)
            cv2.line(im0,line2[0],line2[1],(0,255,0),5)
          
            
            if det is not None and len(det):
                
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(
                    img.shape[2:], det[:, :4], im0.shape).round()
                crop_xyxy = det[:,:4]
                det = det[crop_xyxy[:,0]<img_center_x + 170] # line 오른쪽 지우기
                if len(det) == 0:
                    pass
                else:

                    # Print results
                    for c in det[:, -1].unique():
                        n = (det[:, -1] == c).sum()  # detections per class
                        s += '%g %ss, ' % (n, names[int(c)])  # add to string
                    
                    bbox_xywh = []
                    confs = []
                    bbox_xyxy = []


                    # Adapt detections to deep sort input format
                    for *xyxy, conf, cls in det:
                        x_c, y_c, bbox_w, bbox_h = bbox_rel(*xyxy)

                        
                        obj = [x_c, y_c, bbox_w, bbox_h,int(cls)]
                    
                        #cv2.circle(im0,(int(x_c),int(y_c)),color=(0,255,255),radius=12,thickness = 10)
                        bbox_xywh.append(obj)
                        # bbox_xyxy.append(rec)
                        confs.append([conf.item()])
                        


                    xywhs = torch.Tensor(bbox_xywh)
                    confss = torch.Tensor(confs)

                    # Pass detections to deepsort
                    outputs = deepsort.update(xywhs, confss, im0) # deepsort
                    index_id = []
                    previous = memory.copy()
                    memory = {}
                    boxes = []
                    names_ls = []



                    # draw boxes for visualization
                    if len(outputs) > 0:
                        
                        bbox_xyxy = outputs[:, :4]
                        identities = outputs[:, -2]
                        labels = outputs[:,-1]
                        dic = {0:'person',2:'car'}
                        for i in labels:
                            names_ls.append(dic[i])
                        
                        # print('output len',len(outputs))
                        for output in outputs:
                            boxes.append([output[0],output[1],output[2],output[3]])
                            index_id.append('{}-{}'.format(names_ls[-1],output[-2]))

                            memory[index_id[-1]] = boxes[-1]

                        if time_sum>=60:
                            param={'In_people':in_people,'Out_people':out_people,'uid':uid,'time':now_time+'~'+datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S')}
                            response = requests.post(url,data=param)
                            response_text = response.text
                            with open('counting.txt','a') as f:
                                f.write('{}~{} IN : {}, Out : {} Response: {}\n'.format(now_time,datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S'),in_people,out_people,response_text))

                            people_counter,car_counter,in_people,out_people = 0,0,0,0
                            time_sum = 0
                            now_time = datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S')
                        i = int(0)
                        for box in boxes:
                            # extract the bounding box coordinates
                            (x, y) = (int(box[0]), int(box[1]))
                            (w, h) = (int(box[2]), int(box[3]))


                            if index_id[i] in previous:
                                previous_box = previous[index_id[i]]
                                (x2, y2) = (int(previous_box[0]), int(previous_box[1]))
                                (w2, h2) = (int(previous_box[2]), int(previous_box[3]))
                                p0 = (int(x + (w-x)/2), int(y + (h-y)/2))
                                p1 = (int(x2 + (w2-x2)/2), int(y2 + (h2-y2)/2))
                                
                                cv2.line(im0, p0, p1, (0,255,0), 3) # current frame obj center point - before frame obj center point
                            
                                
                                if intersect(p0, p1, line[0], line[1]) and index_id[i].split('-')[0] == 'person':
                                    people_counter += 1
                                    if p0[0] > line[1][0]:
                                        in_people +=1
                                    else:
                                        out_people +=1
                                if intersect(p0, p1, line[0], line[1]) and index_id[i].split('-')[0] == 'car':
                                    car_counter +=1
                                
                                
    
                            i += 1

                        draw_boxes(im0,bbox_xyxy,identities,labels)
                            
                        

                    # Write MOT compliant results to file
                    if save_txt and len(outputs) != 0:
                        for j, output in enumerate(outputs):
                            bbox_left = output[0]
                            bbox_top = output[1]
                            bbox_w = output[2]
                            bbox_h = output[3]
                            identity = output[-1]
                            with open(txt_path, 'a') as f:
                                f.write(('%g ' * 10 + '\n') % (frame_idx, identity, bbox_left,
                                                            bbox_top, bbox_w, bbox_h, -1, -1, -1, -1))  # label format       

            else:
                deepsort.increment_ages()
            cv2.putText(im0, 'In : {}, Out : {}'.format(in_people,out_people),(130,50),cv2.FONT_HERSHEY_COMPLEX,1.0,(0,0,255),3)
            cv2.putText(im0, 'Person : {}'.format(people_counter), (130,100),cv2.FONT_HERSHEY_COMPLEX,1.0,(0,0,255),3)
            # Print time (inference + NMS)
            if time_sum>=60:
                param={'In_people':in_people,'Out_people':out_people,'uid':uid,'time':now_time+'~'+datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S')}
                response = requests.post(url,data=param)
                response_text = response.text
                with open('counting.txt','a') as f:
                    f.write('{}~{} IN : {}, Out : {}, Response: {}\n'.format(now_time,datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S'),in_people,out_people,response_text))

                people_counter,car_counter,in_people,out_people = 0,0,0,0
                time_sum = 0
                now_time = datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S')
            
            print('%sDone. (%.3fs)' % (s, t2 - t1))
            time_sum += t2-t1
            


            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    im0= cv2.resize(im0,(0,0),fx=0.5,fy=0.5,interpolation=cv2.INTER_LINEAR)
                    cv2.imwrite(save_path, im0)
                else:
                    
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*opt.fourcc), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        if platform == 'darwin':  # MacOS
            os.system('open ' + save_path)
    param={'In_people':in_people,'Out_people':out_people,'uid':uid,'time':now_time+'~'+datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S')}
    response = requests.post(url,data=param)
    response_text = response.text
    with open('counting.txt','a') as f:
        f.write('{}~{} IN : {}, Out : {}, Response: {}\n'.format(now_time,datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S'),in_people,out_people,response_text))
    print('Done. (%.3fs)' % (time.time() - t0))

Example #9

Show file

def detect(opt):
    out, source, yolo_model, deep_sort_model, show_vid, save_vid, save_txt, imgsz, evaluate, half, \
        project, exist_ok, update, save_crop = \
        opt.output, opt.source, opt.yolo_model, opt.deep_sort_model, opt.show_vid, opt.save_vid, \
        opt.save_txt, opt.imgsz, opt.evaluate, opt.half, opt.project, opt.exist_ok, opt.update, opt.save_crop
    webcam = source == '0' or source.startswith(
        'rtsp') or source.startswith('http') or source.endswith('.txt')

    # Initialize
    device = select_device(opt.device)
    half &= device.type != 'cpu'  # half precision only supported on CUDA

    # The MOT16 evaluation runs multiple inference streams in parallel, each one writing to
    # its own .txt file. Hence, in that case, the output folder is not restored
    if not evaluate:
        if os.path.exists(out):
            pass
            shutil.rmtree(out)  # delete output folder
        os.makedirs(out)  # make new output folder

    # Directories
    if type(yolo_model) is str:  # single yolo model
        exp_name = yolo_model.split(".")[0]
    elif type(yolo_model) is list and len(yolo_model) == 1:  # single models after --yolo_model
        exp_name = yolo_model[0].split(".")[0]
    else:  # multiple models after --yolo_model
        exp_name = "ensemble"
    exp_name = exp_name + "_" + deep_sort_model.split('/')[-1].split('.')[0]
    save_dir = increment_path(Path(project) / exp_name, exist_ok=exist_ok)  # increment run if project name exists
    (save_dir / 'tracks' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Load model
    model = DetectMultiBackend(yolo_model, device=device, dnn=opt.dnn)
    stride, names, pt = model.stride, model.names, model.pt
    imgsz = check_img_size(imgsz, s=stride)  # check image size

    # Half
    half &= pt and device.type != 'cpu'  # half precision only supported by PyTorch on CUDA
    if pt:
        model.model.half() if half else model.model.float()

    # Set Dataloader
    vid_path, vid_writer = None, None
    # Check if environment supports image displays
    if show_vid:
        show_vid = check_imshow()

    # Dataloader
    if webcam:
        show_vid = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt)
        nr_sources = len(dataset)
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt)
        nr_sources = 1
    vid_path, vid_writer, txt_path = [None] * nr_sources, [None] * nr_sources, [None] * nr_sources

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)

    # Create as many trackers as there are video sources
    deepsort_list = []
    for i in range(nr_sources):
        deepsort_list.append(
            DeepSort(
                deep_sort_model,
                device,
                max_dist=cfg.DEEPSORT.MAX_DIST,
                max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET,
            )
        )
    outputs = [None] * nr_sources

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names

    # Run tracking
    model.warmup(imgsz=(1 if pt else nr_sources, 3, *imgsz))  # warmup
    dt, seen = [0.0, 0.0, 0.0, 0.0], 0
    for frame_idx, (path, im, im0s, vid_cap, s) in enumerate(dataset):
        t1 = time_sync()
        im = torch.from_numpy(im).to(device)
        im = im.half() if half else im.float()  # uint8 to fp16/32
        im /= 255.0  # 0 - 255 to 0.0 - 1.0
        if len(im.shape) == 3:
            im = im[None]  # expand for batch dim
        t2 = time_sync()
        dt[0] += t2 - t1

        # Inference
        visualize = increment_path(save_dir / Path(path[0]).stem, mkdir=True) if opt.visualize else False
        pred = model(im, augment=opt.augment, visualize=visualize)
        t3 = time_sync()
        dt[1] += t3 - t2

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, opt.classes, opt.agnostic_nms, max_det=opt.max_det)
        dt[2] += time_sync() - t3

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            seen += 1
            if webcam:  # nr_sources >= 1
                p, im0, _ = path[i], im0s[i].copy(), dataset.count
                p = Path(p)  # to Path
                s += f'{i}: '
                txt_file_name = p.name
                save_path = str(save_dir / p.name)  # im.jpg, vid.mp4, ...
            else:
                p, im0, _ = path, im0s.copy(), getattr(dataset, 'frame', 0)
                p = Path(p)  # to Path
                # video file
                if source.endswith(VID_FORMATS):
                    txt_file_name = p.stem
                    save_path = str(save_dir / p.name)  # im.jpg, vid.mp4, ...
                # folder with imgs
                else:
                    txt_file_name = p.parent.name  # get folder name containing current img
                    save_path = str(save_dir / p.parent.name)  # im.jpg, vid.mp4, ...

            txt_path = str(save_dir / 'tracks' / txt_file_name)  # im.txt
            s += '%gx%g ' % im.shape[2:]  # print string
            imc = im0.copy() if save_crop else im0  # for save_crop

            annotator = Annotator(im0, line_width=2, pil=not ascii)

            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                xywhs = xyxy2xywh(det[:, 0:4])
                confs = det[:, 4]
                clss = det[:, 5]

                # pass detections to deepsort
                t4 = time_sync()
                outputs[i] = deepsort_list[i].update(xywhs.cpu(), confs.cpu(), clss.cpu(), im0)
                t5 = time_sync()
                dt[3] += t5 - t4

                # draw boxes for visualization
                if len(outputs[i]) > 0:
                    for j, (output, conf) in enumerate(zip(outputs[i], confs)):

                        bboxes = output[0:4]
                        id = output[4]
                        cls = output[5]

                        if save_txt:
                            # to MOT format
                            bbox_left = output[0]
                            bbox_top = output[1]
                            bbox_w = output[2] - output[0]
                            bbox_h = output[3] - output[1]
                            # Write MOT compliant results to file
                            with open(txt_path + '.txt', 'a') as f:
                                f.write(('%g ' * 10 + '\n') % (frame_idx + 1, id, bbox_left,  # MOT format
                                                               bbox_top, bbox_w, bbox_h, -1, -1, -1, i))

                        if save_vid or save_crop or show_vid:  # Add bbox to image
                            c = int(cls)  # integer class
                            label = f'{id} {names[c]} {conf:.2f}'
                            annotator.box_label(bboxes, label, color=colors(c, True))
                            if save_crop:
                                txt_file_name = txt_file_name if (isinstance(path, list) and len(path) > 1) else ''
                                save_one_box(bboxes, imc, file=save_dir / 'crops' / txt_file_name / names[c] / f'{id}' / f'{p.stem}.jpg', BGR=True)

                LOGGER.info(f'{s}Done. YOLO:({t3 - t2:.3f}s), DeepSort:({t5 - t4:.3f}s)')

            else:
                deepsort_list[i].increment_ages()
                LOGGER.info('No detections')

            # Stream results
            im0 = annotator.result()
            if show_vid:
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_vid:
                if vid_path[i] != save_path:  # new video
                    vid_path[i] = save_path
                    if isinstance(vid_writer[i], cv2.VideoWriter):
                        vid_writer[i].release()  # release previous video writer
                    if vid_cap:  # video
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                    else:  # stream
                        fps, w, h = 30, im0.shape[1], im0.shape[0]
                    save_path = str(Path(save_path).with_suffix('.mp4'))  # force *.mp4 suffix on results videos
                    vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
                vid_writer[i].write(im0)

    # Print results
    t = tuple(x / seen * 1E3 for x in dt)  # speeds per image
    LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS, %.1fms deep sort update \
        per image at shape {(1, 3, *imgsz)}' % t)
    if save_txt or save_vid:
        s = f"\n{len(list(save_dir.glob('tracks/*.txt')))} tracks saved to {save_dir / 'tracks'}" if save_txt else ''
        LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}")
    if update:
        strip_optimizer(yolo_model)  # update model (to fix SourceChangeWarning)

Example #10

Show file

def detect(save_img=False):
    source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    save_img = not opt.nosave and not source.endswith(
        '.txt')  # save inference images
    webcam = source.isnumeric() or source.endswith(
        '.txt') or source.lower().startswith(('rtsp://', 'rtmp://', 'http://'))

    # Directories
    save_dir = Path(
        increment_path(Path(opt.project) / opt.name,
                       exist_ok=opt.exist_ok))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(
        parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model']).to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    if device.type != 'cpu':
        model(
            torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(
                next(model.parameters())))  # run once
    t0 = time.time()
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(
                ), dataset.count
            else:
                p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + (
                '' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if opt.save_conf else (
                            cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or view_img:  # Add bbox to image
                        label = f'{names[int(cls)]} {conf:.2f}'
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)],
                                     line_thickness=3)

            # Print time (inference + NMS)
            print(f'{s}Done. ({t2 - t1:.3f}s)')

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video' or 'stream'
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer
                        if vid_cap:  # video
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        else:  # stream
                            fps, w, h = 30, im0.shape[1], im0.shape[0]
                            save_path += '.mp4'
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps,
                            (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")

    print(f'Done. ({time.time() - t0:.3f}s)')

Example #11

Show file

File: track.py Project: LordSquishers/Pupper_Repo

def detect(opt, save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith('rtsp') or source.startswith(
        'http') or source.endswith('.txt')

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Initialize
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA
    last_time = time.time()

    # Load model
    model = torch.load(weights,
                       map_location=device)['model'].float()  # load to FP32
    model.to(device).eval()
    if half:
        model.half()  # to FP16

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        view_img = True
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    # run once
    _ = model(img.half() if half else img) if device.type != 'cpu' else None

    save_path = str(Path(out))
    txt_path = str(Path(out)) + '/results.txt'
    print('starting predictions...')

    # static vars
    time_total_start = 0

    curve_goal = None

    for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):

        if curve_goal is not None:  # CURVE ACTION
            # eval curve
            total_change = np.array([0], dtype='float64')
            total_time = 10
            time_elapsed = time_synchronized() - time_total_start

            x_start = curve_goal.evaluate(time_elapsed / total_time)[0]
            for a in range(int(time_elapsed * 32),
                           int((time_elapsed + 0.25) * 32)):
                if time_elapsed + 0.25 > total_time:
                    break
                total_change += curve_goal.evaluate(
                    a / (total_time * 32))[0] - x_start
                # print(total_change)
                # if total_change? > 9999999:
                #     print('broke at', total_change, '[max 999999]')

            total_change /= (time_elapsed + 0.5) / total_time

            print((total_change / NORMALIZATION_CONSTANT)[0])
            # print('time', time_elapsed/total_time, '\n')

            rx = max(min(total_change / NORMALIZATION_CONSTANT, [1]), [-1])[0]

            vals['rx'] = round(rx, 5)
            vals['ly'] = 1
            vals['trot'] = 1
            # print(vals['rx'])

            if time_synchronized() - time_total_start > total_time:
                curve_goal = None
                time_total_start = 0

                vals['trot'] = 0
                vals['ly'] = 0
                # sys.exit()
        else:
            img = torch.from_numpy(img).to(device)
            img = img.half() if half else img.float()  # uint8 to fp16/32
            img /= 255.0  # 0 - 255 to 0.0 - 1.0
            if img.ndimension() == 3:
                img = img.unsqueeze(0)

            # Inference
            t1 = time_synchronized()
            pred = model(img, augment=opt.augment)[0]

            # Apply NMS
            pred = non_max_suppression(pred,
                                       opt.conf_thres,
                                       opt.iou_thres,
                                       classes=opt.classes,
                                       agnostic=opt.agnostic_nms)
            t2 = time_synchronized()

            # Process detections
            for i, det in enumerate(pred):  # detections per image
                if webcam:  # batch_size >= 1
                    p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
                else:
                    p, s, im0 = path, '', im0s

                s += '%gx%g ' % img.shape[2:]  # print string
                save_path = str(Path(out) / Path(p).name)

                if det is not None and len(det):
                    # Rescale boxes from img_size to im0 size
                    det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                              im0.shape).round()

                    # Print results
                    for c in det[:, -1].unique():
                        n = (det[:, -1] == c).sum()  # detections per class
                        s += '%g %ss, ' % (n, names[int(c)])  # add to string

                    bbox_xywh = []
                    confs = []

                    # Adapt detections to deep sort input format
                    for *xyxy, conf, cls in det:
                        x_c, y_c, bbox_w, bbox_h = bbox_rel(*xyxy)
                        obj = [x_c, y_c, bbox_w, bbox_h]
                        bbox_xywh.append(obj)
                        confs.append([conf.item()])

                    xywhs = torch.Tensor(bbox_xywh)
                    confss = torch.Tensor(confs)

                    # Pass detections to deepsort
                    outputs = deepsort.update(xywhs, confss, im0)

                    # draw boxes for visualization
                    if len(outputs) > 0:
                        bbox_xyxy = outputs[:, :4]
                        identities = outputs[:, -1]
                        draw_boxes(im0, bbox_xyxy, identities)

                    # Write MOT compliant results to file
                    if save_txt and len(outputs) != 0:
                        bezier_points = np.zeros((2 * len(outputs), 2))
                        idx = 0

                        for j, output in enumerate(outputs):
                            bbox_left = output[0]
                            bbox_top = output[1]
                            bbox_w = output[2]
                            bbox_h = output[3]
                            identity = output[-1]
                            with open(txt_path, 'a') as f:
                                f.write(('%g ' * 10 + '\n') %
                                        (frame_idx, identity, bbox_left,
                                         bbox_top, bbox_w, bbox_h, -1, -1, -1,
                                         -1))  # label format
                            # calculating rotation and movement for dog!
                            # gotta use vals['...']
                            # dim CAM_WIDTH x CAM_HEIGHT
                            # rotation calculation
                            middle_x = (bbox_left + bbox_w) / 2

                            # translation calculation
                            percent_filled_y = (bbox_h - bbox_top) / CAM_HEIGHT
                            percent_filled_y *= 100

                            # GENERATE TOLERANCE #
                            max_width_tolerance = 0.375 * (bbox_w - bbox_left)
                            left_bound = bbox_left - max_width_tolerance
                            right_bound = bbox_w + max_width_tolerance

                            # GENERATE DISTANCE #
                            distance_rel = math.e**(-percent_filled_y / 30)
                            # print('left, middle, right', left_bound, middle_x, right_bound)
                            # print('dist', distance_rel, 'pct', percent_filled_y)

                            # GENERATE POINTS #
                            if percent_filled_y < 1.:
                                bezier_points[idx][0] = -1
                                bezier_points[idx][1] = -1
                                bezier_points[idx + 1][0] = -1
                                bezier_points[idx + 1][1] = -1
                            else:
                                if idx > 1:  # relative to last box
                                    midpoint = bezier_points[idx - 1][
                                        0]  # exit point of last node
                                    bezier_points[idx][
                                        0] = left_bound if middle_x >= midpoint else right_bound
                                    bezier_points[idx][
                                        1] = distance_rel - 0.005
                                    bezier_points[idx + 1][
                                        0] = left_bound if middle_x >= midpoint else right_bound
                                    bezier_points[idx +
                                                  1][1] = distance_rel + 0.005
                                else:  # rel to middle
                                    bezier_points[idx][
                                        0] = left_bound if middle_x >= (
                                            CAM_WIDTH / 2) else right_bound
                                    bezier_points[idx][
                                        1] = distance_rel - 0.005
                                    bezier_points[
                                        idx +
                                        1][0] = left_bound if middle_x >= (
                                            CAM_WIDTH / 2) else right_bound
                                    bezier_points[idx +
                                                  1][1] = distance_rel + 0.005

                            idx += 2

                        if cv2.waitKey(1) == ord('f'):
                            points = list()
                            skipped_boxes = list()
                            skip_idx = -1
                            for a in range(bezier_points.shape[0]):
                                x = bezier_points[a][0]
                                y = bezier_points[a][1]

                                if bezier_points.shape[0] > a + 1 and abs(
                                        y - bezier_points[a + 1][1]) < .001:
                                    skip_idx = a + 1

                                if x < 0 or y < 0:
                                    continue

                                if bezier_points.shape[0] > a + 3 and (
                                        a != skip_idx
                                ) and abs(
                                        x - bezier_points[a + 2][0]
                                ) > CAM_WIDTH * 0.2:  # threshold for ignorance
                                    # print('skipping', a + 2)
                                    skipped_boxes.append(a + 2)
                                    skipped_boxes.append(a + 3)

                            for skipped_idx in skipped_boxes:
                                bezier_points[skipped_idx][0] = -1
                                bezier_points[skipped_idx][1] = -1

                            for a in range(bezier_points.shape[0]):
                                x = bezier_points[a][0]
                                y = bezier_points[a][1]
                                if x < 0 or y < 0:
                                    continue
                                points.append((x, y))

                            far_pt = 0
                            if len(points) > 0:
                                far_pt = points[-1][1] + 1
                            points.append((CAM_WIDTH / 2, 0))
                            points.append((CAM_WIDTH / 2, far_pt))
                            points.sort(key=y_coord_sort)

                            nodes_curve_norm = np.swapaxes(
                                np.array(points), 1, 0)
                            nodes_curve = np.asfortranarray(nodes_curve_norm)
                            # print(frame_idx, nodes_curve)
                            print('calculating curve on frame', frame_idx)

                            curve = bezier.Curve(nodes_curve,
                                                 degree=(nodes_curve.shape[1] -
                                                         1))

                            # DISPLAY DATA #

                            # x = left/right bound [0,720]
                            # y = distance [1, e**-3]
                            for j, output in enumerate(outputs):
                                bbox_left = output[0]
                                bbox_top = output[1]
                                bbox_w = output[2]
                                bbox_h = output[3]

                                percent_filled_y = (bbox_h -
                                                    bbox_top) / CAM_HEIGHT
                                percent_filled_y *= 100
                                distance_rel = math.e**(-percent_filled_y / 30)

                                plt.plot([bbox_left, bbox_w],
                                         [distance_rel, distance_rel])
                            plot_bez(curve, frame_idx)

                            # set curve
                            time_total_start = time_synchronized()
                            curve_goal = curve

                            # sys.exit()

                else:
                    deepsort.increment_ages()

                # Print time (inference + NMS)
                # print('%sDone. (%.3fs)' % (s, t2 - t1))

                # Stream results
                if view_img:
                    cv2.imshow(p, im0)
                    if cv2.waitKey(1) == ord('q'):  # q to quit
                        raise StopIteration

                # Save results (image with detections)
                if save_img:
                    print('saving img!')
                    if dataset.mode == 'images':
                        cv2.imwrite(save_path, im0)
                    else:
                        print('saving video!')
                        if vid_path != save_path:  # new video
                            vid_path = save_path
                            if isinstance(vid_writer, cv2.VideoWriter):
                                vid_writer.release(
                                )  # release previous video writer

                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                            vid_writer = cv2.VideoWriter(
                                save_path, cv2.VideoWriter_fourcc(*opt.fourcc),
                                fps, (w, h))
                        vid_writer.write(im0)
        if not DEBUG_MODE:
            if time.time() - last_time > cooldown:  # so we dont spam
                send_commands(vals)
                last_time = time.time()
            events = sel.select(timeout=1)
            if events:
                for key, mask in events:
                    service_connection(key, mask)
            if not sel.get_map():
                break

    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        if platform == 'darwin':  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))

Example #12

Show file

File: run.py Project: z430/yolo-action-recognition

def detect(opt, save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith('rtsp') or source.startswith(
        'http') or source.endswith('.txt')

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Initialize
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = torch.load(weights,
                       map_location=device)['model'].float()  # load to FP32
    model.to(device).eval()
    if half:
        model.half()  # to FP16

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        view_img = view_img
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    # run once
    _ = model(img.half() if half else img) if device.type != 'cpu' else None

    # save_path = str(Path(out))
    txt_path = str(Path(out)) + '/results.txt'

    vid = cv2.VideoCapture(source)

    filename = os.path.basename(source).split('.')[0]
    save_path = f"results/{filename}_action.mp4"
    fps = vid.get(cv2.CAP_PROP_FPS)
    w = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    h = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*("mp4v")), fps, (w, h))
    # ffmpeg setup
    pipe = Popen([
        'ffmpeg', '-loglevel', 'quiet', '-y', '-f', 'image2pipe', '-vcodec',
        'mjpeg', '-framerate', f'{fps}', '-i', '-', '-vcodec', 'libx264',
        '-crf', '28', '-preset', 'veryslow', '-framerate', f'{fps}',
        f'{save_path}'
    ],
                 stdin=PIPE)
    length = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
    pbar = tqdm(total=length, position=0, leave=True)
    for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
        start = time.time()
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, im0 = path, im0s

            # s += '%gx%g ' % img.shape[2:]  # print string
            # save_path = str(Path(out) / Path(p).name)

            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    # s += '%g %ss, ' % (n, names[int(c)])  # add to string

                bbox_xywh = []
                confs = []

                # Adapt detections to deep sort input format
                for *xyxy, conf, cls in det:
                    x_c, y_c, bbox_w, bbox_h = bbox_rel(*xyxy)
                    obj = [x_c, y_c, bbox_w, bbox_h]
                    bbox_xywh.append(obj)
                    confs.append([conf.item()])

                xywhs = torch.Tensor(bbox_xywh)
                confss = torch.Tensor(confs)

                # Pass detections to deepsort
                im0 = deepsort.update(xywhs, confss, im0)

                # # draw boxes for visualization
                # if len(outputs) > 0:
                #     bbox_xyxy = outputs[:, :4]
                #     identities = outputs[:, -1]
                #     draw_boxes(im0, bbox_xyxy, identities)

            else:
                deepsort.increment_ages()

            # Print time (inference + NMS)
            runtime_fps = 1 / (time.time() - start)
            # print(f"Runtime FPS: {runtime_fps:.2f}")
            pbar.set_description(f"runtime_fps: {runtime_fps}")
            pbar.update(1)
            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            # vid_writer.write(im0)
            im0 = Image.fromarray(im0[..., ::-1])
            # print(im0)
            im0.save(pipe.stdin, 'JPEG')

    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        # vid_writer.release()
        pipe.stdin.close()
        pipe.wait()
        pbar.close()

    print('Done. (%.3fs)' % (time.time() - t0))

Example #13

Show file

File: track.py Project: Borda/Yolov5_DeepSort

def detect(opt):
    out, source, yolo_weights, deep_sort_weights, show_vid, save_vid, save_txt, imgsz, evaluate = \
        opt.output, opt.source, opt.yolo_weights, opt.deep_sort_weights, opt.show_vid, opt.save_vid, \
            opt.save_txt, opt.img_size, opt.evaluate
    webcam = source == '0' or source.startswith(
        'rtsp') or source.startswith('http') or source.endswith('.txt')

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    attempt_download(deep_sort_weights, repo='mikel-brostrom/Yolov5_DeepSort_Pytorch')
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Initialize
    device = select_device(opt.device)

    # The MOT16 evaluation runs multiple inference streams in parallel, each one writing to
    # its own .txt file. Hence, in that case, the output folder is not restored
    if not evaluate:
        if os.path.exists(out):
            pass
            shutil.rmtree(out)  # delete output folder
        os.makedirs(out)  # make new output folder

    half = device.type != 'cpu'  # half precision only supported on CUDA
    # Load model
    model = attempt_load(yolo_weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    names = model.module.names if hasattr(model, 'module') else model.names  # get class names
    if half:
        model.half()  # to FP16

    # Set Dataloader
    vid_path, vid_writer = None, None
    # Check if environment supports image displays
    if show_vid:
        show_vid = check_imshow()

    if webcam:
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names

    # Run inference
    if device.type != 'cpu':
        model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once
    t0 = time.time()

    save_path = str(Path(out))
    # extract what is in between the last '/' and last '.'
    txt_file_name = source.split('/')[-1].split('.')[0]
    txt_path = str(Path(out)) + '/' + txt_file_name + '.txt'

    for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_sync()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(
            pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_sync()

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            s += '%gx%g ' % img.shape[2:]  # print string
            save_path = str(Path(out) / Path(p).name)

            annotator = Annotator(im0, line_width=2, pil=not ascii)

            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(
                    img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                xywhs = xyxy2xywh(det[:, 0:4])
                confs = det[:, 4]
                clss = det[:, 5]

                # pass detections to deepsort
                outputs = deepsort.update(xywhs.cpu(), confs.cpu(), clss.cpu(), im0)
                
                # draw boxes for visualization
                if len(outputs) > 0:
                    for j, (output, conf) in enumerate(zip(outputs, confs)): 
                        
                        bboxes = output[0:4]
                        id = output[4]
                        cls = output[5]

                        c = int(cls)  # integer class
                        label = f'{id} {names[c]} {conf:.2f}'
                        annotator.box_label(bboxes, label, color=colors(c, True))

                        if save_txt:
                            # to MOT format
                            bbox_left = output[0]
                            bbox_top = output[1]
                            bbox_w = output[2] - output[0]
                            bbox_h = output[3] - output[1]
                            # Write MOT compliant results to file
                            with open(txt_path, 'a') as f:
                               f.write(('%g ' * 10 + '\n') % (frame_idx, id, bbox_left,
                                                           bbox_top, bbox_w, bbox_h, -1, -1, -1, -1))  # label format

            else:
                deepsort.increment_ages()

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            im0 = annotator.result()
            if show_vid:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_vid:
                if vid_path != save_path:  # new video
                    vid_path = save_path
                    if isinstance(vid_writer, cv2.VideoWriter):
                        vid_writer.release()  # release previous video writer
                    if vid_cap:  # video
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                    else:  # stream
                        fps, w, h = 30, im0.shape[1], im0.shape[0]
                        save_path += '.mp4'

                    vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
                vid_writer.write(im0)

    if save_txt or save_vid:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        if platform == 'darwin':  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))

Example #14

Show file

def detect(opt, save_img=False):
    out, source, weights, view_img, save_txt, imgsz, GCP_list = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size, opt.GCP_list
    webcam = source == '0' or source.startswith('rtsp') or source.startswith(
        'http') or source.endswith('.txt')

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Initialize
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = torch.load(weights,
                       map_location=device)['model'].float()  # load to FP32
    model.to(device).eval()
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model']).to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        view_img = True
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    # run once
    _ = model(img.half() if half else img) if device.type != 'cpu' else None

    save_path = str(Path(out))
    txt_path_raw = str(Path(out)) + '/results_raw.txt'

    # 속도까지 붙여버린 데이터 따로 생성해서 비교해보자 : 수정수정
    txt_path_raw2 = str(Path(out)) + '/results_raw2.txt'

    # point load
    with open('./mapdata/point.yaml') as f:
        data = yaml.load(f.read())
    frm_point = data['frm_point']
    geo_point = data['geo_point']

    Counter_1 = [(488, 589), (486, 859)]
    Counter_2 = [(3463, 795), (3487, 1093)]
    Counter_list = [Counter_1, Counter_2]

    datum_dist = []
    counter_dist = []

    line_fileName = './mapdata/Busan1_IC_Polyline_to_Vertex.txt'
    all_line = mapdata_load(line_fileName, frm_point, geo_point)

    percep_frame = 5
    from _collections import deque
    pts = [deque(maxlen=percep_frame + 1) for _ in range(10000)]
    ptsSpeed = [deque(maxlen=1) for _ in range(10000)]

    frame_len = calc_dist(frm_point[1], frm_point[4])
    geo_len = calc_dist(geo_point[1], geo_point[4])

    # ----------------- fix val start
    fixcnt = 1
    # ----------------- fix val end

    # ----------------- counter val start
    memory_index = {}
    memory_id = {}

    cnt = np.zeros((len(Counter_list), 4))
    # total_counter = 0 # 나중에 총 카운터를 만들어 넣으면 되겠지?

    # count_1_total = 0
    # count_1_veh_c0 = 0
    # count_1_veh_c1 = 0
    # count_1_veh_c2 = 0

    # count_2_total = 0
    # count_2_veh_c0 = 0
    # count_2_veh_c1 = 0
    # count_2_veh_c2 = 0
    # ----------------- counter val end

    for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):

        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)
            print(pred)

        t2 = time_synchronized()

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            s += '%gx%g ' % img.shape[2:]  # print string
            save_path = str(Path(out) / Path(p).name)

            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                bbox_xywh = []
                confs = []
                clss = []
                # Adapt detections to deep sort input format
                for *xyxy, conf, cls in det:
                    x_c, y_c, bbox_w, bbox_h = bbox_rel(*xyxy)
                    obj = [x_c, y_c, bbox_w, bbox_h]
                    bbox_xywh.append(obj)
                    confs.append([conf.item()])
                    clss.append(cls.item())

                bbox_xywh = bbox_xywh
                cls_conf = confs
                cls_ids = clss
                # xywhs = torch.Tensor(bbox_xywh)
                # confss = torch.Tensor(confs)
                # cls_ids = clss

                # if len(bbox_xywh) == 0:
                #     continue
                # print("detection cls_ids:", cls_ids)

                #filter cls id for tracking
                # print("cls_ids")
                # print(cls_ids)

                # # select class
                # mask = []
                # lst_move_life = [0,1,2]
                # # lst_for_track = []

                # for id in cls_ids:
                #     if id in lst_move_life:
                #         # lst_for_track.append(id)
                #         mask.append(True)
                #     else:
                #         mask.append()
                # # print("mask cls_ids:", mask)

                # # print(bbox_xywh)
                # bbox_xywh = list(compress(bbox_xywh,mask))
                # bbox dilation just in case bbox too small, delete this line if using a better pedestrian detector
                # bbox_xywh[:,3:] *= 1.2
                # cls_conf = list(compress(cls_conf,mask))
                # print(cls_conf)

                bbox_xywh = torch.Tensor(bbox_xywh)
                cls_conf = torch.Tensor(cls_conf)

                # Pass detections to deepsort
                outputs = deepsort.update(bbox_xywh, cls_conf, im0, cls_ids)
                """
                # output 형식

                [[박스 좌측상단 x, 박스 좌측상단 y, 박스 우측하단 x, 박스 우측하단 y, 차량 id, 클래스 넘버],
                [박스 좌측상단 x, 박스 좌측상단 y, 박스 우측하단 x, 박스 우측하단 y, 차량 id, 클래스 넘버],
                [박스 좌측상단 x, 박스 좌측상단 y, 박스 우측하단 x, 박스 우측하단 y, 차량 id, 클래스 넘버],
                [박스 좌측상단 x, 박스 좌측상단 y, 박스 우측하단 x, 박스 우측하단 y, 차량 id, 클래스 넘버],
                ...]
                """

                # ------------------------------------------------------------------------------------------------------ img fix start
                t3 = time_synchronized()
                match_mid_point_list = matcher_BRISK_BF(im0, GCP_list)
                t4 = time_synchronized()
                # ---------------------------------------------------------------------------------------------------------------------- line start

                # 기준점 위치 갱신을 위한 삼변측량의 거리 정의 및 고정
                if frame_idx == 0:
                    for pointNum in range(len(frm_point)):
                        for GCP_num in range(len(match_mid_point_list)):
                            datum_dist.append(
                                point_dist(match_mid_point_list[GCP_num],
                                           frm_point[pointNum]))
                    datum_dist = np.reshape(
                        datum_dist,
                        (len(frm_point), len(match_mid_point_list)))
                    for Ct_list in Counter_list:
                        for Ctpoint_num in range(len(Ct_list)):
                            for GCP_num in range(len(match_mid_point_list)):
                                counter_dist.append(
                                    point_dist(match_mid_point_list[GCP_num],
                                               Ct_list[Ctpoint_num]))
                    counter_dist = np.reshape(counter_dist,
                                              (len(Counter_list), len(Ct_list),
                                               len(match_mid_point_list)))
                t5 = time_synchronized()

                pre_P = (0, 0)

                for line_num, eachline in enumerate(all_line):
                    for newpoint in eachline['frmPoint']:
                        if line_num == 0:
                            im0 = cv2.circle(im0, newpoint, 5, (0, 0, 255),
                                             -1)  # 차선_실선
                            if calc_dist(pre_P, newpoint) < 390:
                                im0 = cv2.line(im0, pre_P, newpoint,
                                               (0, 0, 255), 2, -1)
                        elif line_num == 1:
                            im0 = cv2.circle(im0, newpoint, 5, (0, 255, 0),
                                             -1)  # 도로 경계
                            if calc_dist(pre_P, newpoint) < 420:
                                im0 = cv2.line(im0, pre_P, newpoint,
                                               (0, 255, 0), 2, -1)
                        elif line_num == 2:
                            im0 = cv2.circle(im0, newpoint, 5, (255, 0, 0),
                                             -1)  # 차선_겹선
                            if calc_dist(pre_P, newpoint) < 350:
                                im0 = cv2.line(im0, pre_P, newpoint,
                                               (255, 0, 0), 2, -1)
                        else:
                            im0 = cv2.circle(im0, newpoint, 5, (100, 100, 0),
                                             -1)  # 차선_점선
                            if calc_dist(pre_P, newpoint) < 600:
                                im0 = cv2.line(im0, pre_P, newpoint,
                                               (100, 100, 0), 2, -1)
                        pre_P = newpoint

                t6 = time_synchronized()
                for pointNum in range(len(frm_point)):
                    im0 = cv2.circle(im0, frm_point[pointNum], 10, (0, 0, 0),
                                     -1)
                    newPoint = intersectionPoint(match_mid_point_list,
                                                 datum_dist[pointNum])
                    frm_point[pointNum] = newPoint

                t7 = time_synchronized()

                #---------------------------------------------------------------------------------------------------------------------- line end

                # ------------------------------------------------------------------------------------------------------ img fix end

                # ------------------------------------------------------------------------------------------------------ counting num and class start
                Counter_newpoint = []
                for Ct_num in range(len(Counter_list)):
                    Ct_list = Counter_list[Ct_num]
                    for Ctpoint_num in range(len(Ct_list)):
                        Counter_newpoint.append(
                            intersectionPoint(
                                match_mid_point_list,
                                counter_dist[Ct_num][Ctpoint_num]))
                Counter_newpoint = np.reshape(
                    Counter_newpoint, (len(Counter_list), len(Ct_list), 2))

                for CountNum in Counter_newpoint:
                    im0 = cv2.line(im0, tuple(CountNum[0]), tuple(CountNum[1]),
                                   (0, 0, 0), 5, -1)

                boxes = []
                indexIDs = []
                classIDs = []
                previous_index = memory_index.copy()
                previous_id = memory_id.copy()
                memory_index = {}
                memory_id = {}
                COLORS = np.random.randint(0,
                                           255,
                                           size=(200, 3),
                                           dtype="uint8")
                if save_txt and len(outputs) != 0:
                    for j, output in enumerate(outputs):
                        boxes.append(
                            [output[0], output[1], output[2], output[3]])
                        indexIDs.append(int(output[4]))
                        classIDs.append(int(output[5]))
                        memory_index[indexIDs[-1]] = boxes[
                            -1]  # 인덱스 아이디와 박스를 맞춰줌
                        memory_id[indexIDs[-1]] = classIDs[
                            -1]  # 인덱스 아이디와 클레스 아이디를 맞춰줌

                        if len(pts[output[4]]) == 0:
                            pts[output[4]].append(frame_idx)
                        center = (int(((output[0]) + (output[2])) / 2),
                                  int(((output[1]) + (output[3])) / 2))
                        pts[output[4]].append(center)
                        if len(pts[output[4]]) == percep_frame + 1:
                            frmMove_len = np.sqrt(
                                pow(
                                    pts[output[4]][-1][0] -
                                    pts[output[4]][-percep_frame][0], 2) + pow(
                                        pts[output[4]][-1][1] -
                                        pts[output[4]][-percep_frame][1], 2))
                            geoMove_Len = geo_len * frmMove_len / frame_len
                            speed = geoMove_Len * vid_cap.get(
                                cv2.CAP_PROP_FPS) * 3.6 / (pts[output[4]][0] -
                                                           frame_idx)
                            ptsSpeed[output[4]].append(speed)
                            pts[output[4]].clear()

                if len(boxes) > 0:
                    i = int(0)
                    for box in boxes:
                        # 현 위치와 이전 위치를 비교하여 지나갔는지 체크함
                        (x, y) = (int(box[0]), int(box[1]))  # Output 0 1
                        (w, h) = (int(box[2]), int(box[3]))  # Output 2 3 과 같다.
                        color = compute_color_for_labels(indexIDs[i])

                        if indexIDs[i] in previous_index:
                            previous_box = previous_index[indexIDs[i]]
                            # print()
                            # print('previous_box : ')
                            # print(previous_box)
                            (x2, y2) = (int(previous_box[0]),
                                        int(previous_box[1]))
                            (w2, h2) = (int(previous_box[2]),
                                        int(previous_box[3]))
                            p0 = (int(x + (w - x) / 2), int(y + (h - y) / 2)
                                  )  # 현재 박스
                            p1 = (int(x2 + (w2 - x2) / 2),
                                  int(y2 + (h2 - y2) / 2))  # 이전 박스
                            cv2.line(
                                im0, p0, p1, color, 3
                            )  # 이전 정보와 비교 : 중앙에 점을 찍어 가면서 (이전 데이터와 검지 데이터의 점)

                            # 클레스 구분
                            previous_class_id = previous_id[
                                indexIDs[i]]  # 어차피 인덱스 같기 때문에 그냥 넣어줘도 됨 개꿀ㅋ

                            # Yolov5와 DeepSort를 통하여 만들어진 첫 결과물(내가 맨든 결과물)
                            # 프레임 수, 인덱스 아이디, 클레스 이름, x좌표, y좌표, w값, h값, 속도값, null, null
                            # with open(txt_path_raw2, 'a') as f:
                            #     f.write(('%g ' * 10+ '\n') % (frame_idx, indexIDs[i], previous_class_id,
                            #                                 p0[0], p0[1], box[2], box[3], -1, -1))  # label format

                            for cntr in range(len(Counter_newpoint)):
                                if intersect(p0, p1, Counter_newpoint[cntr][0],
                                             Counter_newpoint[cntr]
                                             [1]):  # 실질적으로 체크함
                                    if previous_class_id == 0:
                                        cnt[cntr][1] += 1
                                    elif previous_class_id == 1:
                                        cnt[cntr][2] += 1
                                    elif previous_class_id == 2:
                                        cnt[cntr][3] += 1
                                    cnt[cntr][0] += 1

                        i += 1  # 다음 인덱스와 비교하게 만들기 위하여

                # draw counter
                for cntr in range(len(Counter_newpoint)):
                    cv2.putText(im0, 'count_{}_total : {}'.format(
                        cntr + 1, cnt[cntr][0]), (100 + 400 * cntr, 110),
                                cv2.FONT_HERSHEY_DUPLEX, 1.0, (0, 0, 0),
                                2)  # 카운팅 되는거 보이게
                    cv2.putText(im0, 'count_{}_{} : {}'.format(
                        cntr + 1, names[0],
                        cnt[cntr][1]), (100 + 400 * cntr, 140),
                                cv2.FONT_HERSHEY_DUPLEX, 0.5, (0, 0, 0),
                                2)  # 카운팅 되는거 보이게
                    cv2.putText(im0, 'count_{}_{} : {}'.format(
                        cntr + 1, names[1],
                        cnt[cntr][2]), (100 + 400 * cntr, 170),
                                cv2.FONT_HERSHEY_DUPLEX, 0.5, (0, 0, 0),
                                2)  # 카운팅 되는거 보이게
                    cv2.putText(im0, 'count_{}_{} : {}'.format(
                        cntr + 1, names[2],
                        cnt[cntr][3]), (100 + 400 * cntr, 200),
                                cv2.FONT_HERSHEY_DUPLEX, 0.5, (0, 0, 0),
                                2)  # 카운팅 되는거 보이게
                t8 = time_synchronized()
                # ---------------------------------------------------------------------------------------------------------------------- counter end

                # draw boxes for visualization
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, 4:5]
                    cls_id = outputs[:, -1]
                    draw_boxes(im0, bbox_xyxy, cls_id, identities, names,
                               ptsSpeed)

                t9 = time_synchronized()
                # Write MOT compliant results to file
                if save_txt and len(outputs) != 0:
                    for j, output in enumerate(outputs):  # 한 라인씩 쓰는 구조
                        bbox_left = output[0]
                        bbox_top = output[1]
                        bbox_w = output[2]
                        bbox_h = output[3]
                        identity = output[4]
                        classname = output[5]

                        with open(
                                txt_path_raw, 'a'
                        ) as f:  # Yolov5와 DeepSort를 통하여 만들어진 첫 결과물(원본결과물)
                            f.write(('%g ' * 6 + '%g' * 1 + '%g ' * 3 + '\n') %
                                    (frame_idx, identity, bbox_left, bbox_top,
                                     bbox_w, bbox_h, classname, -1, -1,
                                     -1))  # label format

            # else:
            #     deepsort.increment_ages()
            t10 = time_synchronized()
            # Print time (inference + NMS + classify)
            #print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            t11 = time_synchronized()
            # Save results (image with detections)
            # dataset.mode = 'images'
            # save_path = './track_result/output/{}.jpg'.format(i)
            if save_img:
                print('saving img!')
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    print('saving video!')
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer

                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*opt.fourcc),
                            fps, (w, h))
                    vid_writer.write(im0)
            t12 = time_synchronized()

            print('inference + NMS + classify (%.3fs)' % (t2 - t1))
            print('Yolo + DeepSORT (%.3fs)' % (t3 - t2))
            print('find mid point (%.3fs)' % (t4 - t3))
            print('삼변측량을 위한 기준거리 산정 (%.3fs)' % (t5 - t4))
            print('draw line (%.3fs)' % (t6 - t5)
                  )  # 현재는 정밀도로지도에 있는 모든 점들을 대상 계산중 -> 추후 화면에 표시될 점만 계산하는 작업 필요
            print('GCP 점 계산 (%.3fs)' % (t7 - t6))
            print('Count & speed (%.3fs)' % (t8 - t7))
            print('각차량별 그리기 (%.3fs)' % (t9 - t8))
            print('txt 데이터 저장 (%.3fs)' % (t10 - t9))
            print('스크린에 표시하기 (%.3fs)' % (t11 - t10))
            print('비디오파일로 저장하기 (%.3fs)' % (t12 - t11))
            print('one frame done (%.3fs)' % (t12 - t1))

    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        if platform == 'darwin':  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))

Example #15

Show file

def detect(save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.startswith(
        'rtsp') or source.startswith('http') or source.endswith('.txt')

    # Initialize
    set_logging()
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Find index corresponding to a person
    idx_person = names.index("person")

    # SORT: initialize the tracker
    mot_tracker = sort_module.Sort(max_age=opt.max_age,
                                   min_hits=opt.min_hits,
                                   iou_threshold=opt.iou_threshold)

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img
              ) if device.type != 'cpu' else None  # run once
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)
        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out) / Path(p).stem) + (
                '_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # SORT: number of people detected
                idxs_ppl = (
                    det[:, -1] == idx_person
                ).nonzero(as_tuple=False).squeeze(
                    dim=1)  # 1. List of indices with 'person' class detections
                dets_ppl = det[idxs_ppl, :-1].to(
                    "cpu")  # 2. Torch.tensor with 'person' detections
                print('\n {} people were detected!'.format(len(idxs_ppl)))

                # SORT: feed detections to the tracker
                if len(dets_ppl) != 0:
                    trackers = mot_tracker.update(dets_ppl)
                    for d in trackers:
                        plot_one_box(d[:-1],
                                     im0,
                                     label='ID' + str(int(d[-1])),
                                     color=colors[1],
                                     line_thickness=1)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*fourcc), fps,
                            (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % Path(out))
        if platform.system() == 'Darwin' and not opt.update:  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))

Example #16

Show file

File: data_ingest.py Project: arsalhuda24/Realtime-Trajectory-Prediction-AWS

def rtsp_to_mongodb():

    with open("/home/asyed/airflow/dags/parameters.json") as f:

        parms = json.load(f)

    agnostic_nms = parms["agnostic_nms"]
    augment = parms["augment"]
    classes = parms["classes"]
    conf_thres = parms["conf_thres"]
    config_deepsort = parms["config_deepsort"]
    deep_sort_model = parms["deep_sort_model"]
    device = parms["device"]
    dnn = False
    evaluate = parms["evaluate"]
    exist_ok = parms["exist_ok"]
    fourcc = parms["fourcc"]
    half = False
    print(device)
    imgsz = parms["imgsz"]
    iou_thres = parms["iou_thres"]
    max_det = parms["max_det"]
    name = parms["name"]
    # save_vid = parms["save_vid"]
    #show_vid = parms["show_vid"]
    source = parms["source"]
    visualize = parms["visualize"]
    yolo_model = parms["yolo_model"]
    webcam = parms["webcam"]
    save_txt = parms["save_txt"]
    homography = np.array(parms["homography"])

    url = "mongodb://localhost:27017"
    client = MongoClient(url)
    db = client.trajectory_database

    today_date = date.today().strftime("%m-%d-%y")
    new = "file_image_coordinates_" + today_date
    collection = db[new]

    cfg = get_config()
    cfg.merge_from_file(config_deepsort)

    deepsort = DeepSort(deep_sort_model,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    device = select_device(device)
    half &= device.type != 'cpu'  # half precision only supported on CUDA

    # The MOT16 evaluation runs multiple inference streams in parallel, each one writing to
    # its own .txt file. Hence, in that case, the output folder is not restored
    # make new output folder

    # Load model
    device = select_device(device)
    model = DetectMultiBackend(yolo_model, device=device, dnn=dnn)
    stride, names, pt, jit, _ = model.stride, model.names, model.pt, model.jit, model.onnx
    imgsz = check_img_size(imgsz, s=stride)  # check image size

    # Half
    half &= pt and device.type != 'cpu'  # half precision only supported by PyTorch on CUDA
    if pt:
        model.model.half() if half else model.model.float()

    # Set Dataloader
    vid_path, vid_writer = None, None
    # Check if environment supports image displays

    cudnn.benchmark = True  # set True to speed up constant image size inference

    dataset = LoadStreams(source,
                          img_size=imgsz,
                          stride=stride,
                          auto=pt and not jit)

    bs = len(dataset)  # batch_size

    vid_path, vid_writer = [None] * bs, [None] * bs

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names

    if pt and device.type != 'cpu':
        model(
            torch.zeros(1, 3, *imgsz).to(device).type_as(
                next(model.model.parameters())))  # warmup
        # global framess_im2

        dt, seen = [0.0, 0.0, 0.0, 0.0], 0
        # arr = None
        past = []
        for frame_idx, (path, img, im0s, vid_cap, s) in enumerate(dataset):

            t1 = time_sync()
            img = torch.from_numpy(img).to(device)
            # print("raw_frame",img.shape)
            img = img.half() if half else img.float()  # uint8 to fp16/32
            img /= 255.0  # 0 - 255 to 0.0 - 1.0
            if img.ndimension() == 3:
                img = img.unsqueeze(0)
            t2 = time_sync()
            dt[0] += t2 - t1

            pred = model(img, augment=augment, visualize=visualize)
            t3 = time_sync()
            dt[1] += t3 - t2

            pred = non_max_suppression(pred,
                                       conf_thres,
                                       iou_thres,
                                       classes,
                                       agnostic_nms,
                                       max_det=max_det)
            dt[2] += time_sync() - t3

            # Process detections

            # dets_per_img = []
            for i, det in enumerate(pred):  # detections per image
                seen += 1
                if webcam:  # batch_size >= 1
                    p, im0, _ = path[i], im0s[i].copy(), dataset.count

                    s += f'{i}: '
                else:
                    p, im0, _ = path, im0s.copy(), getattr(dataset, 'frame', 0)

                annotator = Annotator(im0, line_width=2, pil=not ascii)

                if det is not None and len(det):
                    # Rescale boxes from img_size to im0 size
                    det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                              im0.shape).round()

                    # Print results
                    for c in det[:, -1].unique():
                        n = (det[:, -1] == c).sum()  # detections per class
                        s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                    xywhs = xyxy2xywh(det[:, 0:4])
                    confs = det[:, 4]
                    clss = det[:, 5]

                    # pass detections to deepsort
                    t4 = time_sync()
                    outputs = deepsort.update(xywhs.cpu(), confs.cpu(),
                                              clss.cpu(), im0)
                    t5 = time_sync()
                    dt[3] += t5 - t4

                    if len(outputs) > 0:
                        for j, (output, conf) in enumerate(zip(outputs,
                                                               confs)):
                            bboxes = output[0:4]
                            id = output[4]
                            cls = output[5]

                            c = int(cls)  # integer class
                            label = f'{id} {names[c]} {conf:.2f}'
                            annotator.box_label(bboxes,
                                                label,
                                                color=colors(c, True))

                            if save_txt:
                                # to MOT format
                                bbox_left = output[0]
                                bbox_top = output[1]
                                bbox_w = output[2] - output[0]
                                bbox_h = output[3] - output[1]
                                # bbox_left = bbox_left + bbox_h
                                bbox_top = bbox_top + bbox_h

                                agent_data = {
                                    'frame': int(frame_idx + 1),
                                    'agent_id': int(id),
                                    "labels": str(names[c]),
                                    "x": int(bbox_left),
                                    "y": int(bbox_top)
                                }

                                print("agent", agent_data)

                                collection.insert_one(agent_data)

                                #db.object_detection.insert_one(agent_data)
                                #db.pedestrian_detection_15_june.insert_one(agent_data)
                                #db.test_21_july.insert_one(agent_data)

                    LOGGER.info(
                        f'{s}Done. YOLO:({t3 - t2:.3f}s), DeepSort:({t5 - t4:.3f}s)'
                    )

                else:
                    deepsort.increment_ages()
                    LOGGER.info('No detections')

                im0 = annotator.result()

Example #17

Show file

File: track.py Project: rahultan26/rahul26

def detect(opt):
    memory = {}
    counter = 0
    out, source, yolo_model, deep_sort_model, show_vid, save_vid, save_txt, imgsz, evaluate, half, project, name, exist_ok= \
        opt.output, opt.source, opt.yolo_model, opt.deep_sort_model, opt.show_vid, opt.save_vid, \
        opt.save_txt, opt.imgsz, opt.evaluate, opt.half, opt.project, opt.name, opt.exist_ok
    webcam = source == '0' or source.startswith('rtsp') or source.startswith(
        'http') or source.endswith('.txt')

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(deep_sort_model,
                        torch.device("cpu"),
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET)

    # Initialize
    device = select_device(opt.device)
    half &= device.type != 'cpu'  # half precision only supported on CUDA

    # The MOT16 evaluation runs multiple inference streams in parallel, each one writing to
    # its own .txt file. Hence, in that case, the output folder is not restored
    if not evaluate:
        if os.path.exists(out):
            pass
            shutil.rmtree(out)  # delete output folder
        os.makedirs(out)  # make new output folder

    # Directories
    save_dir = increment_path(Path(project) / name,
                              exist_ok=exist_ok)  # increment run
    save_dir.mkdir(parents=True, exist_ok=True)  # make dir

    # Load model
    device = select_device(device)
    model = DetectMultiBackend(yolo_model, device=device, dnn=opt.dnn)
    stride, names, pt, jit, _ = model.stride, model.names, model.pt, model.jit, model.onnx
    imgsz = check_img_size(imgsz, s=stride)  # check image size

    # Half
    half &= pt and device.type != 'cpu'  # half precision only supported by PyTorch on CUDA
    if pt:
        model.model.half() if half else model.model.float()

    # Set Dataloader
    vid_path, vid_writer = None, None
    # Check if environment supports image displays
    if show_vid:
        show_vid = check_imshow()

    # Dataloader
    if webcam:
        show_vid = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source,
                              img_size=imgsz,
                              stride=stride,
                              auto=pt and not jit)
        bs = len(dataset)  # batch_size
    else:
        dataset = LoadImages(source,
                             img_size=imgsz,
                             stride=stride,
                             auto=pt and not jit)
        bs = 1  # batch_size
    vid_path, vid_writer = [None] * bs, [None] * bs

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names

    # extract what is in between the last '/' and last '.'
    txt_file_name = source.split('/')[-1].split('.')[0]
    txt_path = str(Path(save_dir)) + '/' + txt_file_name + '.txt'

    if pt and device.type != 'cpu':
        model(
            torch.zeros(1, 3, *imgsz).to(device).type_as(
                next(model.model.parameters())))  # warmup
    dt, seen = [0.0, 0.0, 0.0, 0.0], 0
    regionid = set()
    for frame_idx, (path, img, im0s, vid_cap, s) in enumerate(dataset):
        t1 = time_sync()
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)
        t2 = time_sync()
        dt[0] += t2 - t1

        # Inference
        visualize = increment_path(save_dir / Path(path).stem,
                                   mkdir=True) if opt.visualize else False
        pred = model(img, augment=opt.augment, visualize=visualize)
        t3 = time_sync()
        dt[1] += t3 - t2

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   opt.classes,
                                   opt.agnostic_nms,
                                   max_det=opt.max_det)
        dt[2] += time_sync() - t3
        # Process detections
        for i, det in enumerate(pred):  # detections per image
            seen += 1
            if webcam:  # batch_size >= 1
                p, im0, _ = path[i], im0s[i].copy(), dataset.count
                s += f'{i}: '
            else:
                p, im0, _ = path, im0s.copy(), getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # im.jpg, vid.mp4, ...
            s += '%gx%g ' % img.shape[2:]  # print string

            annotator = Annotator(im0,
                                  line_width=2,
                                  font='Arial.ttf',
                                  pil=not ascii)

            if det is not None and len(det):
                tboxes = []
                indexIDs = []
                previous = memory.copy()
                memory = {}
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                xywhs = xyxy2xywh(det[:, 0:4])
                confs = det[:, 4]
                clss = det[:, 5]

                # pass detections to deepsort
                t4 = time_sync()
                outputs = deepsort.update(xywhs.cpu(), confs.cpu(), clss.cpu(),
                                          im0)
                t5 = time_sync()
                dt[3] += t5 - t4

                # draw boxes for visualization
                if len(outputs) > 0:
                    for j, (output, conf) in enumerate(zip(outputs, confs)):

                        bboxes = output[0:4]
                        id = output[4]
                        cls = output[5]
                        roi = [(0, 0), (640, 0), (640, 380), (0, 380)]

                        (x, y) = (int(bboxes[0]), int(bboxes[1]))
                        (w, h) = (int(bboxes[2]), int(bboxes[3]))
                        inside = cv2.pointPolygonTest(np.array(roi), (x, h),
                                                      False)
                        if inside > 0:
                            regionid.add(id)

                        c = int(cls)  # integer class
                        label = f' {names[c]} {conf:.2f}'
                        cv2.putText(im0, "count =" + str(len(regionid)),
                                    (20, 50), 0, 1, (100, 200, 0), 2)
                        annotator.box_label(bboxes,
                                            label,
                                            color=colors(c, True))
                        if save_txt:
                            # to MOT format
                            bbox_left = output[0]
                            bbox_top = output[1]
                            bbox_w = output[2] - output[0]
                            bbox_h = output[3] - output[1]
                            # Write MOT compliant results to file
                            with open(txt_path, 'a') as f:
                                f.write(('%g ' * 10 + '\n') % (
                                    frame_idx + 1,
                                    id,
                                    bbox_left,  # MOT format
                                    bbox_top,
                                    bbox_w,
                                    bbox_h,
                                    -1,
                                    -1,
                                    -1,
                                    -1))

                LOGGER.info(
                    f'{s}Done. YOLO:({t3 - t2:.3f}s), DeepSort:({t5 - t4:.3f}s)'
                )
                LOGGER.info(f'counter = {len(regionid)}')

            else:
                deepsort.increment_ages()
                LOGGER.info('No detections')

            # Stream results
            im0 = annotator.result()
            if show_vid:
                cv2.imshow(str(p), im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_vid:
                if vid_path != save_path:  # new video
                    vid_path = save_path
                    if isinstance(vid_writer, cv2.VideoWriter):
                        vid_writer.release()  # release previous video writer
                    if vid_cap:  # video
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                    else:  # stream
                        fps, w, h = 30, im0.shape[1], im0.shape[0]

                    vid_writer = cv2.VideoWriter(
                        save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps,
                        (w, h))
                vid_writer.write(im0)

    # Print results
    t = tuple(x / seen * 1E3 for x in dt)  # speeds per image
    LOGGER.info(
        f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS, %.1fms deep sort update \
        per image at shape {(1, 3, *imgsz)}' % t)
    if save_txt or save_vid:
        print('Results saved to %s' % save_path)
        if platform == 'darwin':  # MacOS
            os.system('open ' + save_path)

Example #18

Show file

File: track_yolov5_counter.py Project: ruidongjr/queue_tracking

def detect(save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.endswith(
        '.txt') or source.lower().startswith(
            ('rtsp://', 'rtmp://',
             'http://')) or source.lower().startswith('intel')

    # Initialize
    set_logging()
    device = select_device(opt.device)
    folder_main = out.split('/')[0]
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    folder_features = folder_main + '/features'
    if os.path.exists(folder_features):
        shutil.rmtree(folder_features)  # delete features output folder
    folder_crops = folder_main + '/image_crops'
    if os.path.exists(folder_crops):
        shutil.rmtree(folder_crops)  # delete output folder with object crops
    os.makedirs(out)  # make new output folder
    os.makedirs(folder_features)  # make new output folder
    os.makedirs(folder_crops)  # make new output folder

    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = torch.load(weights[0],
                       map_location=device)['model'].float()  # load to FP32
    model.to(device).eval()
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        if source.lower().startswith('intel'):
            dataset = LoadRealSense2()
            save_img = True
        else:
            dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # frames per second
    # TODO if use intel or if use given footage
    fps = 30  # dataset.cap.get(cv2.CAP_PROP_FPS)
    critical_time_frames = opt.time * fps

    # COUNTER: initialization
    counter = VoteCounter(critical_time_frames, fps)
    print('CRITICAL TIME IS ', opt.time, 'sec, or ', counter.critical_time,
          ' frames')

    # Find index corresponding to a person
    idx_person = names.index("person")

    # Deep SORT: initialize the tracker
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # AlphaPose: initialization
    # args_p = update_config(opt.config_alphapose)
    # cfg_p = update_config(args_p.ALPHAPOSE.cfg)
    #
    # args_p.ALPHAPOSE.tracking = args_p.ALPHAPOSE.pose_track or args_p.ALPHAPOSE.pose_flow
    #
    # demo = SingleImageAlphaPose(args_p.ALPHAPOSE, cfg_p, device)
    # output_pose = opt.output.split('/')[0] + '/pose'
    # if not os.path.exists(output_pose):
    #     os.mkdir(output_pose)

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img
              ) if device.type != 'cpu' else None  # run once
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # TODO => COUNTER: draw queueing ROI
        #  compute urn centoid (1st frame only) and plot a bounding box around it
        # if dataset.frame == 1:
        #     counter.read_urn_coordinates(opt.urn, im0s, opt.radius)
        # counter.plot_urn_bbox(im0s)

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)
        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                if source.lower().startswith('intel'):
                    p, s, im0, frame = path, '%g: ' % i, im0s[i].copy(
                    ), dataset.count
                else:
                    p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            print(save_path)
            txt_path = str(Path(out) / Path(p).stem) + (
                '_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Deep SORT: person class only
                idxs_ppl = (
                    det[:, -1] == idx_person
                ).nonzero(as_tuple=False).squeeze(
                    dim=1)  # 1. List of indices with 'person' class detections
                dets_ppl = det[idxs_ppl, :
                               -1]  # 2. Torch.tensor with 'person' detections
                print('\n {} people were detected!'.format(len(idxs_ppl)))

                # Deep SORT: convert data into a proper format
                xywhs = xyxy2xywh(dets_ppl[:, :-1]).to("cpu")
                confs = dets_ppl[:, 4].to("cpu")

                # Deep SORT: feed detections to the tracker
                if len(dets_ppl) != 0:
                    trackers, features = deepsort.update(xywhs, confs, im0)
                    # tracks inside a critical sphere
                    trackers_inside = []
                    for i, d in enumerate(trackers):
                        plot_one_box(d[:-1],
                                     im0,
                                     label='ID' + str(int(d[-1])),
                                     color=colors[1],
                                     line_thickness=1)

                        # TODO: queue COUNTER
                        # d_include = counter.centroid_distance(d, im0, colors[1], dataset.frame)
                        # if d_include:
                        #     trackers_inside.append(d)

                    # ALPHAPOSE: show skeletons for bounding boxes inside the critical sphere
                    # if len(trackers_inside) > 0:
                    #     pose = demo.process('frame_'+str(dataset.frame), im0, trackers_inside)
                    #     im0 = demo.vis(im0, pose)
                    #     demo.writeJson([pose], output_pose, form=args_p.ALPHAPOSE.format, for_eval=args_p.ALPHAPOSE.eval)
                    #
                    #     counter.save_features_and_crops(im0, dataset.frame, trackers_inside, features, folder_main)

            cv2.putText(im0, 'Voted ' + str(len(counter.voters_count)),
                        (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 255),
                        2)

            print('NUM VOTERS', len(counter.voters))
            print(list(counter.voters.keys()))

            # COUNTER
            if len(counter.voters) > 0:
                counter.save_voter_trajectory(dataset.frame, folder_main)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        if type(vid_cap
                                ) is dict:  # estimate distance_in_meters
                            # TODO hard code
                            w, h, fps = 640, 480, 6
                        else:
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*fourcc), fps,
                            (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % Path(out))
        if platform.system() == 'Darwin' and not opt.update:  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))

Example #19

Show file

def detect(opt, *args):
    out, source, weights, view_img, save_txt, imgsz, save_img, sort_max_age, sort_min_hits, sort_iou_thresh, skip_interval = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size, opt.save_img, opt.sort_max_age, opt.sort_min_hits, opt.sort_iou_thresh, opt.skip_interval
    global skip_count
    print()
    print(skip_interval)
    webcam = source == '0' or source.startswith('rtsp') or source.startswith(
        'http') or source.endswith('.txt')
    # Initialize SORT
    sort_tracker = Sort(max_age=sort_max_age,
                        min_hits=sort_min_hits,
                        iou_threshold=sort_iou_thresh)  # {plug into parser}

    # Directory and CUDA settings for yolov5
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load yolov5 model
    model = torch.load(weights, map_location=device)['model'].float(
    )  #load to FP32. yolov5s.pt file is a dictionary, so we retrieve the model by indexing its key
    model.to(device).eval()
    if half:
        model.half()  #to FP16

    # Set DataLoader
    vid_path, vid_writer = None, None

    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        dataset = LoadImages(source, img_size=imgsz)

    # get names of object categories from yolov5.pt model
    names = model.module.names if hasattr(model, 'module') else model.names

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  #init img

    # Run once (throwaway)
    _ = model(img.half() if half else img) if device.type != 'cpu' else None

    save_path = str(Path(out))
    txt_path = str(Path(out)) + '/results.txt'

    for frame_idx, (path, img, im0s,
                    vid_cap) in enumerate(dataset):  #for every frame
        print()
        print(f"skip_count {skip_count}")
        skip_count += 1
        if skip_count != 0:
            if skip_count == int(skip_interval):
                skip_count = 0
            else:
                continue

        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  #unint8 to fp16 or fp32
        img /= 255.0  #normalize to between 0 and 1.
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Process detections
        for i, det in enumerate(pred):  #for each detection in this frame
            #print(f"i, det : {i}, {det}")

            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            s += f'{img.shape[2:]}'  #print image size and detection report
            save_path = str(Path(out) / Path(p).name)

            # Rescale boxes from img_size (temporarily downscaled size) to im0 (native) size
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                      im0.shape).round()

            for c in det[:, -1].unique():  #for each unique object category
                n = (det[:, -1] == c).sum()  #number of detections per class
                s += f' - {n} {names[int(c)]}'

            dets_to_sort = np.empty((0, 6))

            # Pass detections to SORT
            # NOTE: We send in detected object class too
            for x1, y1, x2, y2, conf, detclass in det.cpu().detach().numpy():
                dets_to_sort = np.vstack(
                    (dets_to_sort, np.array([x1, y1, x2, y2, conf, detclass])))
            print('\n')
            print('Input into SORT:\n', dets_to_sort, '\n')

            # Run SORT
            tracked_dets = sort_tracker.update(dets_to_sort)

            print('Output from SORT:\n', tracked_dets, '\n')

            # draw boxes for visualization
            if len(tracked_dets) > 0:
                bbox_xyxy = tracked_dets[:, :4]
                identities = tracked_dets[:, 8]
                categories = tracked_dets[:, 4]
                draw_boxes(im0, bbox_xyxy, identities, categories, names)

            # Write detections to file. NOTE: Not MOT-compliant format.
            if save_txt and len(tracked_dets) != 0:
                for j, tracked_dets in enumerate(tracked_dets):
                    bbox_x1 = tracked_dets[0]
                    bbox_y1 = tracked_dets[1]
                    bbox_x2 = tracked_dets[2]
                    bbox_y2 = tracked_dets[3]
                    category = tracked_dets[4]
                    u_overdot = tracked_dets[5]
                    v_overdot = tracked_dets[6]
                    s_overdot = tracked_dets[7]
                    identity = tracked_dets[8]

                    with open(txt_path, 'a') as f:
                        f.write(
                            f'{frame_idx},{bbox_x1},{bbox_y1},{bbox_x2},{bbox_y2},{category},{u_overdot},{v_overdot},{s_overdot},{identity}\n'
                        )

            print(f'{s} Done. ({t2-t1})')
            # Stream image results(opencv)
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  #q to quit
                    raise StopIteration
            # Save video results
            if save_img:
                print('saving img!')
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    print('saving video!')
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer

                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*opt.fourcc),
                            fps, (w, h))
                    vid_writer.write(im0)
    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        if platform == 'darwin':  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))

Example #20

Show file

def detect(opt, save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith('rtsp') or source.startswith(
        'http') or source.endswith('.txt')
    array_detected_object = []

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Initialize
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = torch.load(weights,
                       map_location=device)['model'].float()  # load to FP32
    model.to(device).eval()
    if half:
        model.half()  # to FP16

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        view_img = True
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img
              ) if device.type != 'cpu' else None  # run once

    save_path = str(Path(out))
    txt_path = str(Path(out)) + '/results.txt'

    peopleIn = 0
    peopleOut = 0
    detectedIds = []

    for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            s += '%gx%g ' % img.shape[2:]  # print string
            save_path = str(Path(out) / Path(p).name)

            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                bbox_xywh = []
                confs = []

                # Adapt detections to deep sort input format
                for *xyxy, conf, cls in det:
                    img_h, img_w, _ = im0.shape
                    x_c, y_c, bbox_w, bbox_h = bbox_rel(img_w, img_h, *xyxy)
                    obj = [x_c, y_c, bbox_w, bbox_h]
                    bbox_xywh.append(obj)
                    confs.append([conf.item()])

                xywhs = torch.Tensor(bbox_xywh)
                confss = torch.Tensor(confs)

                # Pass detections to deepsort
                outputs = deepsort.update(xywhs, confss, im0)

                # draw boxes for visualization
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, -1]
                    draw_boxes(im0, bbox_xyxy, identities)

                # Write MOT compliant results to file
                if save_txt and len(outputs) != 0:
                    for j, output in enumerate(outputs):
                        bbox_left = output[0]
                        bbox_top = output[1]
                        bbox_w = output[2]
                        bbox_h = output[3]
                        identity = output[-1]
                        array_detected_object.append(identity)
                        array_detected_object = list(
                            dict.fromkeys(array_detected_object))

                        xas = 0
                        yas = 0

                        if identity >= 0:
                            xas = bbox_xyxy[0][0]
                            yas = bbox_xyxy[0][1]

                        if identity not in detectedIds and int(bbox_top) >= 10:
                            detectedIds.append(identity)
                            if int(bbox_top) >= 500 or (int(bbox_left) >= 800
                                                        and
                                                        int(bbox_top) >= 80):
                                peopleOut += 1
                            if int(bbox_top) <= 100:
                                peopleIn += 1

                        # with open(txt_path, 'a') as f:
                        #     f.write(('%g ' * 10 + '\n') % (frame_idx, identity, bbox_left,
                        #             bbox_top, bbox_w, bbox_h, -1, -1, -1, -1))  # label format
                        # f.write(('%g ' * 3 + '\n') % (identity, bbox_left, bbox_top))  # label format
                        # resultText = str(identity) + '-' + str(bbox_top)
                        # f.write(resultText + '\n')  # label format
                        # f.write(('%g ' * 4 + '\n') % (-1, frame_idx, -1, -1, str(xas), str(yas)))  # label format
                        # f.write(str(identity))
                        # f.write(('%g ' * 1 + '\n') % (identity))
                        # f.write('\n')
                        # f.write(str(bbox_xyxy))
                        # f.write("Number people counted: " + str(len(array_detected_object)))
                        # with open(txt_path, 'r') as fp:
                        #     line = fp.readline()
                        #     cnt = 1
                        #     while line:
                        #         identity = line.split("-")[0]
                        #         infoCheck = line.split("-")[1]
                        #         if identity not in detectedIds:
                        #             detectedIds.append(identity)
                        #             if int(infoCheck) > 680 :
                        #                 peopleOut += 1
                        #             else:
                        #                 peopleIn +=1

                        # print("Line {}: {}".format(cnt, line.strip().split("-")[0]))
                        # line = fp.readline()

                        # cnt += 1
                        # print("All people counted: " + str(peopleIn + peopleOut))
                        # print("Number people in: " + str(peopleIn))
                        # print("Number people out: " + str(peopleOut))

            font = cv2.FONT_HERSHEY_DUPLEX
            cv2.putText(im0,
                        "People in out counted: " + str(peopleIn + peopleOut),
                        (50, 100), font, 0.8, (0, 255, 255), 2, font)
            cv2.putText(im0, "Number people in: " + str(peopleIn), (50, 135),
                        font, 0.8, (0, 255, 255), 2, font)
            cv2.putText(im0, "Number people out: " + str(peopleOut), (50, 170),
                        font, 0.8, (0, 255, 255), 2, font)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    txt_result = str(Path(out)) + '/result-counted.txt'

                    print("All people counted: " + str(peopleIn + peopleOut))
                    print("Number people in: " + str(peopleIn))
                    print("Number people out: " + str(peopleOut))
                    with open(txt_path, 'a') as f:
                        f.write("All people counted: " +
                                str(peopleIn + peopleOut))
                        f.write("Number people in: " + str(peopleIn))
                        f.write("Number people out: " + str(peopleOut))

                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                print('saving img!')
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    print('saving video!')
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer

                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*opt.fourcc),
                            fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        if platform == 'darwin':  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))

Example #21

Show file

    def detect(self):
        progress = 0
        out, source, weights, view_img, save_txt, imgsz = \
            self.opt['output'], self.opt['source'], self.opt['weights'], self.opt['view_img'], self.opt['save_txt'], self.opt['img_size']
        webcam = source == '0' or source.startswith(
            'rtsp') or source.startswith('http') or source.endswith('.txt')

        # initialize deepsort
        cfg = get_config()
        cfg.merge_from_file(self.opt['config_deepsort'])
        deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                            max_dist=cfg.DEEPSORT.MAX_DIST,
                            min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                            nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                            max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                            max_age=cfg.DEEPSORT.MAX_AGE,
                            n_init=cfg.DEEPSORT.N_INIT,
                            nn_budget=cfg.DEEPSORT.NN_BUDGET,
                            use_cuda=True)

        # Initialize
        device = select_device(self.opt['device'])
        # if os.path.exists(out):
        #     shutil.rmtree(out)  # delete output folder
        # os.makedirs(out)  # make new output folder
        half = device.type != 'cpu'  # half precision only supported on CUDA

        # Load model
        # google_utils.attempt_download(weights)
        model = torch.load(
            weights, map_location=device)['model'].float()  # load to FP32
        # model = torch.save(torch.load(weights, map_location=device), weights)  # update model if SourceChangeWarning
        # model.fuse()
        model.to(device).eval()
        if half:
            model.half()  # to FP16

        # Set Dataloader
        vid_path, vid_writer = None, None
        if webcam:
            view_img = True
            cudnn.benchmark = True  # set True to speed up constant image size inference
            dataset = LoadStreams(source, img_size=imgsz)
        else:
            # view_img = True
            save_img = True
            dataset = LoadImages(source,
                                 self.opt['frame_range'],
                                 img_size=imgsz)

        # Get names and colors
        names = model.module.names if hasattr(model, 'module') else model.names

        # Run inference
        t0 = time.time()
        img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
        _ = model(img.half() if half else img
                  ) if device.type != 'cpu' else None  # run once

        save_path = str(Path(out)) + '/video.mkv'
        txt_path = str(Path(out)) + '/track_results.txt'
        detection_path = str(Path(out)) + '/detection_results.txt'

        for frame_idx, (path, img, im0s, vid_cap,
                        caption) in enumerate(dataset):
            self.progress_bar = dataset.frame_progress
            # print(self.progress_bar)
            img = torch.from_numpy(img).to(device)
            img = img.half() if half else img.float()  # uint8 to fp16/32
            img /= 255.0  # 0 - 255 to 0.0 - 1.0
            if img.ndimension() == 3:
                img = img.unsqueeze(0)

            # Inference
            t1 = time_synchronized()
            pred = model(img, augment=self.opt['augment'])[0]

            # Apply NMS
            pred = non_max_suppression(pred,
                                       self.opt['conf_thres'],
                                       self.opt['iou_thres'],
                                       classes=self.opt['classes'],
                                       agnostic=self.opt['agnostic_nms'])
            t2 = time_synchronized()

            # Process detections
            for i, det in enumerate(pred):  # detections per image
                if webcam:  # batch_size >= 1
                    p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
                else:
                    p, s, im0 = path, '', im0s

                s += '%gx%g ' % img.shape[2:]  # print string
                if det is not None and len(det):
                    # Rescale boxes from img_size to im0 size
                    det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                              im0.shape).round()

                    # Print results
                    for c in det[:, -1].unique():
                        n = (det[:, -1] == c).sum()  # detections per class
                        s += '%g %ss, ' % (n, names[int(c)])  # add to string

                    bbox_xywh = []
                    confs = []

                    # Adapt detections to deep sort input format
                    for *xyxy, conf, cls in det:
                        img_h, img_w, _ = im0.shape
                        x_c, y_c, bbox_w, bbox_h = bbox_rel(
                            img_w, img_h, *xyxy)
                        obj = [x_c, y_c, bbox_w, bbox_h]
                        bbox_xywh.append(obj)
                        confs.append([conf.item()])
                        with open(detection_path, 'a') as f:
                            f.write(('%g ' * 5 + '\n') %
                                    (frame_idx, x_c, y_c, bbox_w, bbox_h))

                    xywhs = torch.Tensor(bbox_xywh)
                    confss = torch.Tensor(confs)

                    # Pass detections to deepsort
                    outputs = deepsort.update(xywhs, confss, im0)

                    # draw boxes for visualization
                    # if len(outputs) > 0:
                    #     bbox_xyxy = outputs[:, :4]
                    #     identities = outputs[:, -1]
                    #     draw_boxes(im0, bbox_xyxy, identities)

                    # Write MOT compliant results to file
                    if save_txt and len(outputs) != 0:
                        for j, output in enumerate(outputs):
                            bbox_left = output[0]
                            bbox_top = output[1]
                            bbox_w = output[2]
                            bbox_h = output[3]
                            identity = output[-1]
                            with open(txt_path, 'a') as f:
                                f.write(('%g ' * 10 + '\n') %
                                        (frame_idx, identity, bbox_left,
                                         bbox_top, bbox_w, bbox_h, -1, -1, -1,
                                         -1))  # label format

                # Print time (inference + NMS)
                print('%s %sDone. (%.3fs)' % (caption, s, t2 - t1))

                # Stream results
                if view_img:
                    cv2.imshow(p, im0)
                    if cv2.waitKey(1) == ord('q'):  # q to quit
                        raise StopIteration

                # Save results (image with detections)
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer

                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path,
                            cv2.VideoWriter_fourcc(*self.opt['fourcc']), fps,
                            (w, h))
                    vid_writer.write(im0)

        if isinstance(vid_writer, cv2.VideoWriter):
            vid_writer.release()
        print('Done. (%.3fs)' % (time.time() - t0))

Example #22

Show file

def run(
        weights=ROOT / 'yolov5s.pt',  # model.pt path(s)
        source=ROOT / 'data/images',  # file/dir/URL/glob, 0 for webcam
        data=ROOT / 'data/coco128.yaml',  # dataset.yaml path
        imgsz=(640, 640),  # inference size (height, width)
        conf_thres=0.25,  # confidence threshold
        iou_thres=0.45,  # NMS IOU threshold
        max_det=1000,  # maximum detections per image
        device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        view_img=False,  # show results
        save_txt=False,  # save results to *.txt
        save_conf=False,  # save confidences in --save-txt labels
        save_crop=False,  # save cropped prediction boxes
        nosave=False,  # do not save images/videos
        classes=None,  # filter by class: --class 0, or --class 0 2 3
        agnostic_nms=False,  # class-agnostic NMS
        augment=False,  # augmented inference
        visualize=False,  # visualize features
        update=False,  # update all models
        project=ROOT / 'runs/detect',  # save results to project/name
        name='exp',  # save results to project/name
        exist_ok=False,  # existing project/name ok, do not increment
        line_thickness=3,  # bounding box thickness (pixels)
        hide_labels=False,  # hide labels
        hide_conf=False,  # hide confidences
        half=False,  # use FP16 half-precision inference
        dnn=False,  # use OpenCV DNN for ONNX inference
):
    source = str(source)
    save_img = not nosave and not source.endswith(
        '.txt')  # save inference images
    is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS)
    is_url = source.lower().startswith(
        ('rtsp://', 'rtmp://', 'http://', 'https://'))
    webcam = source.isnumeric() or source.endswith('.txt') or (is_url
                                                               and not is_file)
    if is_url and is_file:
        source = check_file(source)  # download

    # Directories
    save_dir = increment_path(Path(project) / name,
                              exist_ok=exist_ok)  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(
        parents=True, exist_ok=True)  # make dir

    # Load model
    device = select_device(device)
    model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data)
    stride, names, pt, jit, onnx, engine = model.stride, model.names, model.pt, model.jit, model.onnx, model.engine
    imgsz = check_img_size(imgsz, s=stride)  # check image size

    # Half
    half &= (
        pt or jit or onnx or engine
    ) and device.type != 'cpu'  # FP16 supported on limited backends with CUDA
    if pt or jit:
        model.model.half() if half else model.model.float()

    # Dataloader
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt)
        bs = len(dataset)  # batch_size
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt)
        bs = 1  # batch_size
    vid_path, vid_writer = [None] * bs, [None] * bs

    # Run inference
    model.warmup(imgsz=(1 if pt else bs, 3, *imgsz), half=half)  # warmup
    dt, seen = [0.0, 0.0, 0.0], 0
    for path, im, im0s, vid_cap, s in dataset:
        t1 = time_sync()
        im = torch.from_numpy(im).to(device)
        im = im.half() if half else im.float()  # uint8 to fp16/32
        im /= 255  # 0 - 255 to 0.0 - 1.0
        if len(im.shape) == 3:
            im = im[None]  # expand for batch dim
        t2 = time_sync()
        dt[0] += t2 - t1

        # Inference
        visualize = increment_path(save_dir / Path(path).stem,
                                   mkdir=True) if visualize else False
        pred = model(im, augment=augment, visualize=visualize)
        t3 = time_sync()
        dt[1] += t3 - t2

        # NMS
        pred = non_max_suppression(pred,
                                   conf_thres,
                                   iou_thres,
                                   classes,
                                   agnostic_nms,
                                   max_det=max_det)
        dt[2] += time_sync() - t3

        # Second-stage classifier (optional)
        # pred = utils.general.apply_classifier(pred, classifier_model, im, im0s)

        # Process predictions
        for i, det in enumerate(pred):  # per image
            seen += 1
            if webcam:  # batch_size >= 1
                p, im0, frame = path[i], im0s[i].copy(), dataset.count
                s += f'{i}: '
            else:
                p, im0, frame = path, im0s.copy(), getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # im.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + (
                '' if dataset.mode == 'image' else f'_{frame}')  # im.txt
            s += '%gx%g ' % im.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            imc = im0.copy() if save_crop else im0  # for save_crop
            annotator = Annotator(im0,
                                  line_width=line_thickness,
                                  example=str(names))
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(im.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh,
                                conf) if save_conf else (cls,
                                                         *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or save_crop or view_img:  # Add bbox to image
                        c = int(cls)  # integer class
                        label = None if hide_labels else (
                            names[c]
                            if hide_conf else f'{names[c]} {conf:.2f}')
                        annotator.box_label(xyxy, label, color=colors(c, True))
                        if save_crop:
                            save_one_box(xyxy,
                                         imc,
                                         file=save_dir / 'crops' / names[c] /
                                         f'{p.stem}.jpg',
                                         BGR=True)

            # Stream results
            im0 = annotator.result()
            if view_img:
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video' or 'stream'
                    if vid_path[i] != save_path:  # new video
                        vid_path[i] = save_path
                        if isinstance(vid_writer[i], cv2.VideoWriter):
                            vid_writer[i].release(
                            )  # release previous video writer
                        if vid_cap:  # video
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        else:  # stream
                            fps, w, h = 30, im0.shape[1], im0.shape[0]
                        save_path = str(Path(save_path).with_suffix(
                            '.mp4'))  # force *.mp4 suffix on results videos
                        vid_writer[i] = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps,
                            (w, h))
                    vid_writer[i].write(im0)

        # Print time (inference-only)
        LOGGER.info(f'{s}Done. ({t3 - t2:.3f}s)')

    # Print results
    t = tuple(x / seen * 1E3 for x in dt)  # speeds per image
    LOGGER.info(
        f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}'
        % t)
    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}")
    if update:
        strip_optimizer(weights)  # update model (to fix SourceChangeWarning)

Example #23

Show file

File: main.py Project: yumeng88/Multi-class_Yolov5_DeepSort_Pytorch

def detect(opt, save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith('rtsp') or source.startswith(
        'http') or source.endswith('.txt')

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Initialize
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Read Yaml
    with open(opt.data) as f:
        data_dict = yaml.load(f, Loader=yaml.FullLoader)
    names = data_dict['names']
    print(names)

    # Load model
    #google_utils.attempt_download(weights)
    model = torch.load(weights,
                       map_location=device)['model'].float()  # load to FP32
    #model = torch.save(torch.load(weights, map_location=device), weights)  # update model if SourceChangeWarning
    # model.fuse()
    model.to(device).eval()
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = torch_utils.load_classifier(name='resnet101',
                                             n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Run inference
    t_fps = time_synchronized()
    frame_fps = 0
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img
              ) if device.type != 'cpu' else None  # run once
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out) / Path(p).stem) + (
                '_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                #print(det[:, -1].unique())
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                bbox_xywh = []
                confs = []
                #clses = []

                # Write results
                for *xyxy, conf, cls in det:

                    img_h, img_w, _ = im0.shape  # get image shape

                    x_c, y_c, bbox_w, bbox_h = bbox_rel(img_w, img_h, *xyxy)
                    obj = [x_c, y_c, bbox_w, bbox_h]
                    bbox_xywh.append(obj)
                    confs.append([conf.item()])
                    #clses.append([cls.item()])
                    #outputs, clses2 = deepsort.update((torch.Tensor(bbox_xywh)), (torch.Tensor(confs)), (torch.Tensor(clses)), im0)
                    #outputs = deepsort.update((torch.Tensor(bbox_xywh)), (torch.Tensor(confs)), im0)

                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)],
                                     line_thickness=3)

                xywhs = torch.Tensor(bbox_xywh)
                confss = torch.Tensor(confs)
                # Pass detections to deepsort
                outputs = deepsort.update(xywhs, confss, im0)

                # draw boxes for visualization
                if len(outputs) > 0:
                    bbox_tlwh = []
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, -1]
                    ori_im = draw_boxes(im0, bbox_xyxy, identities)

                    # Print time (inference + NMS)
            #print('%sDone. (%.3fs)' % (s, t2 - t1))
            print('%sDone.' % s)

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer

                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*opt.fourcc),
                            fps, (w, h))
                    vid_writer.write(im0)

        frame_fps += 1

        if ((time_synchronized() - t_fps) >= 1):
            print('\n')
            print('FPS=%.2f' % (frame_fps))
            t_fps = time_synchronized()
            frame_fps = 0

        #print('FPS=%.2f' % (1/(time_synchronized() - t1)))

    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        if platform == 'darwin':  # MacOS
            os.system('open ' + save_path)

Example #24

Show file

def mainFunc(args):
    # Set the main function flag
    print("Main Function Start...")

    # Check the GPU device
    print("Number of available GPUs: {}".format(torch.cuda.device_count()))

    # Check whether using the distributed runing for the network
    is_distributed = initDistributed(args)
    master = True
    if is_distributed and os.environ["RANK"]:
        master = int(
            os.environ["RANK"]) == 0  # check whether this node is master node

    # Configuration for device setting
    set_logging()
    if is_distributed:
        device = torch.device('cuda:{}'.format(args.local_rank))
    else:
        device = select_device(args.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load the configuration
    config = loadConfig(args.config)

    # CuDNN related setting
    if torch.cuda.is_available():
        cudnn.benchmark = config.DEVICE.CUDNN.BENCHMARK
        cudnn.deterministic = config.DEVICE.CUDNN.DETERMINISTIC
        cudnn.enabled = config.DEVICE.CUDNN.ENABLED

    # Configurations for dirctories
    save_img, save_dir, source, yolov5_weights, view_img, save_txt, imgsz = \
        False, Path(args.save_dir), args.source, args.weights, args.view_img, args.save_txt, args.img_size
    webcam = source.isnumeric() or source.startswith(
        ('rtsp://', 'rtmp://', 'http://')) or source.endswith('.txt')

    if save_dir == Path('runs/detect'):  # if default
        os.makedirs('runs/detect', exist_ok=True)  # make base
        save_dir = Path(increment_dir(save_dir / 'exp',
                                      args.name))  # increment run
    os.makedirs(save_dir / 'labels' if save_txt else save_dir,
                exist_ok=True)  # make new dir

    # Load yolov5 model for human detection
    model_yolov5 = attempt_load(config.MODEL.PRETRAINED.YOLOV5,
                                map_location=device)
    imgsz = check_img_size(imgsz,
                           s=model_yolov5.stride.max())  # check img_size
    if half:
        model_yolov5.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        model_classifier = load_classifier(name='resnet101', n=2)  # initialize
        model_classifier.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        model_classifier.to(device).eval()

    # Load resnet model for human keypoints estimation
    model_resnet = eval('pose_models.' + config.MODEL.NAME.RESNET +
                        '.get_pose_net')(config, is_train=False)
    if config.EVAL.RESNET.MODEL_FILE:
        print('=> loading model from {}'.format(config.EVAL.RESNET.MODEL_FILE))
        model_resnet.load_state_dict(torch.load(config.EVAL.RESNET.MODEL_FILE),
                                     strict=False)
    else:
        print('expected model defined in config at EVAL.RESNET.MODEL_FILE')
    model_resnet.to(device)
    model_resnet.eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)
    pose_transform = transforms.Compose(
        [  # input transformation for 2d human pose estimation
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])

    # Get names and colors
    names = model_yolov5.module.names if hasattr(
        model_yolov5, 'module') else model_yolov5.names
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Construt filters for filtering 2D/3D human keypoints
    # filters_2d = constructFilters((1,16,2), freq=25, mincutoff=1, beta=0.01)  # for test
    # filters_3d = constructFilters((1,16,3), freq=25, mincutoff=1, beta=0.01)

    # Run the yolov5 and resnet for 2d human pose estimation
    # with torch.no_grad():
    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model_yolov5(img.half() if half else img
                     ) if device.type != 'cpu' else None  # run once

    # Process every video frame
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred_boxes = model_yolov5(img, augment=args.augment)[0]

        # Apply NMS
        pred_boxes = non_max_suppression(pred_boxes,
                                         args.conf_thres,
                                         args.iou_thres,
                                         classes=args.classes,
                                         agnostic=args.agnostic_nms)
        t2 = time_synchronized()

        # Can not find people and move to next frame
        if pred_boxes[0] is None:
            # show the frame with no human detected
            cv2.namedWindow("2D Human Pose Estimation", cv2.WINDOW_NORMAL)
            cv2.imshow("2D Human Pose Estimation", im0s[0].copy())
            # wait manual operations
            # with kb.Listener(on_press=on_press) as listener:
            #     listener.join()
            #     return
            # if kb.is_pressed('t'):
            #     return
            print("No Human Detected and Move on.")
            print("-" * 30)
            continue

        # Print time (inference + NMS)
        detect_time = t2 - t1
        detect_fps = 1.0 / detect_time
        print("Human Detection Time: {}, Human Detection FPS: {}".format(
            detect_time, detect_fps))

        # Apply Classifier
        if classify:  # false
            pred_boxes = apply_classifier(pred_boxes, model_classifier, img,
                                          im0s)

        # Estimate 2d human pose(multiple person)
        centers = []
        scales = []
        for id, boxes in enumerate(pred_boxes):
            if boxes is not None and len(boxes):
                boxes[:, :4] = scale_coords(img.shape[2:], boxes[:, :4],
                                            im0s[id].copy().shape).round()
            # convert tensor to list format
            boxes = np.delete(boxes.cpu().numpy(), [-2, -1], axis=1).tolist()
            for l in range(len(boxes)):
                boxes[l] = [tuple(boxes[l][0:2]), tuple(boxes[l][2:4])]
            # convert box to center and scale
            for box in boxes:
                center, scale = box_to_center_scale(box, imgsz, imgsz)
                centers.append(center)
                scales.append(scale)
        t3 = time_synchronized()
        pred_pose_2d = get_pose_estimation_prediction(config,
                                                      model_resnet,
                                                      im0s[0],
                                                      centers,
                                                      scales,
                                                      transform=pose_transform,
                                                      device=device)
        t4 = time_synchronized()

        # Print time (2d human pose estimation)
        estimate_time = t4 - t3
        estimate_fps = 1.0 / estimate_time
        print("Pose Estimation Time: {}, Pose Estimation FPS: {}".format(
            estimate_time, estimate_fps))

        # Filter the predicted 2d human pose(multiple person)
        t5 = time_synchronized()
        # if False:  # for test
        if config.EVAL.RESNET.USE_FILTERS_2D:
            # construct filters for every keypoints of every person in 2D
            filters_2d = constructFilters(pred_pose_2d.shape,
                                          freq=1,
                                          mincutoff=1,
                                          beta=0.01)
            print("Shape of filters_2d: ({}, {}, {})".format(
                len(filters_2d), len(filters_2d[0]),
                len(filters_2d[0][0])))  # for test
            for per in range(pred_pose_2d.shape[0]):
                for kp in range(pred_pose_2d.shape[1]):
                    for coord in range(pred_pose_2d.shape[2]):
                        pred_pose_2d[per][kp][coord] = filters_2d[per][kp][
                            coord](pred_pose_2d[per][kp][coord])
        t6 = time_synchronized()

        # Print time (filter 2d human pose)
        filter_time_2d = t6 - t5
        filter_fps_2d = 1.0 / filter_time_2d
        print("Filter 2D Pose Time: {}, Filter 2D Pose FPS: {}".format(
            filter_time_2d, filter_fps_2d))

        # Process detections and estimations in 2D
        for i, box in enumerate(pred_boxes):
            if webcam:  # batch_size >= 1
                p, s, im0 = Path(path[i]), '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = Path(path), '', im0s

            save_path = str(save_dir / p.name)
            txt_path = str(save_dir / 'labels' / p.stem) + (
                '_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh

            if box is not None and len(box):
                # Rescale boxes from img_size to im0 size
                box[:, :4] = scale_coords(img.shape[2:], box[:, :4],
                                          im0.shape).round()

                # Print results
                for c in box[:, -1].unique():
                    n = (box[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(box):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if args.save_conf else (
                            cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line) + '\n') % line)

                    # Add bbox to image
                    if save_img or view_img:
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)],
                                     line_thickness=3)

                # Draw joint keypoints, number orders and human skeletons for every detected people in 2D
                for person in pred_pose_2d:
                    # draw the human keypoints
                    for idx, coord in enumerate(person):
                        x_coord, y_coord = int(coord[0]), int(coord[1])
                        cv2.circle(im0, (x_coord, y_coord), 1, (0, 0, 255), 5)
                        cv2.putText(im0, str(idx), (x_coord, y_coord),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.6,
                                    (255, 255, 255), 2, cv2.LINE_AA)

                    # draw the human skeletons in PACIFIC mode
                    for skeleton in PACIFIC_SKELETON_INDEXES:
                        cv2.line(im0, (int(person[skeleton[0]][0]),
                                       int(person[skeleton[0]][1])),
                                 (int(person[skeleton[1]][0]),
                                  int(person[skeleton[1]][1])), skeleton[2], 2)

            # Print time (inference + NMS + estimation)
            print('%sDone. (%.3fs)' % (s, t4 - t1))

            # Stream results
            if view_img:
                detect_text = "Detect FPS:{0:0>5.2f}/{1:0>6.2f}ms".format(
                    detect_fps, detect_time * 1000)
                estimate_text = "Estimate FPS:{0:0>5.2f}/{1:0>6.2f}ms".format(
                    estimate_fps, estimate_time * 1000)
                cv2.putText(im0, detect_text, (10, 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2,
                            cv2.LINE_AA)
                cv2.putText(im0, estimate_text, (10, 60),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2,
                            cv2.LINE_AA)
                cv2.namedWindow("2D Human Pose Estimation", cv2.WINDOW_NORMAL)
                cv2.imshow("2D Human Pose Estimation", im0)
                if cv2.waitKey(1) & 0xFF == ord('q'):  # q to quit
                    return
                    # goto .mainFunc

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*fourcc), fps,
                            (w, h))
                    vid_writer.write(im0)

        # Print time (inference + NMS + estimation + 2d filtering)
        all_process_time = t6 - t1
        all_process_fps = 1.0 / all_process_time
        print("All Process Time: {}, All Process FPS: {}".format(
            all_process_time, all_process_fps))
        print("-" * 30)

    # Goto label
    # label .mainFunc

    # Print saving results
    if save_txt or save_img:
        print('Results saved to %s' % save_dir)

    # Release video reader and writer, then destory all opencv windows
    dataset.vid_cap.release()
    vid_writer.release()
    cv2.destroyAllWindows()
    print('Present 2D Human Pose Inference Done. Total Time:(%.3f seconds)' %
          (time.time() - t0))

Example #25

Show file

File: track.py Project: bok9504/car_tracking_line_express

def detect(opt, save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith(
        'rtsp') or source.startswith('http') or source.endswith('.txt')

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Initialize
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = torch.load(weights, map_location=device)[
        'model'].float()  # load to FP32
    model.to(device).eval()
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model']).to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        view_img = True
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    # run once
    _ = model(img.half() if half else img) if device.type != 'cpu' else None

    save_path = str(Path(out))
    txt_path_raw = str(Path(out)) + '/results_raw.txt'

    for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(
            pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)
            print(pred)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            s += '%gx%g ' % img.shape[2:]  # print string
            save_path = str(Path(out) / Path(p).name)

            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(
                    img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                bbox_xywh = []
                confs = []
                clss = []
                # Adapt detections to deep sort input format
                for *xyxy, conf, cls in det:
                    x_c, y_c, bbox_w, bbox_h = bbox_rel(*xyxy)
                    obj = [x_c, y_c, bbox_w, bbox_h]
                    bbox_xywh.append(obj)
                    confs.append([conf.item()])
                    clss.append(cls.item())

                bbox_xywh = bbox_xywh
                cls_conf = confs
                cls_ids = clss
                # xywhs = torch.Tensor(bbox_xywh)
                # confss = torch.Tensor(confs)
                # cls_ids = clss


                # if len(bbox_xywh) == 0:
                #     continue
                # print("detection cls_ids:", cls_ids)

                #filter cls id for tracking
                # print("cls_ids")
                # print(cls_ids)

                # # select class
                # mask = []
                # lst_move_life = [0,1,2]
                # # lst_for_track = []
                
                # for id in cls_ids:
                #     if id in lst_move_life:
                #         # lst_for_track.append(id)
                #         mask.append(True)
                #     else:
                #         mask.append()
                # # print("mask cls_ids:", mask)

                # # print(bbox_xywh)
                # bbox_xywh = list(compress(bbox_xywh,mask))
                # bbox dilation just in case bbox too small, delete this line if using a better pedestrian detector
                # bbox_xywh[:,3:] *= 1.2
                # cls_conf = list(compress(cls_conf,mask))
                # print(cls_conf)

                bbox_xywh = torch.Tensor(bbox_xywh)
                cls_conf = torch.Tensor(cls_conf)

                # Pass detections to deepsort
                outputs = deepsort.update(bbox_xywh, cls_conf, im0, cls_ids)
                '''
                TODO:
                카운터 추가 요망
                '''
                # counting num and class

                # draw boxes for visualization
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, 4:5]
                    cls_id = outputs[:,-1]
                    # print(outputs[:,-1]) #--> 문제 발견
                    # print("track res cls_id:", cls_id)
                    # cls_ids_show = [cls_ids[i] for i in cls_id]
                    draw_boxes(im0, bbox_xyxy, cls_id, identities)

                # Write MOT compliant results to file
                if save_txt and len(outputs) != 0:
                    for j, output in enumerate(outputs):
                        bbox_left = output[0]
                        bbox_top = output[1]
                        bbox_w = output[2]
                        bbox_h = output[3]
                        identity = output[4]
                        classname = output[5]

                        with open(txt_path_raw, 'a') as f: # Yolov5와 DeepSort를 통하여 만들어진 첫 결과물(원본결과물)
                            f.write(('%g ' * 6 +'%g' *1 +'%g ' * 3 + '\n') % (frame_idx, identity, bbox_left,
                                                           bbox_top, bbox_w, bbox_h, classname, -1, -1, -1))  # label format

            else:
                deepsort.increment_ages()

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                print('saving img!')
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    print('saving video!')
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*opt.fourcc), fps, (w, h))
                    vid_writer.write(im0)
                    

    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        if platform == 'darwin':  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))

Example #26

Show file

File: PersonCounter.py Project: HarshRangwala/DeepSort-Surveillance

def detect(opt, save_img=False):
    ct = CentroidTracker()
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith(
        'rtsp') or source.startswith('http') or source.endswith('.txt')

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Initialize
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA
    now = datetime.datetime.now().strftime("%Y/%m/%d/%H:%M:%S") # current time

    # Load model
    model = torch.load(weights, map_location=device)[
        'model'].float()  # load to FP32
    
    model.to(device).eval()
    
# =============================================================================
    filepath_mask = 'D:/Internship Crime Detection/YOLOv5 person detection/AjnaTask/Mytracker/yolov5/weights/mask.pt'
        
    model_mask = torch.load(filepath_mask, map_location = device)['model'].float()
    model_mask.to(device).eval()
    if half:
        model_mask.half()
        
    names_m = model_mask.module.names if hasattr(model_mask, 'module') else model_mask.names
# =============================================================================
    
    if half:
        model.half()  # to FP16

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        view_img = False
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    # run once
    _ = model(img.half() if half else img) if device.type != 'cpu' else None

    save_path = str(Path(out))
    txt_path = str(Path(out)) + '/results.txt'

    memory = {}
    people_counter = 0
    in_people = 0
    out_people = 0
    people_mask = 0
    people_none = 0
    time_sum = 0
    # now_time = datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S')
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]
    for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]
# =============================================================================
        pred_mask = model_mask(img)[0]
# =============================================================================
        # Apply NMS
        pred = non_max_suppression(
            pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        
# =============================================================================
        pred_mask = non_max_suppression(pred_mask, 0.4, 0.5, classes = [0, 1, 2], agnostic = None)
        
        if pred_mask is None:
            continue
        classification = torch.cat(pred_mask)[:, -1]
        if len(classification) == 0:
            print("----",None)
            continue
        index = int(classification[0])
        
        mask_class = names_m[index]
        print("MASK CLASS>>>>>>> \n", mask_class)
# =============================================================================

        # Create the haar cascade
        # cascPath = "D:/Internship Crime Detection/YOLOv5 person detection/AjnaTask/Mytracker/haarcascade_frontalface_alt2.xml"
        # faceCascade = cv2.CascadeClassifier(cascPath)
        
        
        t2 = time_synchronized()
        
        
        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            s += '%gx%g ' % img.shape[2:]  # print string
            save_path = str(Path(out) / Path(p).name)
            img_center_y = int(im0.shape[0]//2)
            # line = [(int(im0.shape[1]*0.258),int(img_center_y*1.3)),(int(im0.shape[1]*0.55),int(img_center_y*1.3))]
            # print("LINE>>>>>>>>>", line,"------------")
            # line = [(990, 672), (1072, 24)]
            line = [(1272, 892), (1800, 203)]
            #  [(330, 468), (704, 468)]
            print("LINE>>>>>>>>>", line,"------------")
            cv2.line(im0,line[0],line[1],(0,0,255),5)
            
# =============================================================================
#             gray = cv2.cvtColor(im0, cv2.COLOR_BGR2GRAY)
#             # Detect faces in the image
#             faces = faceCascade.detectMultiScale(
#             gray,
#             scaleFactor=1.1,
#             minNeighbors=5,
#             minSize=(30, 30)
#             )
#             # Draw a rectangle around the faces
#             for (x, y, w, h) in faces:
#                 cv2.rectangle(im0, (x, y), (x+w, y+h), (0, 255, 0), 2)
#                 text_x = x
#                 text_y = y+h
#                 cv2.putText(im0, mask_class, (text_x, text_y), cv2.FONT_HERSHEY_COMPLEX_SMALL,
#                                                     1, (0, 0, 255), thickness=1, lineType=2)
# =============================================================================
        
            
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(
                    img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                bbox_xywh = []
                confs = []
                bbox_xyxy = []
                rects = [] # Is it correct?

                # Adapt detections to deep sort input format
                for *xyxy, conf, cls in det:
                    x_c, y_c, bbox_w, bbox_h = bbox_rel(*xyxy)
                    # label = f'{names[int(cls)]}'
                    xyxy_list = torch.tensor(xyxy).view(1,4).view(-1).tolist()
                    plot_one_box(xyxy, im0, label='person', color=colors[int(cls)], line_thickness=3)
                    rects.append(xyxy_list)
                    
                    obj = [x_c, y_c, bbox_w, bbox_h,int(cls)]
                    #cv2.circle(im0,(int(x_c),int(y_c)),color=(0,255,255),radius=12,thickness = 10)
                    bbox_xywh.append(obj)
                    # bbox_xyxy.append(rec)
                    confs.append([conf.item()])
                    


                xywhs = torch.Tensor(bbox_xywh)
                confss = torch.Tensor(confs)

                # Pass detections to deepsort
                outputs = ct.update(rects) # xyxy
                # outputs = deepsort.update(xywhs, confss, im0) # deepsort
                index_id = []
                previous = memory.copy()
                memory = {}
                boxes = []
                names_ls = []
                


                # draw boxes for visualization
                if len(outputs) > 0:
                    
                    # print('output len',len(outputs))
                    for id_,centroid in outputs.items():
                        # boxes.append([output[0],output[1],output[2],output[3]])
                        # index_id.append('{}-{}'.format(names_ls[-1],output[-2]))
                        index_id.append(id_)
                        boxes.append(centroid)
                        memory[index_id[-1]] = boxes[-1]

                    
                    i = int(0)
                    print(">>>>>>>",boxes)
                    for box in boxes:
                        # extract the bounding box coordinates
                        # (x, y) = (int(box[0]), int(box[1]))
                        # (w, h) = (int(box[2]), int(box[3]))
                        x = int(box[0])
                        y = int(box[1])
                        # GGG
                        if index_id[i] in previous:
                            previous_box = previous[index_id[i]]
                            (x2, y2) = (int(previous_box[0]), int(previous_box[1]))
                            # (w2, h2) = (int(previous_box[2]), int(previous_box[3]))
                            p0 = (x,y)
                            p1 = (x2,y2)
                            
                            cv2.line(im0, p0, p1, (0,255,0), 3) # current frame obj center point - before frame obj center point
                        
                            if intersect(p0, p1, line[0], line[1]):
                                people_counter += 1
                                print('==============================')
                                print(p0,"---------------------------",p0[1])
                                print('==============================')
                                print(line[1][1],'------------------',line[0][0],'-----------------', line[1][0],'-------------',line[0][1])
                                # if p0[1] <= line[1][1]:
                                #     in_people +=1
                    
                                
                                # else:
                                #     # if mask_class == 'mask':
                                #     #     print("COUNTING MASK..", mask_class)
                                #     #     people_mask += 1
                                #     # if mask_class == 'none':
                                #     #     people_none += 1
                                #     out_people +=1 
                                if p0[1] >= line[1][1]:
                                    in_people += 1
                                    if mask_class == 'mask':
                                        people_mask += 1
                                    else:
                                        people_none += 1
                                else:
                                    out_people += 1
                            

                        i += 1

                    
                        
                # Write MOT compliant results to file
                if save_txt and len(outputs) != 0:
                    for j, output in enumerate(outputs):
                        bbox_left = output[0]
                        bbox_top = output[1]
                        bbox_w = output[2]
                        bbox_h = output[3]
                        identity = output[-1]
                        with open(txt_path, 'a') as f:
                            f.write(('%g ' * 10 + '\n') % (frame_idx, identity, bbox_left,
                                                           bbox_top, bbox_w, bbox_h, -1, -1, -1, -1))  # label format
                
            else:
                deepsort.increment_ages()
            cv2.putText(im0, 'Person [down][up] : [{}][{}]'.format(out_people,in_people),(130,50),cv2.FONT_HERSHEY_COMPLEX,1.0,(0,0,255),3)
            cv2.putText(im0, 'Person [mask][no_mask] : [{}][{}]'.format(people_mask, people_none), (130,100),cv2.FONT_HERSHEY_COMPLEX,1.0,(0,0,255),3)
            # Print time (inference + NMS)
            
            
            print('%sDone. (%.3fs)' % (s, t2 - t1))
            time_sum += t2-t1
            
            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    # im0= cv2.resize(im0,(0,0),fx=0.5,fy=0.5,interpolation=cv2.INTER_LINEAR)
                    cv2.imwrite(save_path, im0)
                else:
                    
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*opt.fourcc), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        if platform == 'darwin':  # MacOS
            os.system('open ' + save_path)
    
    print('Done. (%.3fs)' % (time.time() - t0))

Example #27

Show file

File: track.py Project: johnhutx/21LPCVC-UAV_VIdeo_Track-Sample-Solution

def detect(opt, device, save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith('rtsp') or source.startswith(
        'http') or source.endswith('.txt')

    colorOrder = ['red', 'purple', 'blue', 'green', 'yellow', 'orange']
    frame_num = 0
    framestr = 'Frame {frame}'
    fpses = []
    frame_catch_pairs = []
    ball_person_pairs = {}

    for color in colorDict:
        ball_person_pairs[color] = 0

    # Read Class Name Yaml
    with open(opt.data) as f:
        data_dict = yaml.load(f, Loader=yaml.FullLoader)
    names = data_dict['names']

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Initialize
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model']).to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Run inference
    if device.type != 'cpu':
        model(
            torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(
                next(model.parameters())))  # run once
    t0 = time.time()
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out) / Path(p).stem) + (
                '_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                bbox_xywh = []
                confs = []
                clses = []

                # Write results
                for *xyxy, conf, cls in det:

                    img_h, img_w, _ = im0.shape  # get image shape
                    x_c, y_c, bbox_w, bbox_h = bbox_rel(img_w, img_h, *xyxy)
                    obj = [x_c, y_c, bbox_w, bbox_h]
                    bbox_xywh.append(obj)
                    confs.append([conf.item()])
                    clses.append([cls.item()])

                xywhs = torch.Tensor(bbox_xywh)
                confss = torch.Tensor(confs)
                clses = torch.Tensor(clses)
                # Pass detections to deepsort
                outputs = []
                global groundtruths_path
                if not 'disable' in groundtruths_path:
                    # print('\nenabled', groundtruths_path)
                    groundtruths = solution.load_labels(
                        groundtruths_path, img_w, img_h, frame_num)
                    if (groundtruths.shape[0] == 0):
                        outputs = deepsort.update(xywhs, confss, clses, im0)
                    else:
                        # print(groundtruths)
                        xywhs = groundtruths[:, 2:]
                        tensor = torch.tensor((), dtype=torch.int32)
                        confss = tensor.new_ones((groundtruths.shape[0], 1))
                        clses = groundtruths[:, 0:1]
                        outputs = deepsort.update(xywhs, confss, clses, im0)

                    if frame_num >= 2:
                        for real_ID in groundtruths[:, 1:].tolist():
                            for DS_ID in xyxy2xywh(outputs[:, :5]):
                                if (abs(DS_ID[0] - real_ID[1]) / img_w < 0.005
                                    ) and (abs(DS_ID[1] - real_ID[2]) / img_h <
                                           0.005) and (
                                               abs(DS_ID[2] - real_ID[3]) /
                                               img_w < 0.005) and (
                                                   abs(DS_ID[3] - real_ID[4]) /
                                                   img_w < 0.005):
                                    id_mapping[DS_ID[4]] = int(real_ID[0])
                else:
                    outputs = deepsort.update(xywhs, confss, clses, im0)

                # draw boxes for visualization
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, 4]
                    clses = outputs[:, 5]
                    scores = outputs[:, 6]

                    #Temp solution to get correct id's
                    mapped_id_list = []
                    for ids in identities:
                        if (ids in id_mapping):
                            mapped_id_list.append(int(id_mapping[ids]))
                        else:
                            mapped_id_list.append(ids)

                    ball_detect, frame_catch_pairs, ball_person_pairs = solution.detect_catches(
                        im0, bbox_xyxy, clses, mapped_id_list, frame_num,
                        colorDict, frame_catch_pairs, ball_person_pairs,
                        colorOrder, save_img)

                    t3 = time_synchronized()
                    draw_boxes(im0, bbox_xyxy, [names[i] for i in clses],
                               scores, ball_detect, identities)
                else:
                    t3 = time_synchronized()

            #Draw frame number
            tmp = framestr.format(frame=frame_num)
            t_size = cv2.getTextSize(tmp, cv2.FONT_HERSHEY_PLAIN, 2, 2)[0]
            cv2.putText(im0, tmp, (0, (t_size[1] + 10)),
                        cv2.FONT_HERSHEY_PLAIN, 2, [255, 255, 255], 2)

            #Inference Time
            fps = (1 / (t3 - t1))
            fpses.append(fps)
            print('FPS=%.2f' % fps)

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer

                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*opt.fourcc),
                            fps, (w, h))
                    vid_writer.write(im0)
            frame_num += 1

    #t4 = time_synchronized()
    avgFps = (sum(fpses) / len(fpses))
    print('Average FPS = %.2f' % avgFps)
    #print('Total Runtime = %.2f' % (t4 - t0))

    outpath = os.path.basename(source)
    outpath = outpath[:-4]
    outpath = out + '/' + outpath + '_out.csv'
    solution.write_catches(outpath, frame_catch_pairs, colorOrder)

    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        if platform == 'darwin':  # MacOS
            os.system('open ' + save_path)

Example #28

Show file

def detect(opt, save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith('rtsp') or source.startswith(
        'http') or source.endswith('.txt')

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Initialize
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    names = model.module.names if hasattr(
        model, 'module') else model.names  # get class names
    if half:
        model.half()  # to FP16

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        view_img = True
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    # run once
    _ = model(img.half() if half else img) if device.type != 'cpu' else None

    save_path = str(Path(out))
    txt_path = str(Path(out)) + '/results.txt'

    for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            s += '%gx%g ' % img.shape[2:]  # print string
            save_path = str(Path(out) / Path(p).name)

            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                bbox_xywh = []
                confs = []

                # Adapt detections to deep sort input format
                for *xyxy, conf, cls in det:
                    x_c, y_c, bbox_w, bbox_h = bbox_rel(*xyxy)
                    obj = [x_c, y_c, bbox_w, bbox_h]
                    bbox_xywh.append(obj)
                    confs.append([conf.item()])

                xywhs = torch.Tensor(bbox_xywh)
                confss = torch.Tensor(confs)

                # Pass detections to deepsort
                outputs = deepsort.update(xywhs, confss, im0)

                # draw boxes for visualization
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, -1]
                    draw_boxes(im0, bbox_xyxy, identities)

                # Write MOT compliant results to file
                if save_txt and len(outputs) != 0:
                    for j, output in enumerate(outputs):
                        bbox_left = output[0]
                        bbox_top = output[1]
                        bbox_w = output[2]
                        bbox_h = output[3]
                        identity = output[-1]
                        with open(txt_path, 'a') as f:
                            f.write(('%g ' * 10 + '\n') %
                                    (frame_idx, identity, bbox_left, bbox_top,
                                     bbox_w, bbox_h, -1, -1, -1,
                                     -1))  # label format

            else:
                deepsort.increment_ages()

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                print('saving img!')
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    print('saving video!')
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer

                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*opt.fourcc),
                            fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        if platform == 'darwin':  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))

Example #29

Show file

stride, names, pt, jit, _ = model.stride, model.names, model.pt, model.jit, model.onnx
imgsz = check_img_size(imgsz, s=stride)  # check image size

# Half
half &= pt and device.type != 'cpu'  # half precision only supported by PyTorch on CUDA
if pt:
    model.model.half() if half else model.model.float()

# Set Dataloader
vid_path, vid_writer = None, None
# Check if environment supports image displays

cudnn.benchmark = True  # set True to speed up constant image size inference

dataset = LoadStreams(source,
                      img_size=imgsz,
                      stride=stride,
                      auto=pt and not jit)

# dataset1 = LoadStreams(source1, img_size=imgsz, stride=stride, auto=pt and not jit)

bs = len(dataset)  # batch_size

vid_path, vid_writer = [None] * bs, [None] * bs

# Get names and colors
names = model.module.names if hasattr(model, 'module') else model.names

if pt and device.type != 'cpu':
    model(
        torch.zeros(1, 3, *imgsz).to(device).type_as(
            next(model.model.parameters())))  # warmup