Example #1
    def tracker(self):
        # initialize DeepSort from the default yaml config (CPU only)
        cfg = get_config()
        cfg.merge_from_file("deep_sort_pytorch/configs/deep_sort.yaml")
        deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                            max_dist=cfg.DEEPSORT.MAX_DIST,
                            min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                            nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                            max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                            max_age=cfg.DEEPSORT.MAX_AGE,
                            n_init=cfg.DEEPSORT.N_INIT,
                            nn_budget=cfg.DEEPSORT.NN_BUDGET,
                            use_cuda=False)
        return deepsort
    def build_tracker(self):
        """
        Build the deep sort tracker from default config
        To change config, tweak from the yaml file here: deep_sort_pytorch/configs/deep_sort.yaml
        """
        cfg = get_config()
        cfg.merge_from_file(self.config_deepsort)
        use_cuda = self.device != "cpu" and torch.cuda.is_available()

        self.tracker = DeepSort(cfg.DEEPSORT.REID_CKPT,
                                max_dist=cfg.DEEPSORT.MAX_DIST,
                                min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                                nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                                max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                                max_age=cfg.DEEPSORT.MAX_AGE,
                                n_init=cfg.DEEPSORT.N_INIT,
                                nn_budget=cfg.DEEPSORT.NN_BUDGET,
                                use_cuda=use_cuda)
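# The DeepSort object constructed above is typically driven frame by frame, as the
# examples below do: boxes are passed as (center-x, center-y, width, height) plus
# per-box confidences, and update() returns the tracked boxes with ids. A minimal
# sketch (the helper name track_frame and the exact return shape are assumptions):
import numpy as np
import torch

def track_frame(deepsort, boxes_xywh, confidences, frame):
    """boxes_xywh: list of [x_c, y_c, w, h]; confidences: list of floats; frame: BGR image."""
    if len(boxes_xywh) == 0:
        # keep track ages consistent on frames without detections
        deepsort.increment_ages()
        return np.empty((0, 5))
    xywhs = torch.Tensor(boxes_xywh)
    confss = torch.Tensor([[c] for c in confidences])
    # rows are typically [x1, y1, x2, y2, track_id]; some forks append a class id
    return deepsort.update(xywhs, confss, frame)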
def main(data_root='', seqs=('', ), args=""):
    logger = get_logger()
    logger.setLevel(logging.INFO)
    data_type = 'mot'
    result_root = os.path.join(Path(data_root), "mot_results")
    mkdir_if_missing(result_root)

    cfg = get_config()
    # cfg.merge_from_file(args.config_detection)
    cfg.merge_from_file(args.config_deepsort)

    # run tracking
    accs = []
    for seq in seqs:
        logger.info('start seq: {}'.format(seq))
        result_filename = os.path.join(result_root, 'result.txt')
        # video_path = data_root+"/"+seq+"/video/video.mp4"

        # with VideoTracker(cfg, args, video_path, result_filename) as vdo_trk:
        #     vdo_trk.run()

        # eval
        logger.info('Evaluate seq: {}'.format(seq))
        evaluator = Evaluator(data_root, seq, data_type)
        accs.append(evaluator.eval_file(result_filename))

    # get summary
    metrics = mm.metrics.motchallenge_metrics
    mh = mm.metrics.create()
    summary = Evaluator.get_summary(accs, seqs, metrics)
    strsummary = mm.io.render_summary(summary,
                                      formatters=mh.formatters,
                                      namemap=mm.io.motchallenge_metric_names)
    print(strsummary)
    Evaluator.save_summary(summary,
                           os.path.join(result_root, 'summary_global.xlsx'))
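# Hypothetical way to drive main(); the project's real argument parser is not shown,
# so the flag name and the data_root/seqs values below are placeholders only.
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--config_deepsort', type=str,
                        default='deep_sort_pytorch/configs/deep_sort.yaml')
    args = parser.parse_args()
    main(data_root='MOT16/train',
         seqs=('MOT16-02', 'MOT16-04'),
         args=args)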
Example #4
def detect(config):
    sent_videos = set()  # used below when a finished clip is logged
    TIME_TO_SEND_MSG = 10  # Greenwich time (hour at which the daily report is sent)
    months_rus = ('января', 'февраля', 'марта', 'апреля', 'мая', 'июня',
                  'июля', 'августа', 'сентября', 'октября', 'ноября',
                  'декабря')
    fpeses = []
    fps = 0.0
    fps_imutils = imutils.video.FPS().start()

    left_array = None
    rect_left = None

    token = "xxx"
    bot = telebot.TeleBot(token)

    def send_message(current_date, counter_in, counter_out):
        channel = '-1001399933919'
        # message text (Russian): "<date>: entered N, exited M"
        msg_tosend = "{}: зашло {}, вышло {}".format(current_date, counter_in,
                                                     counter_out)
        bot.send_message(chat_id=channel, text=msg_tosend)

    # camera info
    save_img = True
    imgsz = (416, 416) if ONNX_EXPORT else config[
        "img_size"]  # (320, 192) or (416, 256) or (608, 352) for (height, width)
    out, source, weights, half, view_img = config["output"], config["source"], config["weights"], \
                                           config["half"], config["view_img"]
    webcam = source == '0' or source.startswith('rtsp') or source.startswith(
        'http') or source.endswith('.txt')
    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(config["config_deepsort"])
    # initial objects of classes
    counter = Counter(counter_in=0, counter_out=0, track_id=0)
    VideoHandler = Writer()

    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Initialize device, weights etc.
    device = torch_utils.select_device(
        device='cpu' if ONNX_EXPORT else config["device"])
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA
    # Initialize model
    model = Darknet(config["cfg"], imgsz)

    # Load weights
    attempt_download(weights)
    if weights.endswith('.pt'):  # pytorch format
        model.load_state_dict(torch.load(weights,
                                         map_location=device)['model'],
                              strict=False)
    else:  # darknet format
        load_darknet_weights(model, weights)
    # Eval mode
    model.to(device).eval()
    # Half precision
    print(half)
    half = half and device.type != 'cpu'  # half precision only supported on CUDA
    print(half)
    if half:
        model.half()

    if webcam:
        view_img = True
        torch.backends.cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        view_img = True
        torch.backends.cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = load_classes(config["names"])
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img.float()
              ) if device.type != 'cpu' else None  # run once

    for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
        if rect_left is None:
            if webcam:  # batch_size >= 1
                im0 = im0s[0].copy()
            else:
                im0 = im0s
            left_array = [0, 0, im0.shape[1] / 2, im0.shape[0]]
            rect_left = Rectangle(left_array[0], left_array[1], left_array[2],
                                  left_array[3])

        flag_anyone_in_door = False
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)
        # Inference
        t1 = torch_utils.time_synchronized()

        pred = model(img, augment=config["augment"])[0]
        # to float
        if half:
            pred = pred.float()
        # Apply NMS
        classes = None if config["classes"] == "None" else config["classes"]
        pred = non_max_suppression(pred,
                                   config["conf_thres"],
                                   config["iou_thres"],
                                   multi_label=False,
                                   classes=classes,
                                   agnostic=config["agnostic_nms"])
        # Process detections
        lost_ids = counter.return_lost_ids()
        for i, det in enumerate(pred):  # detections for image i
            if webcam:  # batch_size >= 1
                im0 = im0s[i].copy()
            else:
                im0 = im0s

            bbox_xywh = []
            confs = []
            if det is not None and len(det):
                # Rescale boxes from imgsz to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()
                # Print results
                for c in det[:, -1].unique():
                    if names[int(c)] not in config["needed_classes"]:
                        continue
                # Write results
                for *xyxy, conf, cls in det:
                    # check if the bbox's class is needed
                    if names[int(cls)] not in config["needed_classes"]:
                        continue
                    x_c, y_c, bbox_w, bbox_h = bbox_rel(*xyxy)
                    obj = [x_c, y_c, bbox_w, bbox_h]
                    bbox_xywh.append(obj)
                    confs.append([conf.item()])
                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)])

        detections = torch.Tensor(bbox_xywh)
        confidences = torch.Tensor(confs)

        if len(detections) != 0:
            outputs_tracked = deepsort.update(detections, confidences, im0)
            counter.someone_inframe()
            # draw boxes for visualization
            if len(outputs_tracked) > 0:
                bbox_xyxy = outputs_tracked[:, :4]
                identities = outputs_tracked[:, -1]
                draw_boxes(im0, bbox_xyxy, identities)
                counter.update_identities(identities)

                for bbox_tracked, id_tracked in zip(bbox_xyxy, identities):

                    ratio_initial = find_ratio_ofbboxes(bbox=bbox_tracked,
                                                        rect_compare=rect_left)
                    # the person is inside the door contour for the first time
                    if VideoHandler.counter_frames_indoor == 0:
                        VideoHandler.start_video(id_tracked)
                        flag_anyone_in_door = True
                    elif id_tracked not in VideoHandler.id_inside_door_detected:
                        VideoHandler.continue_opened_video(id=id_tracked,
                                                           seconds=3)
                        flag_anyone_in_door = True

                    if id_tracked not in counter.people_init or counter.people_init[
                            id_tracked] == 0:
                        counter.obj_initialized(id_tracked)
                        if ratio_initial >= 0.8 and bbox_tracked[
                                3] < left_array[3]:
                            counter.people_init[id_tracked] = 2
                        elif ratio_initial < 0.8 and bbox_tracked[
                                3] > left_array[3]:
                            counter.people_init[id_tracked] = 1
                        else:
                            # res is None, means that object is not in door contour
                            counter.people_init[id_tracked] = 1
                        counter.frame_age_counter[id_tracked] = 0

                        counter.people_bbox[id_tracked] = bbox_tracked

                    counter.cur_bbox[id_tracked] = bbox_tracked
        else:
            deepsort.increment_ages()
            if counter.need_to_clear():
                counter.clear_all()

        for val in counter.people_init.keys():
            # check bbox also
            cur_c = find_centroid(counter.cur_bbox[val])
            init_c = find_centroid(counter.people_bbox[val])
            vector_person = (cur_c[0] - init_c[0], cur_c[1] - init_c[1])

            if val in lost_ids and counter.people_init[val] != -1:
                # if vector_person[0] < 0, the current x-coordinate is smaller than the initial one,
                # i.e. the person is moving in the exit direction
                ratio = find_ratio_ofbboxes(bbox=counter.cur_bbox[val],
                                            rect_compare=rect_left)
                if vector_person[0] > 200 and counter.people_init[val] == 2 \
                        and ratio < 0.7:
                    counter.get_out()
                    VideoHandler.stop_recording(
                        action_occured="вышел из кабинета")  # "left the office"
                    print('video {}, action: {}, vector {} \n'.format(
                        VideoHandler.video_name, VideoHandler.action_occured,
                        vector_person))

                elif vector_person[0] < -100 and counter.people_init[val] == 1 \
                        and ratio >= 0.7:
                    counter.get_in()
                    VideoHandler.stop_recording(
                        action_occured="зашел в кабинет")  # "entered the office"
                    print('video {}, action: {}, vector {} \n'.format(
                        VideoHandler.video_name, VideoHandler.action_occured,
                        vector_person))

                counter.people_init[val] = -1
                lost_ids.remove(val)

            counter.clear_lost_ids()

        ins, outs = counter.show_counter()
        cv2.rectangle(im0, (0, 0), (250, 50), (0, 0, 0), -1, 8)

        cv2.rectangle(im0, (int(left_array[0]), int(left_array[1])),
                      (int(left_array[2]), int(left_array[3])), (23, 158, 21),
                      3)

        cv2.putText(im0, "in: {}, out: {} ".format(ins, outs), (10, 35), 0,
                    1e-3 * im0.shape[0], (255, 255, 255), 3)

        if VideoHandler.stop_writing(im0):
            # send_new_posts(video_name, action_occured)

            sent_videos.add(VideoHandler.video_name)
            with open('data_files/logs2.txt', 'a', encoding="utf-8-sig") as wr:
                wr.write('video {}, action: {}, vector {} \n'.format(
                    VideoHandler.video_name, VideoHandler.action_occured,
                    vector_person))

            VideoHandler = Writer()
            VideoHandler.set_fps(fps)

        else:
            VideoHandler.continue_writing(im0, flag_anyone_in_door)

        if view_img:
            cv2.imshow('im0', im0)
            if cv2.waitKey(1) == ord('q'):  # q to quit
                raise StopIteration

        delta_time = (torch_utils.time_synchronized() - t1)

        if len(fpeses) < 30:
            fpeses.append(1 / delta_time)
        elif len(fpeses) == 30:
            median_fps = float(np.median(np.array(fpeses)))
            # fps = round(median_fps, 1)
            fps = 20
            print('fps set: ', fps)
            VideoHandler.set_fps(fps)
            counter.set_fps(fps)
            fpeses.append(fps)
            motion_detection = True
        else:
            print('\nflag writing video: ', VideoHandler.flag_writing_video)
            print('flag stop writing: ', VideoHandler.flag_stop_writing)
            print('flag anyone in door: ', flag_anyone_in_door)
            print('counter frames indoor: ',
                  VideoHandler.counter_frames_indoor)
        # fps = 20
        gm_time = gmtime()
        if gm_time.tm_hour == TIME_TO_SEND_MSG and not counter.just_inited:
            day = gm_time.tm_mday
            month = months_rus[gm_time.tm_mon - 1]
            year = gm_time.tm_year
            date = "{} {} {}".format(day, month, year)
            in_counted, out_counted = counter.show_counter()
            send_message(current_date=date,
                         counter_in=in_counted,
                         counter_out=out_counted)
            counter = Counter(0, 0, 0)
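# A hypothetical config dict covering every key detect(config) reads above; the
# concrete paths and thresholds are placeholders, not the project's actual values.
config = {
    "output": "inference/output",
    "source": "0",                    # webcam index, rtsp/http URL, or video file
    "weights": "weights/yolov3.pt",
    "half": False,
    "view_img": True,
    "img_size": 416,
    "config_deepsort": "deep_sort_pytorch/configs/deep_sort.yaml",
    "device": "",                     # '' lets select_device choose, or 'cpu'/'0'
    "cfg": "cfg/yolov3.cfg",
    "names": "data/coco.names",
    "augment": False,
    "classes": "None",                # the string "None" is treated as 'keep all classes'
    "conf_thres": 0.4,
    "iou_thres": 0.5,
    "agnostic_nms": False,
    "needed_classes": ["person"],
}
detect(config)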
def detect(opt, save_img=False):
    global bird_image
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith('rtsp') or source.startswith(
        'http') or source.endswith('.txt')

    # initialize the ROI frame
    cv2.namedWindow("image")
    cv2.setMouseCallback("image", get_mouse_points)

    # Initialize
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = torch.load(weights,
                       map_location=device)['model'].float()  # load to FP32
    model.to(device).eval()
    if half:
        model.half()  # to FP16

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        view_img = True
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names

    #initialize moving average window
    movingAverageUpdater = movingAverage.movingAverage(5)

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img
              ) if device.type != 'cpu' else None  # run once

    save_path = str(Path(out))
    txt_path = str(Path(out)) + '/results.txt'

    d = DynamicUpdate()
    d.on_launch()

    risk_factors = []
    frame_nums = []
    count = 0

    for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
        if (frame_idx == 0):
            while True:
                image = im0s
                cv2.imshow("image", image)
                cv2.waitKey(1)
                if len(mouse_pts) == 7:
                    cv2.destroyWindow("image")
                    break
            four_points = mouse_pts
            # Get perspective, M is the transformation matrix for bird's eye view
            M, Minv = get_camera_perspective(image, four_points[0:4])

            # Last two points in getMousePoints... this will be the threshold distance between points
            threshold_pts = src = np.float32(np.array([four_points[4:]]))

            # Convert distance to bird's eye view
            warped_threshold_pts = cv2.perspectiveTransform(threshold_pts,
                                                            M)[0]

            # Get distance in pixels
            threshold_pixel_dist = np.sqrt(
                (warped_threshold_pts[0][0] - warped_threshold_pts[1][0])**2 +
                (warped_threshold_pts[0][1] - warped_threshold_pts[1][1])**2)

            # Draw the ROI on the output images
            ROI_pts = np.array([
                four_points[0], four_points[1], four_points[3], four_points[2]
            ], np.int32)

            # initialize birdeye view video writer
            frame_h, frame_w, _ = image.shape

            bevw = birdeye_video_writer.birdeye_video_writer(
                frame_h, frame_w, M, threshold_pixel_dist)
        else:
            break
    t = time.time()
    for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
        print("Loop time: ", time.time() - t)
        t = time.time()
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        cv2.polylines(im0s, [ROI_pts], True, (0, 255, 255), thickness=4)

        # Inference
        tOther = time.time()
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()
        print("Non max suppression and inference: ", time.time() - tOther)
        print("Pre detection time: ", time.time() - t)
        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            s += '%gx%g ' % img.shape[2:]  # print string
            save_path = str(Path(out) / Path(p).name)

            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                bbox_xywh = []
                bbox_xyxy = []
                confs = []

                ROI_polygon = Polygon(ROI_pts)

                # Adapt detections to deep sort input format
                for *xyxy, conf, cls in det:
                    img_h, img_w, _ = im0.shape
                    x_c, y_c, bbox_w, bbox_h = bbox_rel(img_w, img_h, *xyxy)
                    obj = [x_c, y_c, bbox_w, bbox_h]
                    confs.append([conf.item()])
                    bbox_xyxy.append(xyxy)
                    bbox_xywh.append(obj)

                xywhs = torch.Tensor(bbox_xywh)
                confss = torch.Tensor(confs)

                # Pass detections to deepsort
                deepsortTime = time.time()
                # outputs = deepsort.update(xywhs, confss, im0)  # tracking bypassed; raw detections are used below
                print("Deepsort function call: ", (time.time() - deepsortTime))
                outputs = bbox_xyxy
                # draw boxes for visualization
                if len(outputs) > 0:
                    # filter deepsort output
                    outputs_in_ROI, ids_in_ROI = remove_points_outside_ROI(
                        bbox_xyxy, ROI_polygon)
                    center_coords_in_ROI = xywh_to_center_coords(
                        outputs_in_ROI)

                    warped_pts = birdeye_transformer.transform_center_coords_to_birdeye(
                        center_coords_in_ROI, M)

                    clusters = DBSCAN(eps=threshold_pixel_dist,
                                      min_samples=1).fit(warped_pts)
                    print(clusters.labels_)
                    draw_boxes(im0, outputs_in_ROI, clusters.labels_)

                    risk_dict = Counter(clusters.labels_)
                    bird_image = bevw.create_birdeye_frame(
                        warped_pts, clusters.labels_, risk_dict)

                    # movingAverageUpdater.updatePoints(warped_pts, ids_in_ROI)
                    #
                    # gettingAvgTime = time.time()
                    # movingAveragePairs = movingAverageUpdater.getCurrentAverage()
                    #
                    # movingAverageIds = [id for id, x_coord, y_coord in movingAveragePairs]
                    # movingAveragePts = [(x_coord, y_coord) for id, x_coord, y_coord in movingAveragePairs]
                    # embded the bird image to the video

                    # otherStuff = time.time()
                    # if(len(movingAveragePairs) > 0):
                    #     movingAvgClusters = DBSCAN(eps=threshold_pixel_dist, min_samples=1).fit(movingAveragePts)
                    #     movingAvgClustersLables = movingAvgClusters.labels_
                    #     risk_dict = Counter(movingAvgClustersLables)
                    #     bird_image = bevw.create_birdeye_frame(movingAveragePts, movingAvgClustersLables, risk_dict)
                    #     bird_image = resize(bird_image, 20)
                    #     bv_height, bv_width, _ = bird_image.shape
                    #     frame_x_center, frame_y_center = frame_w //2, frame_h//2
                    #     x_offset = 20
                    #
                    #     im0[ frame_y_center-bv_height//2:frame_y_center+bv_height//2, \
                    #         x_offset:bv_width+x_offset ] = bird_image
                    # else:
                    #     risk_dict = Counter(clusters.labels_)
                    #     bird_image = bevw.create_birdeye_frame(warped_pts, clusters.labels_, risk_dict)
                    bird_image = resize(bird_image, 20)
                    bv_height, bv_width, _ = bird_image.shape
                    frame_x_center, frame_y_center = frame_w // 2, frame_h // 2
                    x_offset = 20

                    im0[frame_y_center - bv_height // 2:frame_y_center + bv_height // 2, \
                    x_offset:bv_width + x_offset] = bird_image

                    # print("Other stuff: ", time.time() - otherStuff)

                    #write the risk graph

                    risk_factors += [compute_frame_rf(risk_dict)]
                    frame_nums += [frame_idx]
                    graphTime = time.time()

                    if (frame_idx > 100):
                        count += 1
                        frame_nums.pop(0)
                        risk_factors.pop(0)
                    if frame_idx % 10 == 0:
                        d.on_running(frame_nums, risk_factors, count,
                                     count + 100)
                    print("Graph Time: ", time.time() - graphTime)

                # Write MOT compliant results to file
                if save_txt and len(outputs_in_ROI) != 0:
                    for j, output in enumerate(outputs_in_ROI):
                        bbox_left = output[0]
                        bbox_top = output[1]
                        bbox_w = output[2]
                        bbox_h = output[3]
                        identity = output[-1]
                        with open(txt_path, 'a') as f:
                            f.write(('%g ' * 10 + '\n') %
                                    (frame_idx, identity, bbox_left, bbox_top,
                                     bbox_w, bbox_h, -1, -1, -1,
                                     -1))  # label format

            # Stream results
            if view_img:
                # cv2.imshow("bird_image", bird_image)
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:

                if dataset.mode == 'images':
                    cv2.imwrite(save_path, bird_image)
                    cv2.imwrite(save_path, im0)
                else:

                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer

                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*opt.fourcc),
                            fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        if platform == 'darwin':  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
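# The ROI selection above relies on cv2.setMouseCallback("image", get_mouse_points)
# filling a global mouse_pts list until 7 points have been clicked (4 ROI corners,
# then 2 reference points for the distance threshold, plus a final click). A minimal
# sketch of such a callback, given as an assumption rather than the project's helper:
import cv2

mouse_pts = []

def get_mouse_points(event, x, y, flags, param):
    # collect left-button clicks as (x, y) pixel coordinates
    if event == cv2.EVENT_LBUTTONDOWN:
        mouse_pts.append((x, y))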
def detect(opt, save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith(
        'rtsp') or source.startswith('http') or source.endswith('.txt')

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Initialize
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA
    now = datetime.datetime.now().strftime("%Y/%m/%d/%H:%M:%S") # current time

    # Load model
    model = torch.load(weights, map_location=device)[
        'model'].float()  # load to FP32
    model.to(device).eval()
    if half:
        model.half()  # to FP16

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        view_img = False
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    # run once
    _ = model(img.half() if half else img) if device.type != 'cpu' else None

    save_path = str(Path(out))
    txt_path = str(Path(out)) + '/results.txt'
    url = 'sample_url'
    uid = 'bus1'
    os.system('shutdown -r 06:00')
    memory = {}
    people_counter = 0
    car_counter = 0
    in_people = 0
    out_people = 0
    time_sum = 0
    now_time = datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S')
    
    for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(
            pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()
        
        
        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            s += '%gx%g ' % img.shape[2:]  # print string
            save_path = str(Path(out) / Path(p).name)
            img_center_x = int(im0.shape[1]//2)
            # line = [(0,img_center_y),(im0.shape[1],img_center_y)]
            line = [(int(img_center_x + 50),0),(img_center_x+50,int(im0.shape[0]))]
            line2 = [(int(img_center_x + 170),0),(img_center_x+170,int(im0.shape[0]))]
            cv2.line(im0,line[0],line[1],(0,0,255),5)
            cv2.line(im0,line2[0],line2[1],(0,255,0),5)
          
            
            if det is not None and len(det):
                
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(
                    img.shape[2:], det[:, :4], im0.shape).round()
                crop_xyxy = det[:,:4]
                det = det[crop_xyxy[:,0]<img_center_x + 170] # drop boxes to the right of line2
                if len(det) == 0:
                    pass
                else:

                    # Print results
                    for c in det[:, -1].unique():
                        n = (det[:, -1] == c).sum()  # detections per class
                        s += '%g %ss, ' % (n, names[int(c)])  # add to string
                    
                    bbox_xywh = []
                    confs = []
                    bbox_xyxy = []


                    # Adapt detections to deep sort input format
                    for *xyxy, conf, cls in det:
                        x_c, y_c, bbox_w, bbox_h = bbox_rel(*xyxy)

                        
                        obj = [x_c, y_c, bbox_w, bbox_h,int(cls)]
                    
                        #cv2.circle(im0,(int(x_c),int(y_c)),color=(0,255,255),radius=12,thickness = 10)
                        bbox_xywh.append(obj)
                        # bbox_xyxy.append(rec)
                        confs.append([conf.item()])
                        


                    xywhs = torch.Tensor(bbox_xywh)
                    confss = torch.Tensor(confs)

                    # Pass detections to deepsort
                    outputs = deepsort.update(xywhs, confss, im0) # deepsort
                    index_id = []
                    previous = memory.copy()
                    memory = {}
                    boxes = []
                    names_ls = []



                    # draw boxes for visualization
                    if len(outputs) > 0:
                        
                        bbox_xyxy = outputs[:, :4]
                        identities = outputs[:, -2]
                        labels = outputs[:,-1]
                        dic = {0:'person',2:'car'}
                        for i in labels:
                            names_ls.append(dic[i])
                        
                        # print('output len',len(outputs))
                        for idx, output in enumerate(outputs):
                            boxes.append([output[0],output[1],output[2],output[3]])
                            index_id.append('{}-{}'.format(names_ls[idx],output[-2]))

                            memory[index_id[-1]] = boxes[-1]

                        if time_sum>=60:
                            param={'In_people':in_people,'Out_people':out_people,'uid':uid,'time':now_time+'~'+datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S')}
                            response = requests.post(url,data=param)
                            response_text = response.text
                            with open('counting.txt','a') as f:
                                f.write('{}~{} IN : {}, Out : {} Response: {}\n'.format(now_time,datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S'),in_people,out_people,response_text))

                            people_counter,car_counter,in_people,out_people = 0,0,0,0
                            time_sum = 0
                            now_time = datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S')
                        i = int(0)
                        for box in boxes:
                            # extract the bounding box coordinates
                            (x, y) = (int(box[0]), int(box[1]))
                            (w, h) = (int(box[2]), int(box[3]))


                            if index_id[i] in previous:
                                previous_box = previous[index_id[i]]
                                (x2, y2) = (int(previous_box[0]), int(previous_box[1]))
                                (w2, h2) = (int(previous_box[2]), int(previous_box[3]))
                                p0 = (int(x + (w-x)/2), int(y + (h-y)/2))
                                p1 = (int(x2 + (w2-x2)/2), int(y2 + (h2-y2)/2))
                                
                                cv2.line(im0, p0, p1, (0,255,0), 3) # current frame obj center point - before frame obj center point
                            
                                
                                if intersect(p0, p1, line[0], line[1]) and index_id[i].split('-')[0] == 'person':
                                    people_counter += 1
                                    if p0[0] > line[1][0]:
                                        in_people +=1
                                    else:
                                        out_people +=1
                                if intersect(p0, p1, line[0], line[1]) and index_id[i].split('-')[0] == 'car':
                                    car_counter +=1
                                
                                
    
                            i += 1

                        draw_boxes(im0,bbox_xyxy,identities,labels)
                            
                        

                    # Write MOT compliant results to file
                    if save_txt and len(outputs) != 0:
                        for j, output in enumerate(outputs):
                            bbox_left = output[0]
                            bbox_top = output[1]
                            bbox_w = output[2]
                            bbox_h = output[3]
                            identity = output[-1]
                            with open(txt_path, 'a') as f:
                                f.write(('%g ' * 10 + '\n') % (frame_idx, identity, bbox_left,
                                                            bbox_top, bbox_w, bbox_h, -1, -1, -1, -1))  # label format       

            else:
                deepsort.increment_ages()
            cv2.putText(im0, 'In : {}, Out : {}'.format(in_people,out_people),(130,50),cv2.FONT_HERSHEY_COMPLEX,1.0,(0,0,255),3)
            cv2.putText(im0, 'Person : {}'.format(people_counter), (130,100),cv2.FONT_HERSHEY_COMPLEX,1.0,(0,0,255),3)
            # Print time (inference + NMS)
            if time_sum>=60:
                param={'In_people':in_people,'Out_people':out_people,'uid':uid,'time':now_time+'~'+datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S')}
                response = requests.post(url,data=param)
                response_text = response.text
                with open('counting.txt','a') as f:
                    f.write('{}~{} IN : {}, Out : {}, Response: {}\n'.format(now_time,datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S'),in_people,out_people,response_text))

                people_counter,car_counter,in_people,out_people = 0,0,0,0
                time_sum = 0
                now_time = datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S')
            
            print('%sDone. (%.3fs)' % (s, t2 - t1))
            time_sum += t2-t1
            


            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    im0= cv2.resize(im0,(0,0),fx=0.5,fy=0.5,interpolation=cv2.INTER_LINEAR)
                    cv2.imwrite(save_path, im0)
                else:
                    
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*opt.fourcc), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        if platform == 'darwin':  # MacOS
            os.system('open ' + save_path)
    param={'In_people':in_people,'Out_people':out_people,'uid':uid,'time':now_time+'~'+datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S')}
    response = requests.post(url,data=param)
    response_text = response.text
    with open('counting.txt','a') as f:
        f.write('{}~{} IN : {}, Out : {}, Response: {}\n'.format(now_time,datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S'),in_people,out_people,response_text))
    print('Done. (%.3fs)' % (time.time() - t0))
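# The line-crossing check above calls an intersect() helper that is not shown.
# A common implementation, assumed here, is the counter-clockwise segment test:
# segments AB and CD cross when A and B lie on opposite sides of CD and C and D
# lie on opposite sides of AB.
def ccw(A, B, C):
    return (C[1] - A[1]) * (B[0] - A[0]) > (B[1] - A[1]) * (C[0] - A[0])

def intersect(A, B, C, D):
    # True if segment AB intersects segment CD (general position)
    return ccw(A, C, D) != ccw(B, C, D) and ccw(A, B, C) != ccw(A, B, D)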
def infer(model,
          data_path,
          detections_file,
          resize,
          max_size,
          batch_size,
          config_deepsort,
          mixed_precision=False,
          is_master=True,
          world=0,
          original_annotations=None,
          use_dali=True,
          is_validation=False,
          verbose=False,
          save_images=False,
          output_path='./'):
    'Run inference on images from path'
    # import pdb;pdb.set_trace()
    if os.path.isdir(output_path):
        shutil.rmtree(output_path)
    os.mkdir(output_path)
    print('model', model)
    backend = 'pytorch' if isinstance(model, Model) or isinstance(
        model, DDP) else 'tensorrt'

    #print("backend",backend)
    stride = model.module.stride if isinstance(model, DDP) else model.stride

    # TensorRT only supports fixed input sizes, so override input size accordingly
    if backend == 'tensorrt': max_size = max(model.input_size)

    cfg = get_config()
    cfg.merge_from_file(config_deepsort)

    conf_threshold = cfg.DEEPSORT.MIN_CONFIDENCE
    # Prepare model
    if backend == 'pytorch':
        # If we are doing validation during training,
        # no need to register model with AMP again
        if not is_validation:
            if torch.cuda.is_available(): model = model.cuda()
            model = amp.initialize(model,
                                   None,
                                   opt_level='O2' if mixed_precision else 'O0',
                                   keep_batchnorm_fp32=True,
                                   verbosity=0)

        model.eval()

    if verbose:
        print('   backend: {}'.format(backend))
        print('    device: {} {}'.format(
            world, 'cpu' if not torch.cuda.is_available() else
            'gpu' if world == 1 else 'gpus'))
        print('     batch: {}, precision: {}'.format(
            batch_size, 'unknown' if backend == 'tensorrt' else
            'mixed' if mixed_precision else 'full'))

    print('Running inference on {}'.format(os.path.basename(data_path)))

    results = []
    profiler = Profiler(['infer', 'fw'])

    def processResult(results, data_iterator):
        p_detections = []
        C = data_iterator.coco
        for d in results:

            id, outputs, ratios = d

            img = C.loadImgs([id])
            filename = img[0]['file_name']
            result = ['', [], []]
            result[0] = os.path.join(path, filename)
            if len(outputs) > 0:
                # import pdb;pdb.set_trace()
                outputs[:, :4] = outputs[:, :4] / ratios
                result[1] = outputs
            A = C.loadAnns(C.getAnnIds([id]))
            # import pdb;pdb.set_trace()
            for a in A:
                x1, y1, w, h = a['bbox']
                a['bbox'] = [x1, y1, x1 + w, y1 + h]
            result[2] = A
            p_detections += [result]
        return p_detections

    path = data_path  #+ 'sequences/'
    videoList = os.listdir(path)

    # Prepare dataset
    if verbose: print('Preparing dataset...')

    # Create annotations if none was provided
    if not original_annotations:
        return
    else:
        annotations = original_annotations

    data_iterator = DataIterator(path,
                                 resize,
                                 max_size,
                                 batch_size,
                                 stride,
                                 world,
                                 annotations,
                                 training=False)

    detection_results = []

    id_count = 0
    sort_time = 0
    with torch.no_grad():
        for i, (data, ids, ratios) in enumerate(tqdm(data_iterator)):

            video = os.path.dirname(
                data_iterator.coco.loadImgs(ids.item())[0]['file_name'])
            if not os.path.isfile(os.path.join(output_path, video + '.txt')):
                id_count = i
                open(os.path.join(output_path, video + '.txt'), "w+")
                deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,\
                    max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,\
                    nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,\
                    max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET,\
                    use_cuda=True)

                print(id_count)
                if save_images and len(results) > 0:
                    output_anno = processResult(results,
                                                data_iterator=data_iterator)

                    print("saving output images...")
                    save_path = os.path.dirname(
                        data_path) + '/outputs/' + video
                    if os.path.isdir(save_path):
                        shutil.rmtree(save_path)
                    os.mkdir(save_path)
                    show_MOT(save_path, output_anno)
                results = []

            # print("data:",data)
            # import pdb;pdb.set_trace()
            profiler.start('fw')
            t1 = time_synchronized()
            scores, boxes, classes = model(data)
            profiler.stop('fw')
            detection_results.append([scores, boxes, classes, ids, ratios])

            # import pdb;pdb.set_trace()

            t2 = time_synchronized()

            im = data[0].permute(1, 2, 0).cpu().numpy()
            xywhs = torch.stack([
                torch.stack([
                    x1 + (x2 - x1 + 1) / 2, y1 + (y2 - y1 + 1) / 2,
                    x2 - x1 + 1, y2 - y1 + 1
                ]) for x1, y1, x2, y2 in boxes[0].round()
            ]).cpu()

            t3 = time_synchronized()
            outputs = deepsort.update(xywhs, scores[0].cpu(), im,
                                      mapClasses(classes[0].cpu()))

            # outputs = torch.Tensor(outputs).reshape(1,-1,5)
            t4 = time_synchronized()
            sort_time += t4 - t3
            if len(outputs) > 0:

                outputs[:, :4] = outputs[:, :4] / ratios[0].item()
            # print(t2-t1,t3-t2,t4-t3)

            results.append([ids[0].item(), outputs, 1])

            # write result to txt
            if len(outputs) != 0:
                for j, output in enumerate(outputs):
                    bbox_left = output[0]
                    bbox_top = output[1]
                    bbox_w = output[2] - output[0]
                    bbox_h = output[3] - output[1]
                    identity = output[-2]
                    cls = output[-1]
                    # if cls == 2:
                    #     continue
                    # import pdb;pdb.set_trace()

                    with open(os.path.join(output_path, video + '.txt'),
                              'a') as f:
                        f.write(('%g,' * 10 + '\n') %
                                (ids[0].item() - id_count, identity, bbox_left,
                                 bbox_top, bbox_w, bbox_h, 1, cls, -1,
                                 -1))  # label format

            profiler.bump('infer')
            if verbose and (profiler.totals['infer'] > 60
                            or i == len(data_iterator) - 1):
                size = len(data_iterator.ids)
                msg = '[{:{len}}/{}]'.format(min((i + 1) * batch_size, size),
                                             size,
                                             len=len(str(size)))
                msg += ' {:.3f}s/{}-batch'.format(profiler.means['infer'],
                                                  batch_size)
                msg += ' (fw: {:.3f}s)'.format(profiler.means['fw'])
                msg += ', {:.1f} im/s'.format(batch_size /
                                              profiler.means['infer'])
                msg += ', {:.3f} in deepsort'.format(t4 - t3)

                print(msg, flush=True)

                profiler.reset()

        print("Average FPS = {}".format(i / profiler.totals['infer']))
        print("Average tracking time = {}".format(sort_time / i))

    # Gather results from all devices
    if verbose: print('Gathering results...')

    detection_results = [torch.cat(r, dim=0) for r in zip(*detection_results)]
    if world > 1:
        for r, result in enumerate(detection_results):
            all_result = [
                torch.ones_like(result, device=result.device)
                for _ in range(world)
            ]
            torch.distributed.all_gather(list(all_result), result)
            detection_results[r] = torch.cat(all_result, dim=0)

    # import pdb; pdb.set_trace()

    if is_master:

        # Copy buffers back to host
        detection_results = [r.cpu() for r in detection_results]

        # Collect detections
        detections = []
        processed_ids = set()
        count = [0, 0, 0]
        for scores, boxes, classes, image_id, ratios in zip(
                *detection_results):

            image_id = image_id.item()
            if image_id in processed_ids:
                continue
            processed_ids.add(image_id)

            keep = (scores > 0).nonzero()
            scores = scores[keep].view(-1)
            boxes = boxes[keep, :].view(-1, 4) / ratios
            # classes = classes[keep].view(-1).int()
            # import pdb; pdb.set_trace()
            classes = mapClasses(classes[keep].view(-1).int())

            #print('classes', classes)

            for score, box, cat in zip(scores, boxes, classes):
                x1, y1, x2, y2 = box.data.tolist()
                cat = cat.item()
                if 'annotations' in data_iterator.coco.dataset:

                    cat = data_iterator.coco.getCatIds()[cat]
                    #if cat !=3:
                    #continue
                    #print('cat',cat)
                    count[cat] += 1

                if cat != 0:
                    detections.append({
                        'image_id':
                        image_id,
                        'score':
                        score.item(),
                        'bbox': [x1, y1, x2 - x1 + 1, y2 - y1 + 1],
                        'category_id':
                        cat,
                        'identity':
                        1
                    })

        print(count)
        if detections:
            # import pdb;pdb.set_trace()
            # Save detections
            if detections_file and verbose:
                print('Writing {}...'.format(detections_file))
            detections = {'annotations': detections}
            detections['images'] = data_iterator.coco.dataset['images']

            if 'categories' in data_iterator.coco.dataset:
                detections['categories'] = [
                    data_iterator.coco.dataset['categories']
                ]
            if detections_file:
                json.dump(detections, open(detections_file, 'w'), indent=4)

            # Evaluate model on dataset
            if 'annotations' in data_iterator.coco.dataset:
                if verbose: print('Evaluating model...')
                with redirect_stdout(None):
                    coco_pred = data_iterator.coco.loadRes(
                        detections['annotations'])
                    coco_eval = COCOeval(data_iterator.coco, coco_pred, 'bbox')
                    coco_eval.evaluate()
                    coco_eval.accumulate()
                coco_eval.summarize()
        else:
            print('No detections!')
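# Each per-video txt file written by infer() holds comma-separated rows in the order
# (frame, track_id, x1, y1, w, h, 1, class, -1, -1), with a trailing comma. A small
# reader sketch for those files (the function name is an assumption):
import numpy as np

def load_tracking_txt(path):
    rows = []
    with open(path) as f:
        for line in f:
            fields = [v for v in line.strip().split(',') if v != '']
            rows.append([float(v) for v in fields])
    return np.array(rows)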
def detect(opt, save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith(
        'rtsp') or source.startswith('http') or source.endswith('.txt')

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Initialize
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = torch.load(weights, map_location=device)[
        'model'].float()  # load to FP32
    model.to(device).eval()
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        view_img = True
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    # run once
    _ = model(img.half() if half else img) if device.type != 'cpu' else None

    save_path = str(Path(out))
    txt_path_raw = str(Path(out)) + '/results_raw.txt'

    for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(
            pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)
            print(pred)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            s += '%gx%g ' % img.shape[2:]  # print string
            save_path = str(Path(out) / Path(p).name)

            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(
                    img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                bbox_xywh = []
                confs = []
                clss = []
                # Adapt detections to deep sort input format
                for *xyxy, conf, cls in det:
                    x_c, y_c, bbox_w, bbox_h = bbox_rel(*xyxy)
                    obj = [x_c, y_c, bbox_w, bbox_h]
                    bbox_xywh.append(obj)
                    confs.append([conf.item()])
                    clss.append(cls.item())

                cls_conf = confs
                cls_ids = clss
                # xywhs = torch.Tensor(bbox_xywh)
                # confss = torch.Tensor(confs)
                # cls_ids = clss


                # if len(bbox_xywh) == 0:
                #     continue
                # print("detection cls_ids:", cls_ids)

                #filter cls id for tracking
                # print("cls_ids")
                # print(cls_ids)

                # # select class
                # mask = []
                # lst_move_life = [0,1,2]
                # # lst_for_track = []
                
                # for id in cls_ids:
                #     if id in lst_move_life:
                #         # lst_for_track.append(id)
                #         mask.append(True)
                #     else:
                #         mask.append()
                # # print("mask cls_ids:", mask)

                # # print(bbox_xywh)
                # bbox_xywh = list(compress(bbox_xywh,mask))
                # bbox dilation just in case bbox too small, delete this line if using a better pedestrian detector
                # bbox_xywh[:,3:] *= 1.2
                # cls_conf = list(compress(cls_conf,mask))
                # print(cls_conf)

                bbox_xywh = torch.Tensor(bbox_xywh)
                cls_conf = torch.Tensor(cls_conf)

                # Pass detections to deepsort
                outputs = deepsort.update(bbox_xywh, cls_conf, im0, cls_ids)
                '''
                TODO: add a per-class counter here
                '''
                # counting num and class

                # draw boxes for visualization
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, 4:5]
                    cls_id = outputs[:,-1]
                    # print(outputs[:, -1])  # --> a problem was found here
                    # print("track res cls_id:", cls_id)
                    # cls_ids_show = [cls_ids[i] for i in cls_id]
                    draw_boxes(im0, bbox_xyxy, cls_id, identities)

                # Write MOT compliant results to file
                if save_txt and len(outputs) != 0:
                    for j, output in enumerate(outputs):
                        bbox_left = output[0]
                        bbox_top = output[1]
                        bbox_w = output[2]
                        bbox_h = output[3]
                        identity = output[4]
                        classname = output[5]

                        with open(txt_path_raw, 'a') as f:  # raw first-pass results produced by YOLOv5 + DeepSort
                            # label format: frame, id, left, top, w, h, class, -1, -1, -1
                            f.write(('%g ' * 10 + '\n') % (frame_idx, identity, bbox_left,
                                                           bbox_top, bbox_w, bbox_h, classname, -1, -1, -1))

            else:
                deepsort.increment_ages()

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                print('saving img!')
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    print('saving video!')
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*opt.fourcc), fps, (w, h))
                    vid_writer.write(im0)
                    

    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        if platform == 'darwin':  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
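    # Note (not part of the original listing): the detect() examples here call
    # bbox_rel(*xyxy) to turn corner coordinates (x1, y1, x2, y2) into the
    # centre-based (x_center, y_center, width, height) boxes that DeepSort's
    # update() expects.  A minimal sketch of such a helper, assuming the inputs
    # are scalar tensors or floats:
    def bbox_rel(*xyxy):
        """(x1, y1, x2, y2) corners -> (x_center, y_center, width, height)."""
        x1, y1, x2, y2 = (float(v) for v in xyxy)
        w, h = abs(x2 - x1), abs(y2 - y1)
        return min(x1, x2) + w / 2.0, min(y1, y2) + h / 2.0, w, h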
    def frames():
        out, weights1, weights2, imgsz = \
            'result/', 'weights/yolov5x.pt', 'weights/best.pt', 416
        source = 'uploads/Mask.mp4'

        save_txt = True
        txt_path = 'content/outputs.txt'
        # initialize deepsort
        cfg = get_config()
        cfg.merge_from_file('deep_sort_pytorch/configs/deep_sort.yaml')
        deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                            max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                            nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                            max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET,
                            use_cuda=True)
        #Initialize
        device = select_device()
        if os.path.exists(out):
            shutil.rmtree(out)  # delete output folder
        os.makedirs(out)  # make new output folder

        # Half precision (only supported on CUDA)
        half = device.type != 'cpu'
        print('half = ' + str(half))

        # Load the first-stage YOLOv5 detector (person detection feeding DeepSort)
        model = torch.load(weights1, map_location=device)['model'].float()  # load to FP32
        model.to(device).eval()
        if half:
            model.half()  # to FP16
        
        dataset = LoadImages(source, img_size=imgsz)
        names = model.module.names if hasattr(model, 'module') else model.names
        colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

        # Run inference
        t0 = time.time()
        img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
        # run once
        _ = model(img.half() if half else img) if device.type != 'cpu' else None

        #save_path = str(Path(out))
        #txt_path = str(Path(out)) + '/results.txt'

        for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
            img = torch.from_numpy(img).to(device)
            img = img.half() if half else img.float()  # uint8 to fp16/32
            img /= 255.0  # 0 - 255 to 0.0 - 1.0
            if img.ndimension() == 3:
                img = img.unsqueeze(0)

            # Inference
            t1 = time_synchronized()
            pred = model(img, augment=False)[0]

            # Apply NMS
            pred = non_max_suppression(
                pred, 0.6, 0.3, classes=0, agnostic=False)
            t2 = time_synchronized()

            # Process detections
            for i, det in enumerate(pred):  # detections per image
                p, s, im0 = path, '', im0s
                s += '%gx%g ' % img.shape[2:]  # print string

                if det is not None and len(det):
                    # Rescale boxes from img_size to im0 size
                    det[:, :4] = scale_coords(
                        img.shape[2:], det[:, :4], im0.shape).round()

                    # Print results
                    for c in det[:, -1].unique():
                        n = (det[:, -1] == c).sum()  # detections per class
                        s += '%g %ss, ' % (n, names[int(c)])  # add to string

                    bbox_xywh = []
                    confs = []

                    # Adapt detections to deep sort input format
                    for *xyxy, conf, cls in det:
                        x_c, y_c, bbox_w, bbox_h = bbox_rel(*xyxy)
                        obj = [x_c, y_c, bbox_w, bbox_h]
                        bbox_xywh.append(obj)
                        confs.append([conf.item()])

                    xywhs = torch.Tensor(bbox_xywh)
                    confss = torch.Tensor(confs)

                    # Pass detections to deepsort
                    outputs = deepsort.update(xywhs, confss, im0)
                    #print(outputs)
                    # draw boxes for visualization
                    if len(outputs) > 0:
                        bbox_xyxy = outputs[:, :4]
                        identities = outputs[:, -1]
                        draw_boxes(im0, bbox_xyxy, identities)
                    #print(det)
                    # Write MOT compliant results to file
                    if save_txt and len(outputs) != 0:
                        print('inside savetxt')
                        print(f'{s}Done. ({t2 - t1:.3f}s)')
                    for j, output in enumerate(outputs):
                        bbox_left = output[0]
                        bbox_top = output[1]
                        bbox_w = output[2]
                        bbox_h = output[3]
                        identity = output[-1]
                        with open(txt_path, 'a') as f:
                            f.write(('%g ' * 10 + '\n') % (frame_idx, identity, bbox_left,
                                                            bbox_top, bbox_w, bbox_h, -1, -1, -1, -1))  # label format

                else:
                    deepsort.increment_ages()

        # Load yolo model
        model = attempt_load(weights2, map_location=device)
        stride = int(model.stride.max())  # model stride
        imgsz = check_img_size(imgsz, s=stride)  # check img_size

        if half:
            model.half()
        
        #model.to(device).float().eval()

        # Second-stage classifier
        classify = False
        if classify:
            modelc = load_classifier(name='resnet101', n=2)  # initialize
            modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
            #modelc.to(device).float().eval()

        # Set Dataloader
        vid_path, vid_writer = None, None
        dataset = LoadImages(source, img_size=imgsz)
        #dataset = LoadStreams(source, img_size=imgsz)
        names = model.module.names if hasattr(model, 'module') else model.names
        colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

        # Run inference
        if device.type != 'cpu':
            model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once
        t0 = time.time()
        history = defaultdict(list)
        DSOutput = pd.read_csv('content/outputs.txt', sep = ' ', header = None)

        for frameNumber, (path, img, im0s, vid_cap) in enumerate(dataset):
            img = torch.from_numpy(img).to(device)
            img = img.half() if half else img.float()  # uint8 to fp16/32
            img /= 255.0  # 0 - 255 to 0.0 - 1.0
            if img.ndimension() == 3:
                img = img.unsqueeze(0)

            # Inference
            t1 = time_synchronized()
            pred = model(img, augment=False)[0]
            
            # Apply NMS
            pred = non_max_suppression(pred, 0.3, 0.3, classes=None, agnostic=False)
            t2 = time_synchronized()
            
            # Apply Classifier
            if classify:
                pred = apply_classifier(pred, modelc, img, im0s)

            for i, det in enumerate(pred):  # detections per image
                p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)

                save_path = 'result/Mask.mp4'
                s += '%gx%g ' % img.shape[2:]  # print string
                gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  #  normalization gain whwh
                if len(det):
                    num_cat = 6

                    classes = (det[:,-1].cpu().numpy()).astype(int)
                    one_hot_cats = np.eye(num_cat)[classes].reshape(-1, num_cat)

                    counts_per_cat = one_hot_cats.sum(axis=0)
                    #print("Countspercat ", counts_per_cat)
                    score = round(counts_per_cat[[1,3,5]].sum() / len(det),3)

                    weighted_counts_per_cat = one_hot_cats.T @ np.asarray(det[:,-2].cpu())
                    WeightedCompliance = weighted_counts_per_cat[[1,3,5]].sum() / weighted_counts_per_cat.sum()
                    
                    # Rescale boxes from img_size to im0 size
                    det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
                    
                    person_coords = DSOutput[DSOutput.iloc[:,0]==frameNumber].values.reshape(-1,11)

                    CurrentFrameDetection = -1 * np.ones(len(det))  # -1 means "no person matched"

                    if len(person_coords) != 0:
                        for itemp,mask_coord in enumerate(det):
                        
                        # overlaps = [Overlap(mask_coord[:4], person_coord, img.shape[2], img.shape[3]) for person_coord in person_coords[:,2:6]]
                            overlaps = [Overlap(mask_coord[:4].cpu(), person_coord, 10000, 10000) for person_coord in person_coords[:,2:6]]

                            best_overlap = np.argmax(overlaps)
                            best_person = person_coords[best_overlap,1]
                            history[best_person].append(mask_coord[-1].cpu().item())
                            CurrentFrameDetection[itemp] = best_person

                    #for c in det[:, -1].unique():  #probably error with torch 1.5
                    for c in det[:, -1].unique():
                        n = (det[:, -1] == c).sum()  # detections per class
                        s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string
                        
                    CurrentFrameDetection = list(reversed(CurrentFrameDetection))
                    
                    for mask, (*xyxy, conf, cls) in enumerate(reversed(det)):
                        label = f'{names[int(cls)]} {conf:.2f}'
                        plot_one_box(xyxy, im0, score, label=label, color=colors[int(cls)], personid=CurrentFrameDetection[mask], line_thickness=3)
                print(f'{s}Done. ({t2 - t1:.3f}s)')


                if vid_path != save_path:  # new video
                    vid_path = save_path
                    if isinstance(vid_writer, cv2.VideoWriter):
                        vid_writer.release()  # release previous video writer

                    fourcc = 'mp4v'  # output video codec
                    fps = vid_cap.get(cv2.CAP_PROP_FPS)
                    w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                    h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                    vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                vid_writer.write(im0)
 
            yield cv2.imencode('.jpg', im0)[1].tobytes()

        compliance = 0
        total=0
        txt_result_path = 'result/result.txt'
        for k,v in history.items():
            # 1,3,5 are full
            # 2,4 are partial
            # 0 no
            good_frames = sum(np.array(v)%2==1)
            bad_frames = sum(np.array(v)%2==0)
            if len(v) > 4:
                total += 1
                if good_frames >= bad_frames:
                    compliance += 1
                    print('Person {} is compliant'.format(k))
                    with open(txt_result_path, 'a') as f:
                        f.write('Person {} is compliant \n'.format(k))
                else:
                    print('Person {} is not compliant'.format(k))
                    with open(txt_result_path, 'a') as f:
                        f.write('Person {} is not compliant \n'.format(k))
        # Guard against division by zero when no one was tracked long enough
        Overall_Compliance = round(compliance / total, 3) if total else 0.0
        with open(txt_result_path, 'a') as f:
            f.write('Overall compliance: ' + str(Overall_Compliance))
        print('Overall compliance:', Overall_Compliance)
        print(f'Done. ({time.time() - t0:.3f}s)')
        return Overall_Compliance
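The mask-compliance pass above matches each mask detection to a tracked person with an Overlap(box_a, box_b, width, height) helper that the listing does not include. Judging from the call sites (both boxes are corner-format, and the width/height arguments of 10000 make any clipping a no-op), one plausible reading is a plain intersection-over-union score; the sketch below is an assumption along those lines, not the original implementation.

def Overlap(box_a, box_b, max_w, max_h):
    """Hypothetical IoU between two (x1, y1, x2, y2) boxes, clipped to max_w/max_h."""
    ax1, ay1, ax2, ay2 = (max(0.0, min(float(v), m))
                          for v, m in zip(box_a, (max_w, max_h, max_w, max_h)))
    bx1, by1, bx2, by2 = (max(0.0, min(float(v), m))
                          for v, m in zip(box_b, (max_w, max_h, max_w, max_h)))
    inter_w = max(0.0, min(ax2, bx2) - max(ax1, bx1))
    inter_h = max(0.0, min(ay2, by2) - max(ay1, by1))
    inter = inter_w * inter_h
    union = (ax2 - ax1) * (ay2 - ay1) + (bx2 - bx1) * (by2 - by1) - inter
    return inter / union if union > 0 else 0.0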
Exemple #10
0
def detect(config):
    save_img = False
    imgsz = (320, 320) if ONNX_EXPORT else config[
        "img_size"]  # (320, 192) or (416, 256) or (608, 352) for (height, width)
    out, source, weights, half, view_img, save_txt = config["output"], config["source"], config["weights"], \
                                                     config["half"], config["view_img"], config["save_txt"]
    webcam = source == '0' or source.startswith('rtsp') or source.startswith(
        'http') or source.endswith('.txt')
    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(config["config_deepsort"])
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Initialize
    device = torch_utils.select_device(
        device='cpu' if ONNX_EXPORT else config["device"])
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Initialize model
    model = Darknet(config["cfg"], imgsz)

    # Load weights
    attempt_download(weights)
    if weights.endswith('.pt'):  # pytorch format
        model.load_state_dict(torch.load(weights,
                                         map_location=device)['model'],
                              strict=False)
    else:  # darknet format
        load_darknet_weights(model, weights)

    # Second-stage classifier (not used yet)
    classify = False
    modelc = 0
    if classify:
        modelc = torch_utils.load_classifier(name='resnet101',
                                             n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Eval mode
    model.to(device).eval()

    # Fuse Conv2d + BatchNorm2d layers
    # model.fuse()

    # Export mode
    if ONNX_EXPORT:
        # model.fuse()
        img = torch.zeros((1, 3) + imgsz)  # (1, 3, 320, 192)
        f = config["weights"].replace(config["weights"].split('.')[-1],
                                      'onnx')  # *.onnx filename
        torch.onnx.export(model,
                          img,
                          f,
                          verbose=False,
                          opset_version=9,
                          input_names=['images'],
                          output_names=['classes', 'boxes'])

        # Validate exported model
        import onnx
        model = onnx.load(f)  # Load the ONNX model
        onnx.checker.check_model(model)  # Check that the IR is well formed
        print(onnx.helper.printable_graph(
            model.graph))  # Print a human readable representation of the graph
        return

    # Half precision
    half = half and device.type != 'cpu'  # half precision only supported on CUDA
    if half:
        model.half()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        torch.backends.cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        view_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = load_classes(config["names"])
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img.float()
              ) if device.type != 'cpu' else None  # run once

    for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = torch_utils.time_synchronized()
        pred = model(img, augment=config["augment"])[0]
        t2 = torch_utils.time_synchronized()

        # to float
        if half:
            pred = pred.float()

        # Apply NMS
        classes = None if config["classes"] == "None" else config["classes"]

        pred = non_max_suppression(pred,
                                   config["conf_thres"],
                                   config["iou_thres"],
                                   multi_label=False,
                                   classes=classes,
                                   agnostic=config["agnostic_nms"])

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections for image i
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out)) + '/results.txt'

            s += '%gx%g ' % img.shape[2:]  # print string

            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  #  normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from imgsz to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    if names[int(c)] not in config["needed_classes"]:
                        continue
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %s, ' % (n, names[int(c)])  # add to string

                bbox_xywh = []
                confs = []

                # Write results
                for *xyxy, conf, cls in det:
                    #  check if bbox`s class is needed
                    if names[int(cls)] not in config["needed_classes"]:
                        continue

                    x_c, y_c, bbox_w, bbox_h = bbox_rel(*xyxy)
                    obj = [x_c, y_c, bbox_w, bbox_h]
                    bbox_xywh.append(obj)
                    confs.append([conf.item()])

                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        with open(save_path[:save_path.rfind('.')] + '.txt',
                                  'a') as file:
                            file.write(('%g ' * 5 + '\n') %
                                       (cls, *xywh))  # label format

                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)])

                detections = torch.Tensor(bbox_xywh)
                confidences = torch.Tensor(confs)

                # Pass detections to deepsort
                if len(detections) == 0:
                    continue
                outputs = deepsort.update(detections, confidences, im0)

                # draw boxes for visualization
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, -1]
                    draw_boxes(im0, bbox_xyxy, identities)

                # Write MOT compliant results to file
                if save_txt and len(outputs) != 0:
                    for j, output in enumerate(outputs):
                        bbox_left = output[0]
                        bbox_top = output[1]
                        bbox_w = output[2]
                        bbox_h = output[3]
                        identity = output[-1]
                        with open(txt_path, 'a') as f:
                            f.write(('%g ' * 10 + '\n') %
                                    (frame_idx, identity, bbox_left, bbox_top,
                                     bbox_w, bbox_h, -1, -1, -1,
                                     -1))  # label format

            else:
                deepsort.increment_ages()

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                print('saving img!')
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    print('saving video!')
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer

                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path,
                            cv2.VideoWriter_fourcc(*config["fourcc"]), fps,
                            (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        if platform == 'darwin':  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
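# Note (not part of the original example): the save_txt branch above normalises
# boxes with xyxy2xywh() before dividing by the gain tensor gn.  In the
# YOLOv3/YOLOv5 utilities this conversion is essentially the following
# (shown here only as a reference sketch):
import numpy as np
import torch

def xyxy2xywh(x):
    """Convert nx4 boxes from (x1, y1, x2, y2) to (x_center, y_center, w, h)."""
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[:, 0] = (x[:, 0] + x[:, 2]) / 2  # x centre
    y[:, 1] = (x[:, 1] + x[:, 3]) / 2  # y centre
    y[:, 2] = x[:, 2] - x[:, 0]        # width
    y[:, 3] = x[:, 3] - x[:, 1]        # height
    return y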
def detect(config):
    COLOR_AROUND_DOOR = (48, 58, 221)
    COLOR_DOOR = (23, 158, 21)
    COLOR_LINE = (214, 4, 54)
    sent_videos = set()
    video_name = ""
    fpeses = []
    fps = 0

    # door_array = select_object()
    # door_array = [475, 69, 557, 258]
    global flag, vid_writer, lost_ids
    # initial parameters
    door_array = [611, 70, 663, 310]
    around_door_array = [507, 24, 724, 374]
    low_border = 225
    high_border = 342
    #
    door_c = find_centroid(door_array)
    rect_door = Rectangle(door_array[0], door_array[1], door_array[2],
                          door_array[3])
    rect_around_door = Rectangle(around_door_array[0], around_door_array[1],
                                 around_door_array[2], around_door_array[3])
    # socket
    HOST = "localhost"
    PORT = 8083
    # camera info
    save_img = True
    imgsz = (416, 416) if ONNX_EXPORT else config[
        "img_size"]  # (320, 192) or (416, 256) or (608, 352) for (height, width)
    out, source, weights, half, view_img = config["output"], config["source"], config["weights"], \
                                           config["half"], config["view_img"]
    webcam = source == '0' or source.startswith('rtsp') or source.startswith(
        'http') or source.endswith('.txt')
    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(config["config_deepsort"])
    # initial objects of classes
    counter = Counter()
    VideoHandler = Writer()
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)
    # Initialize device, weights etc.
    device = torch_utils.select_device(
        device='cpu' if ONNX_EXPORT else config["device"])
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    # Initialize colors
    names = load_classes(config["names"])
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    if config["category_num"] <= 0:
        raise SystemExit('ERROR: bad category_num (%d)!' %
                         config["category_num"])
    if not os.path.isfile('yolo/%s.trt' % config["model"]):
        raise SystemExit('ERROR: file (yolo/%s.trt) not found!' %
                         config["model"])

    # cap = cv2.VideoCapture(config["source"])
    # if not cap.isOpened():
    #     raise SystemExit('ERROR: failed to open the input video file!')
    # frame_width, frame_height = int(cap.get(3)), int(cap.get(4))
    webcam = source == '0' or source.startswith('rtsp') or source.startswith(
        'http') or source.endswith('.txt')
    if webcam:
        torch.backends.cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)
    img = torch.zeros((3, imgsz, imgsz), device=device)  # init img

    cls_dict = get_cls_dict(config["category_num"])
    #vis = BBoxVisualization(cls_dict)
    vis = None
    h, w = get_input_shape(config["model"])
    trt_yolo = TrtYOLO(config["model"], (h, w), config["category_num"],
                       config["letter_box"])

    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
        sock.connect((HOST, PORT))
        img_shape = (256, 256)
        for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
            t0 = time.time()

            flag_move = False
            flag_anyone_in_door = False

            ratio_detection = 0
            # Process detections
            lost_ids = counter.return_lost_ids()
            if webcam:  # batch_size >= 1
                p, s, im0 = path[0], '%g: ' % 0, im0s[0].copy(
                )  # TODO mb needed in loop for detection
            else:
                p, s, im0 = path, '', im0s
            preds, confs, clss = perform_detection(
                frame=im0,
                trt_yolo=trt_yolo,
                conf_th=config["conf_thres"],
                vis=vis)
            scaled_pred = []
            scaled_conf = []
            detections = torch.Tensor()
            for i, (det, conf, cls) in enumerate(zip(preds, confs, clss)):
                if det is not None and len(det):
                    if names[int(cls)] not in config["needed_classes"]:
                        continue
                    det = xyxy_to_xywh(det)
                    # det = scale_coords(img_shape, det, im0.shape)
                    scaled_pred.append(det)
                    scaled_conf.append(conf)
                detections = torch.Tensor(scaled_pred)
                confidences = torch.Tensor(scaled_conf)
                # Pass detections to deepsort
            if len(detections) != 0:
                outputs = deepsort.update(detections, confidences, im0)
                # draw boxes for visualization
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, -1]
                    draw_boxes(im0, bbox_xyxy, identities)
                    # print('bbox_xywh ', bbox_xywh, 'id', identities)
                    counter.update_identities(identities)
                    for bbox_tracked, id_tracked in zip(bbox_xyxy, identities):
                        ratio_initial = find_ratio_ofbboxes(
                            bbox=bbox_tracked, rect_compare=rect_around_door)
                        ratio_door = find_ratio_ofbboxes(
                            bbox=bbox_tracked, rect_compare=rect_door)
                        #  the person appears inside the door contour for the first time
                        if ratio_initial > 0.2:
                            if VideoHandler.counter_frames_indoor == 0:
                                #     flag that video recording has started
                                VideoHandler.start_video(id_tracked)
                            flag_anyone_in_door = True
                        elif ratio_initial > 0.2 and id_tracked not in VideoHandler.id_inside_door_detected:
                            VideoHandler.continue_opened_video(id=id_tracked,
                                                               seconds=3)
                            flag_anyone_in_door = True
                        if id_tracked not in counter.people_init or counter.people_init[
                                id_tracked] == 0:
                            counter.obj_initialized(id_tracked)
                            if ratio_door >= 0.2 and low_border < bbox_tracked[
                                    3] < high_border:
                                #     was initialized in door, probably going out of office
                                counter.people_init[id_tracked] = 2
                            elif ratio_door < 0.2:
                                #     initialized in the corridor, mb going in
                                counter.people_init[id_tracked] = 1
                            # else:
                            #     # res is None, means that object is not in door contour
                            #     counter.people_init[id_tracked] = 1
                            counter.frame_age_counter[id_tracked] = 0
                            counter.people_bbox[id_tracked] = bbox_tracked
                        counter.cur_bbox[id_tracked] = bbox_tracked
            else:
                deepsort.increment_ages()
                if counter.need_to_clear():
                    counter.clear_all()
            # Stream results
            vals_to_del = []
            for val in counter.people_init.keys():
                # check bbox also
                cur_c = find_centroid(counter.cur_bbox[val])
                centroid_distance = np.sum(
                    np.array([(door_c[i] - cur_c[i])**2
                              for i in range(len(door_c))]))
                ratio = find_ratio_ofbboxes(bbox=counter.cur_bbox[val],
                                            rect_compare=rect_door)
                if val in lost_ids and counter.people_init[val] != -1:
                    # if vector_person < 0 then current coord is less than initialized, it means that man is going
                    # in the exit direction
                    if counter.people_init[val] == 2 \
                            and ratio < 0.4 and centroid_distance > 5000:
                        print('ratio out: {}\n centroids: {}\n'.format(
                            ratio, centroid_distance))
                        counter.get_out()
                        counter.people_init[val] = -1
                        VideoHandler.stop_recording(
                            action_occured="вышел из кабинета")
                        vals_to_del.append(val)

                    elif counter.people_init[val] == 1 \
                            and ratio >= 0.4 and centroid_distance < 5000:
                        print('ratio in: {}\n centroids: {}\n'.format(
                            ratio, centroid_distance))
                        counter.get_in()
                        counter.people_init[val] = -1
                        VideoHandler.stop_recording(
                            action_occured="зашел внутрь")
                        vals_to_del.append(val)
                    lost_ids.remove(val)

                # TODO maybe delete this condition
                elif counter.frame_age_counter.get(val, 0) >= counter.max_frame_age_counter \
                        and counter.people_init[val] == 2:

                    if ratio < 0.2 and centroid_distance > 10000:
                        counter.get_out()
                        print('ratio out max frames: ', ratio)
                        counter.people_init[val] = -1
                        VideoHandler.stop_recording(action_occured="вышел")  # "went out"
                        vals_to_del.append(val)
                    counter.age_counter[val] = 0

                counter.clear_lost_ids()

            for valtodel in vals_to_del:
                counter.delete_person_data(track_id=valtodel)

            ins, outs = counter.show_counter()
            cv2.rectangle(im0, (0, 0), (250, 50), (0, 0, 0), -1, 8)

            cv2.rectangle(im0, (int(door_array[0]), int(door_array[1])),
                          (int(door_array[2]), int(door_array[3])), COLOR_DOOR,
                          3)

            cv2.rectangle(
                im0, (int(around_door_array[0]), int(around_door_array[1])),
                (int(around_door_array[2]), int(around_door_array[3])),
                COLOR_AROUND_DOOR, 3)

            cv2.putText(im0, "in: {}, out: {} ".format(ins, outs), (10, 35), 0,
                        1e-3 * im0.shape[0], (255, 255, 255), 3)

            cv2.line(im0, (door_array[0], low_border), (680, low_border),
                     COLOR_LINE, 4)
            cv2.line(im0, (door_array[0], high_border), (680, high_border),
                     COLOR_LINE, 4)

            if VideoHandler.stop_writing(im0):
                # send_new_posts(video_name, action_occured)
                sock.sendall(
                    bytes(
                        VideoHandler.video_name + ":" +
                        VideoHandler.action_occured, "utf-8"))
                data = sock.recv(100)
                print('Received', repr(data.decode("utf-8")))
                sent_videos.add(VideoHandler.video_name)
                with open('data_files/logs2.txt', 'a',
                          encoding="utf-8-sig") as wr:
                    wr.write(
                        'video {}, action: {}, centroid: {}, ratio_init: {}, ratio_door: {}, ratio: {} \n'
                        .format(VideoHandler.video_name,
                                VideoHandler.action_occured, centroid_distance,
                                ratio_initial, ratio_door, ratio))

                print('_________________video was sent _________________')

                VideoHandler = Writer()
                VideoHandler.set_fps(fps)

            else:
                VideoHandler.continue_writing(im0, flag_anyone_in_door)
            if view_img:
                cv2.imshow('image', im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            delta_time = (time.time() - t0)
            # t2_ds = time.time()
            # print('%s Torch:. (%.3fs)' % (s, t2 - t1))
            # print('Full pipe. (%.3fs)' % (t2_ds - t0_ds))
            if len(fpeses) < 15:
                fpeses.append(round(1 / delta_time))
                print(delta_time)
            elif len(fpeses) == 15:
                # fps = round(np.median(np.array(fpeses)))
                median_fps = float(np.median(np.array(fpeses)))
                fps = round(median_fps, 2)
                print('max fps: ', fps)
                fps = 20  # hard-coded override of the measured fps
                VideoHandler.set_fps(fps)
                counter.set_fps(fps)
                fpeses.append(fps)
                motion_detection = True
            else:
                if VideoHandler.flag_writing_video:
                    print('writing video')
                if VideoHandler.flag_stop_writing:
                    print('stop writing')
                if flag_anyone_in_door:
                    print('anyone in door')
                if VideoHandler.counter_frames_indoor:
                    print('counter frames indoor: {}'.format(
                        VideoHandler.counter_frames_indoor))
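The in/out counter above relies on Rectangle, find_centroid and find_ratio_ofbboxes helpers that the listing does not show. From the way they are called (tracked xyxy boxes compared against door rectangles, with thresholds such as 0.2 and 0.4), one plausible interpretation is that the ratio is the fraction of the tracked box's area lying inside the reference rectangle; the sketch below is an assumption along those lines, not the original code.

from collections import namedtuple

Rectangle = namedtuple('Rectangle', ['xmin', 'ymin', 'xmax', 'ymax'])

def find_centroid(box):
    """Centre point of an (x1, y1, x2, y2) box."""
    return ((box[0] + box[2]) / 2.0, (box[1] + box[3]) / 2.0)

def find_ratio_ofbboxes(bbox, rect_compare):
    """Fraction of the bbox area that falls inside rect_compare (0..1)."""
    ix1 = max(bbox[0], rect_compare.xmin)
    iy1 = max(bbox[1], rect_compare.ymin)
    ix2 = min(bbox[2], rect_compare.xmax)
    iy2 = min(bbox[3], rect_compare.ymax)
    inter = max(0.0, ix2 - ix1) * max(0.0, iy2 - iy1)
    area = max(1.0, (bbox[2] - bbox[0]) * (bbox[3] - bbox[1]))
    return inter / area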
Exemple #12
0
def detect(opt, save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith('rtsp') or source.startswith(
        'http') or source.endswith('.txt')

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Initialize
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA
    last_time = time.time()

    # Load model
    model = torch.load(weights,
                       map_location=device)['model'].float()  # load to FP32
    model.to(device).eval()
    if half:
        model.half()  # to FP16

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        view_img = True
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    # run once
    _ = model(img.half() if half else img) if device.type != 'cpu' else None

    save_path = str(Path(out))
    txt_path = str(Path(out)) + '/results.txt'
    print('starting predictions...')

    # static vars
    time_total_start = 0

    curve_goal = None

    for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):

        if curve_goal is not None:  # CURVE ACTION
            # eval curve
            total_change = np.array([0], dtype='float64')
            total_time = 10
            time_elapsed = time_synchronized() - time_total_start

            x_start = curve_goal.evaluate(time_elapsed / total_time)[0]
            for a in range(int(time_elapsed * 32),
                           int((time_elapsed + 0.25) * 32)):
                if time_elapsed + 0.25 > total_time:
                    break
                total_change += curve_goal.evaluate(
                    a / (total_time * 32))[0] - x_start
                # print(total_change)
                # if total_change? > 9999999:
                #     print('broke at', total_change, '[max 999999]')

            total_change /= (time_elapsed + 0.5) / total_time

            print((total_change / NORMALIZATION_CONSTANT)[0])
            # print('time', time_elapsed/total_time, '\n')

            rx = max(min(total_change / NORMALIZATION_CONSTANT, [1]), [-1])[0]

            vals['rx'] = round(rx, 5)
            vals['ly'] = 1
            vals['trot'] = 1
            # print(vals['rx'])

            if time_synchronized() - time_total_start > total_time:
                curve_goal = None
                time_total_start = 0

                vals['trot'] = 0
                vals['ly'] = 0
                # sys.exit()
        else:
            img = torch.from_numpy(img).to(device)
            img = img.half() if half else img.float()  # uint8 to fp16/32
            img /= 255.0  # 0 - 255 to 0.0 - 1.0
            if img.ndimension() == 3:
                img = img.unsqueeze(0)

            # Inference
            t1 = time_synchronized()
            pred = model(img, augment=opt.augment)[0]

            # Apply NMS
            pred = non_max_suppression(pred,
                                       opt.conf_thres,
                                       opt.iou_thres,
                                       classes=opt.classes,
                                       agnostic=opt.agnostic_nms)
            t2 = time_synchronized()

            # Process detections
            for i, det in enumerate(pred):  # detections per image
                if webcam:  # batch_size >= 1
                    p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
                else:
                    p, s, im0 = path, '', im0s

                s += '%gx%g ' % img.shape[2:]  # print string
                save_path = str(Path(out) / Path(p).name)

                if det is not None and len(det):
                    # Rescale boxes from img_size to im0 size
                    det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                              im0.shape).round()

                    # Print results
                    for c in det[:, -1].unique():
                        n = (det[:, -1] == c).sum()  # detections per class
                        s += '%g %ss, ' % (n, names[int(c)])  # add to string

                    bbox_xywh = []
                    confs = []

                    # Adapt detections to deep sort input format
                    for *xyxy, conf, cls in det:
                        x_c, y_c, bbox_w, bbox_h = bbox_rel(*xyxy)
                        obj = [x_c, y_c, bbox_w, bbox_h]
                        bbox_xywh.append(obj)
                        confs.append([conf.item()])

                    xywhs = torch.Tensor(bbox_xywh)
                    confss = torch.Tensor(confs)

                    # Pass detections to deepsort
                    outputs = deepsort.update(xywhs, confss, im0)

                    # draw boxes for visualization
                    if len(outputs) > 0:
                        bbox_xyxy = outputs[:, :4]
                        identities = outputs[:, -1]
                        draw_boxes(im0, bbox_xyxy, identities)

                    # Write MOT compliant results to file
                    if save_txt and len(outputs) != 0:
                        bezier_points = np.zeros((2 * len(outputs), 2))
                        idx = 0

                        for j, output in enumerate(outputs):
                            bbox_left = output[0]
                            bbox_top = output[1]
                            bbox_w = output[2]
                            bbox_h = output[3]
                            identity = output[-1]
                            with open(txt_path, 'a') as f:
                                f.write(('%g ' * 10 + '\n') %
                                        (frame_idx, identity, bbox_left,
                                         bbox_top, bbox_w, bbox_h, -1, -1, -1,
                                         -1))  # label format
                            # calculating rotation and movement for dog!
                            # gotta use vals['...']
                            # dim CAM_WIDTH x CAM_HEIGHT
                            # rotation calculation
                            middle_x = (bbox_left + bbox_w) / 2

                            # translation calculation
                            percent_filled_y = (bbox_h - bbox_top) / CAM_HEIGHT
                            percent_filled_y *= 100

                            # GENERATE TOLERANCE #
                            max_width_tolerance = 0.375 * (bbox_w - bbox_left)
                            left_bound = bbox_left - max_width_tolerance
                            right_bound = bbox_w + max_width_tolerance

                            # GENERATE DISTANCE #
                            distance_rel = math.e**(-percent_filled_y / 30)
                            # print('left, middle, right', left_bound, middle_x, right_bound)
                            # print('dist', distance_rel, 'pct', percent_filled_y)

                            # GENERATE POINTS #
                            if percent_filled_y < 1.:
                                bezier_points[idx][0] = -1
                                bezier_points[idx][1] = -1
                                bezier_points[idx + 1][0] = -1
                                bezier_points[idx + 1][1] = -1
                            else:
                                if idx > 1:  # relative to last box
                                    midpoint = bezier_points[idx - 1][
                                        0]  # exit point of last node
                                    bezier_points[idx][
                                        0] = left_bound if middle_x >= midpoint else right_bound
                                    bezier_points[idx][
                                        1] = distance_rel - 0.005
                                    bezier_points[idx + 1][
                                        0] = left_bound if middle_x >= midpoint else right_bound
                                    bezier_points[idx +
                                                  1][1] = distance_rel + 0.005
                                else:  # rel to middle
                                    bezier_points[idx][
                                        0] = left_bound if middle_x >= (
                                            CAM_WIDTH / 2) else right_bound
                                    bezier_points[idx][
                                        1] = distance_rel - 0.005
                                    bezier_points[
                                        idx +
                                        1][0] = left_bound if middle_x >= (
                                            CAM_WIDTH / 2) else right_bound
                                    bezier_points[idx +
                                                  1][1] = distance_rel + 0.005

                            idx += 2

                        if cv2.waitKey(1) == ord('f'):
                            points = list()
                            skipped_boxes = list()
                            skip_idx = -1
                            for a in range(bezier_points.shape[0]):
                                x = bezier_points[a][0]
                                y = bezier_points[a][1]

                                if bezier_points.shape[0] > a + 1 and abs(
                                        y - bezier_points[a + 1][1]) < .001:
                                    skip_idx = a + 1

                                if x < 0 or y < 0:
                                    continue

                                if bezier_points.shape[0] > a + 3 and (
                                        a != skip_idx
                                ) and abs(
                                        x - bezier_points[a + 2][0]
                                ) > CAM_WIDTH * 0.2:  # threshold for ignorance
                                    # print('skipping', a + 2)
                                    skipped_boxes.append(a + 2)
                                    skipped_boxes.append(a + 3)

                            for skipped_idx in skipped_boxes:
                                bezier_points[skipped_idx][0] = -1
                                bezier_points[skipped_idx][1] = -1

                            for a in range(bezier_points.shape[0]):
                                x = bezier_points[a][0]
                                y = bezier_points[a][1]
                                if x < 0 or y < 0:
                                    continue
                                points.append((x, y))

                            far_pt = 0
                            if len(points) > 0:
                                far_pt = points[-1][1] + 1
                            points.append((CAM_WIDTH / 2, 0))
                            points.append((CAM_WIDTH / 2, far_pt))
                            points.sort(key=y_coord_sort)

                            nodes_curve_norm = np.swapaxes(
                                np.array(points), 1, 0)
                            nodes_curve = np.asfortranarray(nodes_curve_norm)
                            # print(frame_idx, nodes_curve)
                            print('calculating curve on frame', frame_idx)

                            curve = bezier.Curve(nodes_curve,
                                                 degree=(nodes_curve.shape[1] -
                                                         1))

                            # DISPLAY DATA #

                            # x = left/right bound [0,720]
                            # y = distance [1, e**-3]
                            for j, output in enumerate(outputs):
                                bbox_left = output[0]
                                bbox_top = output[1]
                                bbox_w = output[2]
                                bbox_h = output[3]

                                percent_filled_y = (bbox_h -
                                                    bbox_top) / CAM_HEIGHT
                                percent_filled_y *= 100
                                distance_rel = math.e**(-percent_filled_y / 30)

                                plt.plot([bbox_left, bbox_w],
                                         [distance_rel, distance_rel])
                            plot_bez(curve, frame_idx)

                            # set curve
                            time_total_start = time_synchronized()
                            curve_goal = curve

                            # sys.exit()

                else:
                    deepsort.increment_ages()

                # Print time (inference + NMS)
                # print('%sDone. (%.3fs)' % (s, t2 - t1))

                # Stream results
                if view_img:
                    cv2.imshow(p, im0)
                    if cv2.waitKey(1) == ord('q'):  # q to quit
                        raise StopIteration

                # Save results (image with detections)
                if save_img:
                    print('saving img!')
                    if dataset.mode == 'images':
                        cv2.imwrite(save_path, im0)
                    else:
                        print('saving video!')
                        if vid_path != save_path:  # new video
                            vid_path = save_path
                            if isinstance(vid_writer, cv2.VideoWriter):
                                vid_writer.release(
                                )  # release previous video writer

                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                            vid_writer = cv2.VideoWriter(
                                save_path, cv2.VideoWriter_fourcc(*opt.fourcc),
                                fps, (w, h))
                        vid_writer.write(im0)
        if not DEBUG_MODE:
            if time.time() - last_time > cooldown:  # so we dont spam
                send_commands(vals)
                last_time = time.time()
            events = sel.select(timeout=1)
            if events:
                for key, mask in events:
                    service_connection(key, mask)
            if not sel.get_map():
                break

    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        if platform == 'darwin':  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
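Exemple #12 steers toward tracked targets by fitting a Bézier curve through waypoints and then sampling it over time with evaluate(). For readers unfamiliar with the bezier package it uses, the minimal usage pattern looks roughly like this (the control-point values are illustrative only):

import numpy as np
import bezier

# Control points as a 2 x N matrix: first row x, second row y
nodes = np.asfortranarray([
    [0.0, 360.0, 720.0],   # x, e.g. pixel column (illustrative values)
    [0.0, 0.5, 1.0],       # y, e.g. relative distance (illustrative values)
])
curve = bezier.Curve(nodes, degree=nodes.shape[1] - 1)

s = 0.25                   # curve parameter in [0, 1]; the example uses time_elapsed / total_time
point = curve.evaluate(s)  # numpy array of shape (2, 1): [[x], [y]]
x_at_s = point[0]          # the example reads off the x component this way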
Exemple #13
0
def detect(opt, save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith('rtsp') or source.startswith(
        'http') or source.endswith('.txt')

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Initialize
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = torch.load(weights,
                       map_location=device)['model'].float()  # load to FP32
    model.to(device).eval()
    if half:
        model.half()  # to FP16

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        view_img = view_img
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    # run once
    _ = model(img.half() if half else img) if device.type != 'cpu' else None

    # save_path = str(Path(out))
    txt_path = str(Path(out)) + '/results.txt'

    vid = cv2.VideoCapture(source)

    filename = os.path.basename(source).split('.')[0]
    save_path = f"results/{filename}_action.mp4"
    fps = vid.get(cv2.CAP_PROP_FPS)
    w = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    h = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*("mp4v")), fps, (w, h))
    # ffmpeg setup
    pipe = Popen([
        'ffmpeg', '-loglevel', 'quiet', '-y', '-f', 'image2pipe', '-vcodec',
        'mjpeg', '-framerate', f'{fps}', '-i', '-', '-vcodec', 'libx264',
        '-crf', '28', '-preset', 'veryslow', '-framerate', f'{fps}',
        f'{save_path}'
    ],
                 stdin=PIPE)
    length = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
    pbar = tqdm(total=length, position=0, leave=True)
    for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
        start = time.time()
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, im0 = path, im0s

            # s += '%gx%g ' % img.shape[2:]  # print string
            # save_path = str(Path(out) / Path(p).name)

            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    # s += '%g %ss, ' % (n, names[int(c)])  # add to string

                bbox_xywh = []
                confs = []

                # Adapt detections to deep sort input format
                for *xyxy, conf, cls in det:
                    x_c, y_c, bbox_w, bbox_h = bbox_rel(*xyxy)
                    obj = [x_c, y_c, bbox_w, bbox_h]
                    bbox_xywh.append(obj)
                    confs.append([conf.item()])

                xywhs = torch.Tensor(bbox_xywh)
                confss = torch.Tensor(confs)

                # Pass detections to deepsort
                im0 = deepsort.update(xywhs, confss, im0)

                # # draw boxes for visualization
                # if len(outputs) > 0:
                #     bbox_xyxy = outputs[:, :4]
                #     identities = outputs[:, -1]
                #     draw_boxes(im0, bbox_xyxy, identities)

            else:
                deepsort.increment_ages()

            # Print time (inference + NMS)
            runtime_fps = 1 / (time.time() - start)
            # print(f"Runtime FPS: {runtime_fps:.2f}")
            pbar.set_description(f"runtime_fps: {runtime_fps}")
            pbar.update(1)
            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            # vid_writer.write(im0)
            im0 = Image.fromarray(im0[..., ::-1])
            # print(im0)
            im0.save(pipe.stdin, 'JPEG')

    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        # vid_writer.release()
        pipe.stdin.close()
        pipe.wait()
        pbar.close()

    print('Done. (%.3fs)' % (time.time() - t0))
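
Rather than a cv2.VideoWriter, Exemple #13 streams each annotated frame to an ffmpeg subprocess as a JPEG (image2pipe in, libx264 out). A condensed sketch of the same idea, assuming ffmpeg is available on PATH and frames are BGR numpy arrays:

from subprocess import Popen, PIPE
from PIL import Image

def open_ffmpeg_pipe(save_path, fps):
    # image2pipe reads a stream of JPEG images from stdin and encodes them with libx264.
    return Popen(['ffmpeg', '-loglevel', 'quiet', '-y',
                  '-f', 'image2pipe', '-vcodec', 'mjpeg',
                  '-framerate', f'{fps}', '-i', '-',
                  '-vcodec', 'libx264', '-crf', '28', save_path],
                 stdin=PIPE)

def write_frame(pipe, bgr_frame):
    # Convert BGR (OpenCV order) to RGB and push one JPEG into ffmpeg's stdin.
    Image.fromarray(bgr_frame[..., ::-1]).save(pipe.stdin, 'JPEG')

# pipe = open_ffmpeg_pipe('out.mp4', fps=25)
# write_frame(pipe, frame)          # once per processed frame
# pipe.stdin.close(); pipe.wait()   # flush and finalize the encode
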
Exemple #14
0
def detect(opt):
    out, source, yolo_weights, deep_sort_weights, show_vid, save_vid, save_txt, imgsz, evaluate = \
        opt.output, opt.source, opt.yolo_weights, opt.deep_sort_weights, opt.show_vid, opt.save_vid, \
            opt.save_txt, opt.img_size, opt.evaluate
    webcam = source == '0' or source.startswith(
        'rtsp') or source.startswith('http') or source.endswith('.txt')

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    attempt_download(deep_sort_weights, repo='mikel-brostrom/Yolov5_DeepSort_Pytorch')
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Initialize
    device = select_device(opt.device)

    # The MOT16 evaluation runs multiple inference streams in parallel, each one writing to
    # its own .txt file. Hence, in that case, the output folder is not restored
    if not evaluate:
        if os.path.exists(out):
            shutil.rmtree(out)  # delete output folder
        os.makedirs(out)  # make new output folder

    half = device.type != 'cpu'  # half precision only supported on CUDA
    # Load model
    model = attempt_load(yolo_weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    names = model.module.names if hasattr(model, 'module') else model.names  # get class names
    if half:
        model.half()  # to FP16

    # Set Dataloader
    vid_path, vid_writer = None, None
    # Check if environment supports image displays
    if show_vid:
        show_vid = check_imshow()

    if webcam:
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names

    # Run inference
    if device.type != 'cpu':
        model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once
    t0 = time.time()

    save_path = str(Path(out))
    # extract what is in between the last '/' and last '.'
    txt_file_name = source.split('/')[-1].split('.')[0]
    txt_path = str(Path(out)) + '/' + txt_file_name + '.txt'

    for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_sync()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(
            pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_sync()

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            s += '%gx%g ' % img.shape[2:]  # print string
            save_path = str(Path(out) / Path(p).name)

            annotator = Annotator(im0, line_width=2, pil=not ascii)

            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(
                    img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                xywhs = xyxy2xywh(det[:, 0:4])
                confs = det[:, 4]
                clss = det[:, 5]

                # pass detections to deepsort
                outputs = deepsort.update(xywhs.cpu(), confs.cpu(), clss.cpu(), im0)
                
                # draw boxes for visualization
                if len(outputs) > 0:
                    for j, (output, conf) in enumerate(zip(outputs, confs)): 
                        
                        bboxes = output[0:4]
                        id = output[4]
                        cls = output[5]

                        c = int(cls)  # integer class
                        label = f'{id} {names[c]} {conf:.2f}'
                        annotator.box_label(bboxes, label, color=colors(c, True))

                        if save_txt:
                            # to MOT format
                            bbox_left = output[0]
                            bbox_top = output[1]
                            bbox_w = output[2] - output[0]
                            bbox_h = output[3] - output[1]
                            # Write MOT compliant results to file
                            with open(txt_path, 'a') as f:
                                f.write(('%g ' * 10 + '\n') % (
                                    frame_idx, id, bbox_left, bbox_top,
                                    bbox_w, bbox_h, -1, -1, -1, -1))  # label format

            else:
                deepsort.increment_ages()

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            im0 = annotator.result()
            if show_vid:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_vid:
                if vid_path != save_path:  # new video
                    vid_path = save_path
                    if isinstance(vid_writer, cv2.VideoWriter):
                        vid_writer.release()  # release previous video writer
                    if vid_cap:  # video
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                    else:  # stream
                        fps, w, h = 30, im0.shape[1], im0.shape[0]
                        save_path += '.mp4'

                    vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
                vid_writer.write(im0)

    if save_txt or save_vid:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        if platform == 'darwin':  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
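
The save_txt branch in Exemple #14 writes one MOT-challenge style row per tracked box: frame index, track id, bbox left/top/width/height, then -1 placeholders. A small helper sketch of that row format (the function name is illustrative):

def write_mot_row(txt_path, frame_idx, track_id, x1, y1, x2, y2):
    # MOT format expects the top-left corner plus width/height, one row per track per frame.
    bbox_left, bbox_top = x1, y1
    bbox_w, bbox_h = x2 - x1, y2 - y1
    with open(txt_path, 'a') as f:
        f.write(('%g ' * 10 + '\n') % (frame_idx, track_id, bbox_left,
                                       bbox_top, bbox_w, bbox_h, -1, -1, -1, -1))
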
Exemple #15
0
def detect(opt, save_img=False):
    out, source, weights, view_img, save_txt, imgsz, GCP_list = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size, opt.GCP_list
    webcam = source == '0' or source.startswith('rtsp') or source.startswith(
        'http') or source.endswith('.txt')

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Initialize
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = torch.load(weights,
                       map_location=device)['model'].float()  # load to FP32
    model.to(device).eval()
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load classifier weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        view_img = True
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    # run once
    _ = model(img.half() if half else img) if device.type != 'cpu' else None

    save_path = str(Path(out))
    txt_path_raw = str(Path(out)) + '/results_raw.txt'

    # create a separate results file that also stores speed, for later comparison
    txt_path_raw2 = str(Path(out)) + '/results_raw2.txt'

    # point load
    with open('./mapdata/point.yaml') as f:
        data = yaml.load(f.read(), Loader=yaml.FullLoader)  # explicit Loader required by newer PyYAML
    frm_point = data['frm_point']
    geo_point = data['geo_point']

    Counter_1 = [(488, 589), (486, 859)]
    Counter_2 = [(3463, 795), (3487, 1093)]
    Counter_list = [Counter_1, Counter_2]

    datum_dist = []
    counter_dist = []

    line_fileName = './mapdata/Busan1_IC_Polyline_to_Vertex.txt'
    all_line = mapdata_load(line_fileName, frm_point, geo_point)

    percep_frame = 5
    from _collections import deque
    pts = [deque(maxlen=percep_frame + 1) for _ in range(10000)]
    ptsSpeed = [deque(maxlen=1) for _ in range(10000)]

    frame_len = calc_dist(frm_point[1], frm_point[4])
    geo_len = calc_dist(geo_point[1], geo_point[4])

    # ----------------- fix val start
    fixcnt = 1
    # ----------------- fix val end

    # ----------------- counter val start
    memory_index = {}
    memory_id = {}

    cnt = np.zeros((len(Counter_list), 4))
    # total_counter = 0  # a grand total across all counters could be added later

    # count_1_total = 0
    # count_1_veh_c0 = 0
    # count_1_veh_c1 = 0
    # count_1_veh_c2 = 0

    # count_2_total = 0
    # count_2_veh_c0 = 0
    # count_2_veh_c1 = 0
    # count_2_veh_c2 = 0
    # ----------------- counter val end

    for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):

        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)
            print(pred)

        t2 = time_synchronized()

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            s += '%gx%g ' % img.shape[2:]  # print string
            save_path = str(Path(out) / Path(p).name)

            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                bbox_xywh = []
                confs = []
                clss = []
                # Adapt detections to deep sort input format
                for *xyxy, conf, cls in det:
                    x_c, y_c, bbox_w, bbox_h = bbox_rel(*xyxy)
                    obj = [x_c, y_c, bbox_w, bbox_h]
                    bbox_xywh.append(obj)
                    confs.append([conf.item()])
                    clss.append(cls.item())

                bbox_xywh = bbox_xywh
                cls_conf = confs
                cls_ids = clss
                # xywhs = torch.Tensor(bbox_xywh)
                # confss = torch.Tensor(confs)
                # cls_ids = clss

                # if len(bbox_xywh) == 0:
                #     continue
                # print("detection cls_ids:", cls_ids)

                #filter cls id for tracking
                # print("cls_ids")
                # print(cls_ids)

                # # select class
                # mask = []
                # lst_move_life = [0,1,2]
                # # lst_for_track = []

                # for id in cls_ids:
                #     if id in lst_move_life:
                #         # lst_for_track.append(id)
                #         mask.append(True)
                #     else:
                #         mask.append()
                # # print("mask cls_ids:", mask)

                # # print(bbox_xywh)
                # bbox_xywh = list(compress(bbox_xywh,mask))
                # bbox dilation just in case bbox too small, delete this line if using a better pedestrian detector
                # bbox_xywh[:,3:] *= 1.2
                # cls_conf = list(compress(cls_conf,mask))
                # print(cls_conf)

                bbox_xywh = torch.Tensor(bbox_xywh)
                cls_conf = torch.Tensor(cls_conf)

                # Pass detections to deepsort
                outputs = deepsort.update(bbox_xywh, cls_conf, im0, cls_ids)
                """
                # output 형식

                [[박스 좌측상단 x, 박스 좌측상단 y, 박스 우측하단 x, 박스 우측하단 y, 차량 id, 클래스 넘버],
                [박스 좌측상단 x, 박스 좌측상단 y, 박스 우측하단 x, 박스 우측하단 y, 차량 id, 클래스 넘버],
                [박스 좌측상단 x, 박스 좌측상단 y, 박스 우측하단 x, 박스 우측하단 y, 차량 id, 클래스 넘버],
                [박스 좌측상단 x, 박스 좌측상단 y, 박스 우측하단 x, 박스 우측하단 y, 차량 id, 클래스 넘버],
                ...]
                """

                # ------------------------------------------------------------------------------------------------------ img fix start
                t3 = time_synchronized()
                match_mid_point_list = matcher_BRISK_BF(im0, GCP_list)
                t4 = time_synchronized()
                # ---------------------------------------------------------------------------------------------------------------------- line start

                # define and fix the trilateration distances used to update the reference points
                if frame_idx == 0:
                    for pointNum in range(len(frm_point)):
                        for GCP_num in range(len(match_mid_point_list)):
                            datum_dist.append(
                                point_dist(match_mid_point_list[GCP_num],
                                           frm_point[pointNum]))
                    datum_dist = np.reshape(
                        datum_dist,
                        (len(frm_point), len(match_mid_point_list)))
                    for Ct_list in Counter_list:
                        for Ctpoint_num in range(len(Ct_list)):
                            for GCP_num in range(len(match_mid_point_list)):
                                counter_dist.append(
                                    point_dist(match_mid_point_list[GCP_num],
                                               Ct_list[Ctpoint_num]))
                    counter_dist = np.reshape(counter_dist,
                                              (len(Counter_list), len(Ct_list),
                                               len(match_mid_point_list)))
                t5 = time_synchronized()

                pre_P = (0, 0)

                for line_num, eachline in enumerate(all_line):
                    for newpoint in eachline['frmPoint']:
                        if line_num == 0:
                            im0 = cv2.circle(im0, newpoint, 5, (0, 0, 255),
                                             -1)  # solid lane line
                            if calc_dist(pre_P, newpoint) < 390:
                                im0 = cv2.line(im0, pre_P, newpoint,
                                               (0, 0, 255), 2, -1)
                        elif line_num == 1:
                            im0 = cv2.circle(im0, newpoint, 5, (0, 255, 0),
                                             -1)  # road boundary
                            if calc_dist(pre_P, newpoint) < 420:
                                im0 = cv2.line(im0, pre_P, newpoint,
                                               (0, 255, 0), 2, -1)
                        elif line_num == 2:
                            im0 = cv2.circle(im0, newpoint, 5, (255, 0, 0),
                                             -1)  # double lane line
                            if calc_dist(pre_P, newpoint) < 350:
                                im0 = cv2.line(im0, pre_P, newpoint,
                                               (255, 0, 0), 2, -1)
                        else:
                            im0 = cv2.circle(im0, newpoint, 5, (100, 100, 0),
                                             -1)  # dashed lane line
                            if calc_dist(pre_P, newpoint) < 600:
                                im0 = cv2.line(im0, pre_P, newpoint,
                                               (100, 100, 0), 2, -1)
                        pre_P = newpoint

                t6 = time_synchronized()
                for pointNum in range(len(frm_point)):
                    im0 = cv2.circle(im0, frm_point[pointNum], 10, (0, 0, 0),
                                     -1)
                    newPoint = intersectionPoint(match_mid_point_list,
                                                 datum_dist[pointNum])
                    frm_point[pointNum] = newPoint

                t7 = time_synchronized()

                #---------------------------------------------------------------------------------------------------------------------- line end

                # ------------------------------------------------------------------------------------------------------ img fix end

                # ------------------------------------------------------------------------------------------------------ counting num and class start
                Counter_newpoint = []
                for Ct_num in range(len(Counter_list)):
                    Ct_list = Counter_list[Ct_num]
                    for Ctpoint_num in range(len(Ct_list)):
                        Counter_newpoint.append(
                            intersectionPoint(
                                match_mid_point_list,
                                counter_dist[Ct_num][Ctpoint_num]))
                Counter_newpoint = np.reshape(
                    Counter_newpoint, (len(Counter_list), len(Ct_list), 2))

                for CountNum in Counter_newpoint:
                    im0 = cv2.line(im0, tuple(CountNum[0]), tuple(CountNum[1]),
                                   (0, 0, 0), 5, -1)

                boxes = []
                indexIDs = []
                classIDs = []
                previous_index = memory_index.copy()
                previous_id = memory_id.copy()
                memory_index = {}
                memory_id = {}
                COLORS = np.random.randint(0,
                                           255,
                                           size=(200, 3),
                                           dtype="uint8")
                if save_txt and len(outputs) != 0:
                    for j, output in enumerate(outputs):
                        boxes.append(
                            [output[0], output[1], output[2], output[3]])
                        indexIDs.append(int(output[4]))
                        classIDs.append(int(output[5]))
                        memory_index[indexIDs[-1]] = boxes[
                            -1]  # map this track id to its latest box
                        memory_id[indexIDs[-1]] = classIDs[
                            -1]  # map this track id to its class id

                        if len(pts[output[4]]) == 0:
                            pts[output[4]].append(frame_idx)
                        center = (int(((output[0]) + (output[2])) / 2),
                                  int(((output[1]) + (output[3])) / 2))
                        pts[output[4]].append(center)
                        if len(pts[output[4]]) == percep_frame + 1:
                            frmMove_len = np.sqrt(
                                pow(
                                    pts[output[4]][-1][0] -
                                    pts[output[4]][-percep_frame][0], 2) + pow(
                                        pts[output[4]][-1][1] -
                                        pts[output[4]][-percep_frame][1], 2))
                            geoMove_Len = geo_len * frmMove_len / frame_len
                            speed = geoMove_Len * vid_cap.get(
                                cv2.CAP_PROP_FPS) * 3.6 / (
                                    frame_idx - pts[output[4]][0])  # km/h over the frames elapsed since the first sample
                            ptsSpeed[output[4]].append(speed)
                            pts[output[4]].clear()

                if len(boxes) > 0:
                    i = int(0)
                    for box in boxes:
                        # compare the current and previous positions to check whether a counting line was crossed
                        (x, y) = (int(box[0]), int(box[1]))  # output[0], output[1]
                        (w, h) = (int(box[2]), int(box[3]))  # same as output[2], output[3]
                        color = compute_color_for_labels(indexIDs[i])

                        if indexIDs[i] in previous_index:
                            previous_box = previous_index[indexIDs[i]]
                            # print()
                            # print('previous_box : ')
                            # print(previous_box)
                            (x2, y2) = (int(previous_box[0]),
                                        int(previous_box[1]))
                            (w2, h2) = (int(previous_box[2]),
                                        int(previous_box[3]))
                            p0 = (int(x + (w - x) / 2), int(y + (h - y) / 2)
                                  )  # current box center
                            p1 = (int(x2 + (w2 - x2) / 2),
                                  int(y2 + (h2 - y2) / 2))  # previous box center
                            cv2.line(
                                im0, p0, p1, color, 3
                            )  # connect the previous and current centers to show the motion between frames

                            # class of this track (the index ids match, so a direct lookup works)
                            previous_class_id = previous_id[indexIDs[i]]

                            # custom result built from Yolov5 + DeepSort:
                            # frame index, track id, class name, x, y, w, h, speed, null, null
                            # with open(txt_path_raw2, 'a') as f:
                            #     f.write(('%g ' * 10+ '\n') % (frame_idx, indexIDs[i], previous_class_id,
                            #                                 p0[0], p0[1], box[2], box[3], -1, -1))  # label format

                            for cntr in range(len(Counter_newpoint)):
                                if intersect(p0, p1, Counter_newpoint[cntr][0],
                                             Counter_newpoint[cntr][1]):  # the actual line-crossing check
                                    if previous_class_id == 0:
                                        cnt[cntr][1] += 1
                                    elif previous_class_id == 1:
                                        cnt[cntr][2] += 1
                                    elif previous_class_id == 2:
                                        cnt[cntr][3] += 1
                                    cnt[cntr][0] += 1

                        i += 1  # move on to compare the next track

                # draw counter
                for cntr in range(len(Counter_newpoint)):
                    cv2.putText(im0, 'count_{}_total : {}'.format(
                        cntr + 1, cnt[cntr][0]), (100 + 400 * cntr, 110),
                                cv2.FONT_HERSHEY_DUPLEX, 1.0, (0, 0, 0),
                                2)  # show the running counts on the frame
                    cv2.putText(im0, 'count_{}_{} : {}'.format(
                        cntr + 1, names[0],
                        cnt[cntr][1]), (100 + 400 * cntr, 140),
                                cv2.FONT_HERSHEY_DUPLEX, 0.5, (0, 0, 0),
                                2)  # show the running counts on the frame
                    cv2.putText(im0, 'count_{}_{} : {}'.format(
                        cntr + 1, names[1],
                        cnt[cntr][2]), (100 + 400 * cntr, 170),
                                cv2.FONT_HERSHEY_DUPLEX, 0.5, (0, 0, 0),
                                2)  # show the running counts on the frame
                    cv2.putText(im0, 'count_{}_{} : {}'.format(
                        cntr + 1, names[2],
                        cnt[cntr][3]), (100 + 400 * cntr, 200),
                                cv2.FONT_HERSHEY_DUPLEX, 0.5, (0, 0, 0),
                                2)  # show the running counts on the frame
                t8 = time_synchronized()
                # ---------------------------------------------------------------------------------------------------------------------- counter end

                # draw boxes for visualization
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, 4:5]
                    cls_id = outputs[:, -1]
                    draw_boxes(im0, bbox_xyxy, cls_id, identities, names,
                               ptsSpeed)

                t9 = time_synchronized()
                # Write MOT compliant results to file
                if save_txt and len(outputs) != 0:
                    for j, output in enumerate(outputs):  # one output row per track
                        bbox_left = output[0]
                        bbox_top = output[1]
                        bbox_w = output[2]
                        bbox_h = output[3]
                        identity = output[4]
                        classname = output[5]

                        with open(
                                txt_path_raw, 'a'
                        ) as f:  # raw result produced by Yolov5 + DeepSort
                            f.write(('%g ' * 6 + '%g' * 1 + '%g ' * 3 + '\n') %
                                    (frame_idx, identity, bbox_left, bbox_top,
                                     bbox_w, bbox_h, classname, -1, -1,
                                     -1))  # label format

            # else:
            #     deepsort.increment_ages()
            t10 = time_synchronized()
            # Print time (inference + NMS + classify)
            #print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            t11 = time_synchronized()
            # Save results (image with detections)
            # dataset.mode = 'images'
            # save_path = './track_result/output/{}.jpg'.format(i)
            if save_img:
                print('saving img!')
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    print('saving video!')
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer

                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*opt.fourcc),
                            fps, (w, h))
                    vid_writer.write(im0)
            t12 = time_synchronized()

            print('inference + NMS + classify (%.3fs)' % (t2 - t1))
            print('Yolo + DeepSORT (%.3fs)' % (t3 - t2))
            print('find mid point (%.3fs)' % (t4 - t3))
            print('trilateration reference distances (%.3fs)' % (t5 - t4))
            print('draw line (%.3fs)' % (t6 - t5)
                  )  # currently computed for every point in the HD map; should later be limited to on-screen points
            print('GCP point calculation (%.3fs)' % (t7 - t6))
            print('Count & speed (%.3fs)' % (t8 - t7))
            print('per-vehicle drawing (%.3fs)' % (t9 - t8))
            print('txt data save (%.3fs)' % (t10 - t9))
            print('display on screen (%.3fs)' % (t11 - t10))
            print('save to video file (%.3fs)' % (t12 - t11))
            print('one frame done (%.3fs)' % (t12 - t1))

    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        if platform == 'darwin':  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
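
Exemple #15 counts a vehicle when the segment between its previous and current box centers crosses a counting line, via intersect(p0, p1, A, B). That helper is not listed above; a common way to implement it is the counter-clockwise (ccw) orientation test, sketched here as an assumption:

def ccw(a, b, c):
    # True if the points a -> b -> c make a counter-clockwise turn.
    return (c[1] - a[1]) * (b[0] - a[0]) > (b[1] - a[1]) * (c[0] - a[0])

def intersect(p0, p1, a, b):
    # Segments p0-p1 and a-b cross iff the endpoints of each segment
    # lie on opposite sides of the other segment.
    return ccw(p0, a, b) != ccw(p1, a, b) and ccw(p0, p1, a) != ccw(p0, p1, b)

# crossing check per track: if intersect(prev_center, cur_center, line_p0, line_p1): cnt += 1
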
Exemple #16
0
def detect(config):
    sent_videos = set()
    fpeses = []
    fps = 0

    global flag, vid_writer, lost_ids
    door_array = [611, 70, 663, 310]
    around_door_array = [507, 24, 724, 374]
    low_border = 225
    high_border = 342
    door_c = find_centroid(door_array)
    rect_door = Rectangle(door_array[0], door_array[1], door_array[2], door_array[3])
    rect_around_door = Rectangle(around_door_array[0], around_door_array[1], around_door_array[2], around_door_array[3])
    # socket
    HOST = "localhost"
    PORT = 8084
    # camera info
    save_img = True
    imgsz = (416, 416) if ONNX_EXPORT else config[
        "img_size"]  # (320, 192) or (416, 256) or (608, 352) for (height, width)
    out, source, weights, half, view_img = config["output"], config["source"], config["weights"], \
                                           config["half"], config["view_img"]
    webcam = source == '0' or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt')
    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(config["config_deepsort"])
    # initial objects of classes
    counter = Counter()
    VideoHandler = Writer()
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Initialize device, weights etc.
    device = torch_utils.select_device(device='cpu' if ONNX_EXPORT else config["device"])
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA
    # Initialize model
    model = Darknet(config["cfg"], imgsz)

    # Load weights
    attempt_download(weights)
    if weights.endswith('.pt'):  # pytorch format
        model.load_state_dict(torch.load(weights, map_location=device)['model'], strict=False)
    else:  # darknet format
        load_darknet_weights(model, weights)
    # Eval mode
    model.to(device).eval()
    # Half precision
    print(half)
    half = half and device.type != 'cpu'  # half precision only supported on CUDA
    print(half)
    if half:
        model.half()

    if webcam:
        view_img = True
        torch.backends.cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        view_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = load_classes(config["names"])
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]

    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img.float()) if device.type != 'cpu' else None  # run once

    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
        sock.connect((HOST, PORT))
        for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
            flag_move = False
            flag_anyone_in_door = False

            t0_ds = time.time()
            img = torch.from_numpy(img).to(device)
            img = img.half() if half else img.float()  # uint8 to fp16/32
            img /= 255.0  # 0 - 255 to 0.0 - 1.0
            if img.ndimension() == 3:
                img = img.unsqueeze(0)
            # Inference
            t1 = torch_utils.time_synchronized()
            pred = model(img, augment=config["augment"])[0]

            # to float
            if half:
                pred = pred.float()
            # Apply NMS
            classes = None if config["classes"] == "None" else config["classes"]
            pred = non_max_suppression(pred, config["conf_thres"], config["iou_thres"],
                                       multi_label=False, classes=classes, agnostic=config["agnostic_nms"])
            # Process detections
            lost_ids = counter.return_lost_ids()
            for i, det in enumerate(pred):  # detections for image i
                if webcam:  # batch_size >= 1
                    p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
                else:
                    p, s, im0 = path, '', im0s

                if len(door_array) != 4 or len(around_door_array) != 4:
                    door_array = select_object(im0)
                    print(door_array)

                save_path = str(Path(out) / Path(p).name)
                s += '%gx%g ' % img.shape[2:]  # print string
                gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  #  normalization gain whwh
                # lost_ids = counter.return_lost_ids()
                bbox_xywh = []
                confs = []
                if det is not None and len(det):
                    # Rescale boxes from imgsz to im0 size
                    det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
                    # Print results
                    for c in det[:, -1].unique():
                        if names[int(c)] not in config["needed_classes"]:
                            continue
                        n = (det[:, -1] == c).sum()  # detections per class
                        s += '%g %s, ' % (n, names[int(c)])  # add to string
                    # Write results
                    for *xyxy, conf, cls in det:
                        #  check if bbox`s class is needed
                        if names[int(cls)] not in config["needed_classes"]:
                            continue
                        x_c, y_c, bbox_w, bbox_h = bbox_rel(*xyxy)
                        obj = [x_c, y_c, bbox_w, bbox_h]
                        bbox_xywh.append(obj)
                        confs.append([conf.item()])

                        if save_img or view_img:  # Add bbox to image
                            label = '%s %.2f' % (names[int(cls)], conf)
                            plot_one_box(xyxy, im0, label=label, color=colors[int(cls)])

            detections = torch.Tensor(bbox_xywh)
            confidences = torch.Tensor(confs)

            # Pass detections to deepsort
            # if len(detections) == 0:
            #     continue
            if len(detections) != 0:
                outputs_tracked = deepsort.update(detections, confidences, im0)
                counter.someone_inframe()
                # draw boxes for visualization
                if len(outputs_tracked) > 0:
                    bbox_xyxy = outputs_tracked[:, :4]
                    identities = outputs_tracked[:, -1]
                    draw_boxes(im0, bbox_xyxy, identities)
                    counter.update_identities(identities)

                    for bbox_tracked, id_tracked in zip(bbox_xyxy, identities):

                        ratio_initial = find_ratio_ofbboxes(bbox=bbox_tracked, rect_compare=rect_around_door)
                        ratio_door = find_ratio_ofbboxes(bbox=bbox_tracked, rect_compare=rect_door)
                        #  person is inside the door contour for the first time
                        if ratio_initial > 0.2:
                            if VideoHandler.counter_frames_indoor == 0:
                                #     flag that recording has started
                                VideoHandler.start_video(id_tracked)
                            flag_anyone_in_door = True

                        elif ratio_initial > 0.2 and id_tracked not in VideoHandler.id_inside_door_detected:
                            VideoHandler.continue_opened_video(id=id_tracked, seconds=3)
                            flag_anyone_in_door = True

                        # elif ratio_detection > 0.6 and counter.people_init.get(id_tracked) == 1:
                        #     VideoHandler.continue_opened_video(id=id_tracked, seconds=0.005)

                        if id_tracked not in counter.people_init or counter.people_init[id_tracked] == 0:
                            counter.obj_initialized(id_tracked)
                            if ratio_door >= 0.2 and low_border < bbox_tracked[3] < high_border:
                                #     was initialized in door, probably going out of office
                                counter.people_init[id_tracked] = 2
                            elif ratio_door < 0.4:
                                #     initialized in the corridor, mb going in
                                counter.people_init[id_tracked] = 1
                            else:
                                # res is None, means that object is not in door contour
                                counter.people_init[id_tracked] = 1
                            counter.frame_age_counter[id_tracked] = 0

                            counter.people_bbox[id_tracked] = bbox_tracked

                        counter.cur_bbox[id_tracked] = bbox_tracked
            else:
                deepsort.increment_ages()
                if counter.need_to_clear():
                    counter.clear_all()

            # Print time (inference + NMS)
            t2 = torch_utils.time_synchronized()

            # Stream results
            vals_to_del = []
            for val in counter.people_init.keys():
                # check bbox also
                cur_c = find_centroid(counter.cur_bbox[val])
                centroid_distance = np.sum(np.array([(door_c[i] - cur_c[i]) ** 2 for i in range(len(door_c))]))

                # init_c = find_centroid(counter.people_bbox[val])
                # vector_person = (cur_c[0] - init_c[0],
                #                  cur_c[1] - init_c[1])

                ratio = find_ratio_ofbboxes(bbox=counter.cur_bbox[val], rect_compare=rect_door)

                if val in lost_ids and counter.people_init[val] != -1:
                    # if vector_person < 0 then current coord is less than initialized, it means that man is going
                    # in the exit direction
                    if counter.people_init[val] == 2 \
                            and ratio < 0.4 and centroid_distance > 5000:  # vector_person[1] > 50 and
                        print('ratio out: {}\n centroids: {}\n'.format(ratio, centroid_distance))
                        counter.get_out()
                        counter.people_init[val] = -1
                        VideoHandler.stop_recording(action_occured="вышел из кабинета")  # "left the office"

                        vals_to_del.append(val)

                    elif counter.people_init[val] == 1 \
                            and ratio >= 0.4 and centroid_distance < 1000:  # vector_person[1] < -50 and
                        print('ratio in: {}\n centroids: {}\n'.format(ratio, centroid_distance))
                        counter.get_in()
                        counter.people_init[val] = -1
                        VideoHandler.stop_recording(action_occured="зашел внутрь")  # "came inside"
                        vals_to_del.append(val)

                    lost_ids.remove(val)

                # TODO maybe delete this condition
                elif counter.frame_age_counter.get(val, 0) >= counter.max_frame_age_counter \
                        and counter.people_init[val] == 2:

                    if ratio < 0.2 and centroid_distance > 10000:  # vector_person[1] > 50 and
                        counter.get_out()
                        print('ratio out max frames: ', ratio)
                        counter.people_init[val] = -1
                        VideoHandler.stop_recording(action_occured="вышел")  # "went out"
                        vals_to_del.append(val)
                    counter.age_counter[val] = 0

                counter.clear_lost_ids()

            for valtodel in vals_to_del:
                counter.delete_person_data(track_id=valtodel)

            ins, outs = counter.show_counter()
            cv2.rectangle(im0, (0, 0), (250, 50),
                          (0, 0, 0), -1, 8)

            cv2.rectangle(im0, (int(door_array[0]), int(door_array[1])),
                          (int(door_array[2]), int(door_array[3])),
                          (23, 158, 21), 3)

            cv2.rectangle(im0, (int(around_door_array[0]), int(around_door_array[1])),
                          (int(around_door_array[2]), int(around_door_array[3])),
                          (48, 58, 221), 3)

            cv2.putText(im0, "in: {}, out: {} ".format(ins, outs), (10, 35), 0,
                        1e-3 * im0.shape[0], (255, 255, 255), 3)

            cv2.line(im0, (door_array[0], low_border), (880, low_border), (214, 4, 54), 4)

            if VideoHandler.stop_writing(im0):
                # send_new_posts(video_name, action_occured)
                sock.sendall(bytes(VideoHandler.video_name + ":" + VideoHandler.action_occured, "utf-8"))
                data = sock.recv(100)
                print('Received', repr(data.decode("utf-8")))
                sent_videos.add(VideoHandler.video_name)
                with open('data_files/logs2.txt', 'a', encoding="utf-8-sig") as wr:
                    wr.write(
                        'video {}, action: {}, centroid {} \n'.format(VideoHandler.video_name, VideoHandler.action_occured,
                                                                centroid_distance))

                VideoHandler = Writer()
                VideoHandler.set_fps(fps)

            else:
                VideoHandler.continue_writing(im0, flag_anyone_in_door)

            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            delta_time = (torch_utils.time_synchronized() - t1)
            # t2_ds = time.time()
            # print('%s Torch:. (%.3fs)' % (s, t2 - t1))
            # print('Full pipe. (%.3fs)' % (t2_ds - t0_ds))
            if len(fpeses) < 30:
                fpeses.append(1 / delta_time)
            elif len(fpeses) == 30:
                # fps = round(np.median(np.array(fpeses)))
                median_fps = float(np.median(np.array(fpeses)))
                fps = round(median_fps, 2)
                # fps = 20
                print('fps set: ', fps)
                VideoHandler.set_fps(fps)
                counter.set_fps(fps)
                fpeses.append(fps)
                motion_detection = True
            else:
                print('\nflag writing video: ', VideoHandler.flag_writing_video)
                print('flag stop writing: ', VideoHandler.flag_stop_writing)
                print('flag anyone in door: ', flag_anyone_in_door)
                print('counter frames indoor: ', VideoHandler.counter_frames_indoor)
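
The in/out decisions in Exemple #16 hinge on find_ratio_ofbboxes(bbox, rect_compare), which is not listed here; it appears to measure how much of a tracked box overlaps the door (or around-door) rectangle. A plausible sketch, assuming the ratio is intersection area over the box's own area and that rect_compare unpacks as (xmin, ymin, xmax, ymax):

def find_ratio_ofbboxes(bbox, rect_compare):
    # bbox: (x1, y1, x2, y2) of the tracked person; rect_compare: comparison rectangle, same layout.
    rx1, ry1, rx2, ry2 = rect_compare
    ix1, iy1 = max(bbox[0], rx1), max(bbox[1], ry1)
    ix2, iy2 = min(bbox[2], rx2), min(bbox[3], ry2)
    inter = max(0, ix2 - ix1) * max(0, iy2 - iy1)
    area = max(1, (bbox[2] - bbox[0]) * (bbox[3] - bbox[1]))  # guard against zero-area boxes
    return inter / area
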
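
The same example estimates the effective FPS by collecting per-frame processing rates for the first 30 frames and fixing their median for the video writer and counter. A condensed sketch of that warm-up logic:

import numpy as np

class FpsEstimator:
    def __init__(self, warmup=30):
        self.samples, self.warmup, self.fps = [], warmup, None

    def update(self, frame_seconds):
        # Collect instantaneous rates until the warm-up window fills,
        # then lock in their median as the estimated FPS.
        if self.fps is None and frame_seconds > 0:
            self.samples.append(1.0 / frame_seconds)
            if len(self.samples) >= self.warmup:
                self.fps = round(float(np.median(self.samples)), 2)
        return self.fps

# est = FpsEstimator()
# fps = est.update(delta_time)  # call once per processed frame; returns None until warmed up
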
Exemple #17
0
def detect(opt, save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith('rtsp') or source.startswith(
        'http') or source.endswith('.txt')

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Initialize
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    names = model.module.names if hasattr(
        model, 'module') else model.names  # get class names
    if half:
        model.half()  # to FP16

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        view_img = True
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    # run once
    _ = model(img.half() if half else img) if device.type != 'cpu' else None

    save_path = str(Path(out))
    txt_path = str(Path(out)) + '/results.txt'

    for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            s += '%gx%g ' % img.shape[2:]  # print string
            save_path = str(Path(out) / Path(p).name)

            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                bbox_xywh = []
                confs = []

                # Adapt detections to deep sort input format
                for *xyxy, conf, cls in det:
                    x_c, y_c, bbox_w, bbox_h = bbox_rel(*xyxy)
                    obj = [x_c, y_c, bbox_w, bbox_h]
                    bbox_xywh.append(obj)
                    confs.append([conf.item()])

                xywhs = torch.Tensor(bbox_xywh)
                confss = torch.Tensor(confs)

                # Pass detections to deepsort
                outputs = deepsort.update(xywhs, confss, im0)

                # draw boxes for visualization
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, -1]
                    draw_boxes(im0, bbox_xyxy, identities)

                # Write MOT compliant results to file
                if save_txt and len(outputs) != 0:
                    for j, output in enumerate(outputs):
                        bbox_left = output[0]
                        bbox_top = output[1]
                        bbox_w = output[2]
                        bbox_h = output[3]
                        identity = output[-1]
                        with open(txt_path, 'a') as f:
                            f.write(('%g ' * 10 + '\n') %
                                    (frame_idx, identity, bbox_left, bbox_top,
                                     bbox_w, bbox_h, -1, -1, -1,
                                     -1))  # label format

            else:
                deepsort.increment_ages()

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                print('saving img!')
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    print('saving video!')
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer

                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*opt.fourcc),
                            fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        if platform == 'darwin':  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
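
Most of these examples convert YOLO's corner-format boxes into the center-based (x_c, y_c, w, h) layout DeepSORT expects via bbox_rel(*xyxy); the helper itself is not shown. A likely implementation, included here only as an assumption, is:

def bbox_rel(x1, y1, x2, y2):
    # Corner-format box (x1, y1, x2, y2) -> center x/y plus width/height.
    w = float(x2) - float(x1)
    h = float(y2) - float(y1)
    return float(x1) + w / 2, float(y1) + h / 2, w, h
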
def detect(opt, save_img=False):
    ct = CentroidTracker()
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith(
        'rtsp') or source.startswith('http') or source.endswith('.txt')

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Initialize
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA
    now = datetime.datetime.now().strftime("%Y/%m/%d/%H:%M:%S") # current time

    # Load model
    model = torch.load(weights, map_location=device)[
        'model'].float()  # load to FP32
    
    model.to(device).eval()
    
# =============================================================================
    filepath_mask = 'D:/Internship Crime Detection/YOLOv5 person detection/AjnaTask/Mytracker/yolov5/weights/mask.pt'
        
    model_mask = torch.load(filepath_mask, map_location = device)['model'].float()
    model_mask.to(device).eval()
    if half:
        model_mask.half()
        
    names_m = model_mask.module.names if hasattr(model_mask, 'module') else model_mask.names
# =============================================================================
    
    if half:
        model.half()  # to FP16

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        view_img = False
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    # run once
    _ = model(img.half() if half else img) if device.type != 'cpu' else None

    save_path = str(Path(out))
    txt_path = str(Path(out)) + '/results.txt'

    memory = {}
    people_counter = 0
    in_people = 0
    out_people = 0
    people_mask = 0
    people_none = 0
    time_sum = 0
    # now_time = datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S')
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]
    for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]
# =============================================================================
        pred_mask = model_mask(img)[0]
# =============================================================================
        # Apply NMS
        pred = non_max_suppression(
            pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        
# =============================================================================
        pred_mask = non_max_suppression(pred_mask, 0.4, 0.5, classes = [0, 1, 2], agnostic = None)
        
        if pred_mask is None:
            continue
        classification = torch.cat(pred_mask)[:, -1]
        if len(classification) == 0:
            print("----",None)
            continue
        index = int(classification[0])
        
        mask_class = names_m[index]
        print("MASK CLASS>>>>>>> \n", mask_class)
# =============================================================================

        # Create the haar cascade
        # cascPath = "D:/Internship Crime Detection/YOLOv5 person detection/AjnaTask/Mytracker/haarcascade_frontalface_alt2.xml"
        # faceCascade = cv2.CascadeClassifier(cascPath)
        
        
        t2 = time_synchronized()
        
        
        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            s += '%gx%g ' % img.shape[2:]  # print string
            save_path = str(Path(out) / Path(p).name)
            img_center_y = int(im0.shape[0]//2)
            # line = [(int(im0.shape[1]*0.258),int(img_center_y*1.3)),(int(im0.shape[1]*0.55),int(img_center_y*1.3))]
            # print("LINE>>>>>>>>>", line,"------------")
            # line = [(990, 672), (1072, 24)]
            line = [(1272, 892), (1800, 203)]
            #  [(330, 468), (704, 468)]
            print("LINE>>>>>>>>>", line,"------------")
            cv2.line(im0,line[0],line[1],(0,0,255),5)
            
# =============================================================================
#             gray = cv2.cvtColor(im0, cv2.COLOR_BGR2GRAY)
#             # Detect faces in the image
#             faces = faceCascade.detectMultiScale(
#             gray,
#             scaleFactor=1.1,
#             minNeighbors=5,
#             minSize=(30, 30)
#             )
#             # Draw a rectangle around the faces
#             for (x, y, w, h) in faces:
#                 cv2.rectangle(im0, (x, y), (x+w, y+h), (0, 255, 0), 2)
#                 text_x = x
#                 text_y = y+h
#                 cv2.putText(im0, mask_class, (text_x, text_y), cv2.FONT_HERSHEY_COMPLEX_SMALL,
#                                                     1, (0, 0, 255), thickness=1, lineType=2)
# =============================================================================
        
            
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(
                    img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                bbox_xywh = []
                confs = []
                bbox_xyxy = []
                rects = []  # xyxy boxes to feed the centroid tracker

                # Adapt detections to deep sort input format
                for *xyxy, conf, cls in det:
                    x_c, y_c, bbox_w, bbox_h = bbox_rel(*xyxy)
                    # label = f'{names[int(cls)]}'
                    xyxy_list = torch.tensor(xyxy).view(-1).tolist()
                    plot_one_box(xyxy, im0, label='person', color=colors[int(cls)], line_thickness=3)
                    rects.append(xyxy_list)
                    
                    obj = [x_c, y_c, bbox_w, bbox_h,int(cls)]
                    #cv2.circle(im0,(int(x_c),int(y_c)),color=(0,255,255),radius=12,thickness = 10)
                    bbox_xywh.append(obj)
                    # bbox_xyxy.append(rec)
                    confs.append([conf.item()])
                    


                xywhs = torch.Tensor(bbox_xywh)
                confss = torch.Tensor(confs)

                # Pass detections to deepsort
                outputs = ct.update(rects) # xyxy
                # outputs = deepsort.update(xywhs, confss, im0) # deepsort
                index_id = []
                previous = memory.copy()
                memory = {}
                boxes = []
                names_ls = []
                


                # draw boxes for visualization
                if len(outputs) > 0:
                    
                    # print('output len',len(outputs))
                    for id_,centroid in outputs.items():
                        # boxes.append([output[0],output[1],output[2],output[3]])
                        # index_id.append('{}-{}'.format(names_ls[-1],output[-2]))
                        index_id.append(id_)
                        boxes.append(centroid)
                        memory[index_id[-1]] = boxes[-1]

                    
                    i = 0
                    print(">>>>>>>",boxes)
                    for box in boxes:
                        # extract the bounding box coordinates
                        # (x, y) = (int(box[0]), int(box[1]))
                        # (w, h) = (int(box[2]), int(box[3]))
                        x = int(box[0])
                        y = int(box[1])
                        # compare this track's current centroid with its previous one
                        if index_id[i] in previous:
                            previous_box = previous[index_id[i]]
                            (x2, y2) = (int(previous_box[0]), int(previous_box[1]))
                            # (w2, h2) = (int(previous_box[2]), int(previous_box[3]))
                            p0 = (x,y)
                            p1 = (x2,y2)
                            
                            cv2.line(im0, p0, p1, (0, 255, 0), 3)  # segment from previous-frame centroid to current-frame centroid
                        
                            if intersect(p0, p1, line[0], line[1]):
                                people_counter += 1
                                print('==============================')
                                print(p0,"---------------------------",p0[1])
                                print('==============================')
                                print(line[1][1],'------------------',line[0][0],'-----------------', line[1][0],'-------------',line[0][1])
                                # if p0[1] <= line[1][1]:
                                #     in_people +=1
                    
                                
                                # else:
                                #     # if mask_class == 'mask':
                                #     #     print("COUNTING MASK..", mask_class)
                                #     #     people_mask += 1
                                #     # if mask_class == 'none':
                                #     #     people_none += 1
                                #     out_people +=1 
                                if p0[1] >= line[1][1]:
                                    in_people += 1
                                    if mask_class == 'mask':
                                        people_mask += 1
                                    else:
                                        people_none += 1
                                else:
                                    out_people += 1
                            

                        i += 1

                    
                        
                # Write MOT compliant results to file
                if save_txt and len(outputs) != 0:
                    for j, output in enumerate(outputs):
                        bbox_left = output[0]
                        bbox_top = output[1]
                        bbox_w = output[2]
                        bbox_h = output[3]
                        identity = output[-1]
                        with open(txt_path, 'a') as f:
                            f.write(('%g ' * 10 + '\n') % (frame_idx, identity, bbox_left,
                                                           bbox_top, bbox_w, bbox_h, -1, -1, -1, -1))  # label format
                
            else:
                deepsort.increment_ages()
            cv2.putText(im0, 'Person [down][up] : [{}][{}]'.format(out_people,in_people),(130,50),cv2.FONT_HERSHEY_COMPLEX,1.0,(0,0,255),3)
            cv2.putText(im0, 'Person [mask][no_mask] : [{}][{}]'.format(people_mask, people_none), (130,100),cv2.FONT_HERSHEY_COMPLEX,1.0,(0,0,255),3)
            # Print time (inference + NMS)
            
            
            print('%sDone. (%.3fs)' % (s, t2 - t1))
            time_sum += t2-t1
            
            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    # im0= cv2.resize(im0,(0,0),fx=0.5,fy=0.5,interpolation=cv2.INTER_LINEAR)
                    cv2.imwrite(save_path, im0)
                else:
                    
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*opt.fourcc), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % (os.getcwd() + os.sep + out))
        if platform == 'darwin':  # MacOS
            os.system('open ' + save_path)
    
    print('Done. (%.3fs)' % (time.time() - t0))
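
# ---------------------------------------------------------------------------
# Note (sketch, not part of the original example): the counting logic above
# calls intersect(p0, p1, line[0], line[1]) to decide whether the segment
# joining a track's previous and current centroids crosses the counting line.
# The usual implementation is the counter-clockwise (ccw) orientation test:
def ccw(a, b, c):
    """Return True if the points a, b, c are in counter-clockwise order."""
    return (c[1] - a[1]) * (b[0] - a[0]) > (b[1] - a[1]) * (c[0] - a[0])


def intersect(a, b, c, d):
    """Return True if segment ab crosses segment cd."""
    return ccw(a, c, d) != ccw(b, c, d) and ccw(a, b, d) != ccw(a, b, c)
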
def detect(save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith('rtsp') or source.startswith(
        'http') or source.endswith('.txt')

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Initialize
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    if not os.path.exists(opt.smooth_txt):
        os.makedirs(opt.smooth_txt)

    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # # Second-stage classifier
    # classify = False
    # if classify:
    #     modelc = load_classifier(name='resnet101', n=2)  # initialize
    #     modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
    #     modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[np.random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img

    _ = model(img.half() if half else img
              ) if device.type != 'cpu' else None  # run once
    # dataset contains all the frames (or images) of the video
    crds_crop = np.empty(
        (0, 4))  # contains coordinates of a single bbox with the highest conf
    np_nan = np.empty([1, 4])  # for tracking
    np_nan[:] = np.nan  # for tracking
    frame_no = 0
    for path, img, im0s, vid_cap in dataset:  # im0s, img - initial, resized and padded (img)
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # # Apply Classifier
        # if classify:
        #     pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out) / Path(p).stem) + (
                '_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            # gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if det is not None and len(det):  # only when obj is in the frame
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()
                # these will be used for the deepsort input
                xywhs = xyxy2xywh(det[:, :4].cpu())
                confs = det[:, 4].cpu()
                # Pass detections to deepsort
                outputs = deepsort.update(xywhs, confs,
                                          im0)  # this is numpy array
                ###########################################################
                # FOR NOW, WE WILL ONLY BE KEEPING THE MOST CONFIDENT VALUE
                ###########################################################
                max_conf_id = confs.argmax()
                # keeping the coordinates row with max conf (det now only keeps one row and four columns)
                det = det[max_conf_id, :].reshape(1, 6)
                to_append = xyxy2xywh(det[:, :4].cpu().numpy().reshape(
                    1, 4).astype(int))
                if len(crds_crop) == 0:
                    crds_crop = np.append(crds_crop, to_append).reshape(-1, 4)
                else:
                    crds_crop = np.append(crds_crop, to_append, axis=0)
                # draw boxes for visualization
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, -1]
                    # draw_boxes(im0, bbox_xyxy, identities)  # no tracking boxes for now
                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in det:
                    if save_txt:  # Write to file
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * 5 + '\n') %
                                    (*xyxy, conf))  # label format

                    if save_img or view_img:  # Add bbox to image
                        label = '%s' % (names[int(cls)])
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)],
                                     line_thickness=2)

            else:
                deepsort.increment_ages()
                if len(crds_crop) == 0:
                    crds_crop = np.append(crds_crop, np_nan).reshape(-1, 4)
                else:
                    crds_crop = np.append(crds_crop, np_nan, axis=0)
            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)

                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*fourcc), fps,
                            (w, h))
                    vid_writer.write(im0)
        frame_no += 1
    ##############################################################################
    # this part should be temporary as online filtering will be implemented
    crds_crop = linear_interp(crds_crop)
    max_side_bbox = crds_crop[:, 2:].max(axis=1) * 1.2  # 20% relaxation
    # making sure that the window size does not exceed frame size
    max_side_bbox = np.where(max_side_bbox < min(w, h), max_side_bbox,
                             min(w, h))
    crds_crop = np.c_[crds_crop, max_side_bbox]
    crds_crop = smoothing(crds_crop, fps)
    np.savetxt(os.path.join(opt.smooth_txt,
                            os.path.basename(path)[:-4] + '_savgol_' + '.txt'),
               crds_crop,
               delimiter=' ')
    if save_txt or save_img:
        print('Results saved to %s' % Path(out))
        if platform == 'darwin' and not opt.update:  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
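
# ---------------------------------------------------------------------------
# Note (sketch, with assumptions): linear_interp() and smoothing() used above
# are defined elsewhere in the project. Given that NaN rows are appended for
# frames without a detection and the output file name contains '_savgol_',
# a plausible pair of helpers interpolates the NaN gaps per column and then
# applies a Savitzky-Golay filter whose window scales with the video fps.
import numpy as np
from scipy.signal import savgol_filter


def linear_interp(coords):
    """Fill NaN rows of an (n_frames, 4) array by per-column linear interpolation."""
    coords = np.asarray(coords, dtype=float)
    idx = np.arange(len(coords))
    for col in range(coords.shape[1]):
        nan = np.isnan(coords[:, col])
        if nan.any() and (~nan).any():
            coords[nan, col] = np.interp(idx[nan], idx[~nan], coords[~nan, col])
    return coords


def smoothing(coords, fps, polyorder=2):
    """Smooth each column with a Savitzky-Golay filter (odd window of about 1 s)."""
    window = max(polyorder + 1, int(fps) | 1)  # keep the window length odd
    return savgol_filter(coords, window_length=window, polyorder=polyorder, axis=0)
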
def detect(config):
    sent_videos = set()
    video_name = ""
    fpeses = []
    fps = 0

    # door_array = select_object()
    # door_array = [475, 69, 557, 258]
    global flag, vid_writer, lost_ids
    # initial parameters
    # door_array = [528, 21, 581, 315]
    # door_array = [596, 76, 650, 295]  #  18 stream
    door_array = [611, 70, 663, 310]
    # around_door_array = [572, 79, 694, 306]  #
    # around_door_array = [470, 34, 722, 391]
    around_door_array = [507, 24, 724, 374]
    low_border = 225
    #
    door_c = find_centroid(door_array)
    rect_door = Rectangle(door_array[0], door_array[1], door_array[2], door_array[3])
    rect_around_door = Rectangle(around_door_array[0], around_door_array[1], around_door_array[2], around_door_array[3])
    # socket
    HOST = "localhost"
    PORT = 8084
    # camera info
    save_img = True
    imgsz = (416, 416) if ONNX_EXPORT else config[
        "img_size"]  # (320, 192) or (416, 256) or (608, 352) for (height, width)
    out, source, weights, half, view_img = config["output"], config["source"], config["weights"], \
                                           config["half"], config["view_img"]
    webcam = source == '0' or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt')
    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(config["config_deepsort"])
    # initial objects of classes
    counter = Counter(counter_in=0, counter_out=0, track_id=0)
    VideoHandler = Writer()
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)
    # Initialize device, weights etc.
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    # Initialize colors
    names = load_classes(config["names"])
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]

    if config["category_num"] <= 0:
        raise SystemExit('ERROR: bad category_num (%d)!' % config["category_num"])
    if not os.path.isfile('yolo/%s.trt' % config["model"]):
        raise SystemExit('ERROR: file (yolo/%s.trt) not found!' % config["model"])

    cap = cv2.VideoCapture(config["source"])
    if not cap.isOpened():
        raise SystemExit('ERROR: failed to open the input video file!')
    frame_width, frame_height = int(cap.get(3)), int(cap.get(4))

    cls_dict = get_cls_dict(config["category_num"])
    vis = BBoxVisualization(cls_dict)
    h, w = get_input_shape(config["model"])
    trt_yolo = TrtYOLO(config["model"], (h, w), config["category_num"], config["letter_box"])


    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
        sock.connect((HOST, PORT))
        img_shape = (288, 288)
        # for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
        while True:
            ret, im0 = cap.read()
            if not ret:
                break
 
            preds, confs, clss = perform_detection(frame=im0, trt_yolo=trt_yolo, conf_th=config["conf_thres"], vis=vis)

            flag_move = False
            flag_anyone_in_door = False
            t0 = time.time()
            ratio_detection = 0

            # Process detections
            lost_ids = counter.return_lost_ids()
            for i, (det, conf, cls) in enumerate(zip(preds, confs, clss)):

                if det is not None and len(det):
                    # Rescale boxes from imgsz to im0 size
                    # det = scale_coords(img_shape, det, im0.shape).round()
                    if names[int(cls)] not in config["needed_classes"]:
                        continue
                    # bbox_xywh = []
                    # confs = []
                    # Write results
                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(det, im0, label=label, color=colors[int(cls)])

            detections = torch.Tensor(preds)
            confidences = torch.Tensor(confs)

            # Pass detections to deepsort
            if len(detections) == 0:
                continue
            outputs = deepsort.update(detections, confidences, im0)
            print('detections ', detections)
            print('outputs ', outputs)          

            # draw boxes for visualization
            if len(outputs) > 0:
                bbox_xyxy = outputs[:, :4]
                identities = outputs[:, -1]
                draw_boxes(im0, bbox_xyxy, identities)
                print('bbox_xyxy ', bbox_xyxy)
                counter.update_identities(identities)

                for bbox_tracked, id_tracked in zip(bbox_xyxy, identities):

                    rect_detection = Rectangle(bbox_tracked[0], bbox_tracked[1],
                                               bbox_tracked[2], bbox_tracked[3])
                    inter_detection = rect_detection & rect_around_door
                    if inter_detection:
                        inter_square_detection = rect_square(*inter_detection)
                        cur_square_detection = rect_square(*rect_detection)
                        try:
                            ratio_detection = inter_square_detection / cur_square_detection
                        except ZeroDivisionError:
                            ratio_detection = 0
                        # the person enters the door contour for the first time
                    if ratio_detection > 0.2:
                        if VideoHandler.counter_frames_indoor == 0:
                            # flag that video recording has started
                            VideoHandler.start_video(id_tracked)
                        flag_anyone_in_door = True

                    elif ratio_detection > 0.2 and id_tracked not in VideoHandler.id_inside_door_detected:
                        VideoHandler.continue_opened_video(id=id_tracked, seconds=3)
                        flag_anyone_in_door = True

                    # elif ratio_detection > 0.6 and counter.people_init.get(id_tracked) == 1:
                    #     VideoHandler.continue_opened_video(id=id_tracked, seconds=0.005)

                    if id_tracked not in counter.people_init or counter.people_init[id_tracked] == 0:
                        counter.obj_initialized(id_tracked)
                        rect_head = Rectangle(bbox_tracked[0], bbox_tracked[1], bbox_tracked[2],
                                              bbox_tracked[3])
                        intersection = rect_head & rect_door
                        if intersection:
                            intersection_square = rect_square(*intersection)
                            head_square = rect_square(*rect_head)
                            rat = intersection_square / head_square
                            if rat >= 0.4 and bbox_tracked[3] > low_border :
                                #     was initialized in door, probably going out of office
                                counter.people_init[id_tracked] = 2
                            elif rat < 0.4:
                                #     initialized in the corridor, mb going in
                                counter.people_init[id_tracked] = 1
                        else:
                            # res is None, means that object is not in door contour
                            counter.people_init[id_tracked] = 1
                        counter.frame_age_counter[id_tracked] = 0

                        counter.people_bbox[id_tracked] = bbox_tracked

                    counter.cur_bbox[id_tracked] = bbox_tracked
            else:
                deepsort.increment_ages()
            # Print time (inference + NMS)
            t2 = torch_utils.time_synchronized()

                # Stream results
            vals_to_del = []
            for val in counter.people_init.keys():
                # check bbox also
                inter = 0
                cur_square = 0
                ratio = 0
                cur_c = find_centroid(counter.cur_bbox[val])
                centroid_distance = np.sum(np.array([(door_c[i] - cur_c[i]) ** 2 for i in range(len(door_c))]))

                # init_c = find_centroid(counter.people_bbox[val])
                # vector_person = (cur_c[0] - init_c[0],
                #                  cur_c[1] - init_c[1])

                rect_cur = Rectangle(counter.cur_bbox[val][0], counter.cur_bbox[val][1],
                                     counter.cur_bbox[val][2], counter.cur_bbox[val][3])
                inter = rect_cur & rect_door

                if val in lost_ids and counter.people_init[val] != -1:

                    if inter:
                        inter_square = rect_square(*inter)
                        cur_square = rect_square(*rect_cur)
                        try:
                            ratio = inter_square / cur_square

                        except ZeroDivisionError:
                            ratio = 0
                    # if vector_person < 0 then current coord is less than initialized, it means that man is going
                    # in the exit direction

                    if counter.people_init[val] == 2 \
                            and ratio < 0.4 and centroid_distance > 5000:
                        print('ratio out: {}\n centroids: {}\n'.format(ratio, centroid_distance))
                        counter.get_out()
                        counter.people_init[val] = -1
                        VideoHandler.stop_recording(action_occured="left the office")

                        vals_to_del.append(val)

                    elif counter.people_init[val] == 1 \
                            and ratio >= 0.4 and centroid_distance < 1000:
                        print('ratio in: {}\n centroids: {}\n'.format(ratio, centroid_distance))
                        counter.get_in()
                        counter.people_init[val] = -1
                        VideoHandler.stop_recording(action_occured="entered the office")
                        vals_to_del.append(val)

                    lost_ids.remove(val)

                # TODO maybe delete this condition
                elif counter.frame_age_counter.get(val, 0) >= counter.max_frame_age_counter \
                        and counter.people_init[val] == 2:
                    if inter:
                        inter_square = rect_square(*inter)
                        cur_square = rect_square(*rect_cur)
                        try:
                            ratio = inter_square / cur_square
                        except ZeroDivisionError:
                            ratio = 0

                    if ratio < 0.2 and centroid_distance > 10000:
                        counter.get_out()
                        print('ratio out max frames: ', ratio)
                        counter.people_init[val] = -1
                        VideoHandler.stop_recording(action_occured="left")
                        vals_to_del.append(val)
                    counter.age_counter[val] = 0

                counter.clear_lost_ids()

            for valtodel in vals_to_del:
                counter.delete_person_data(track_id=valtodel)

            ins, outs = counter.show_counter()
            cv2.rectangle(im0, (0, 0), (250, 50),
                          (0, 0, 0), -1, 8)

            cv2.rectangle(im0, (int(door_array[0]), int(door_array[1])),
                          (int(door_array[2]), int(door_array[3])),
                          (23, 158, 21), 3)

            cv2.rectangle(im0, (int(around_door_array[0]), int(around_door_array[1])),
                          (int(around_door_array[2]), int(around_door_array[3])),
                          (48, 58, 221), 3)

            cv2.putText(im0, "in: {}, out: {} ".format(ins, outs), (10, 35), 0,
                        1e-3 * im0.shape[0], (255, 255, 255), 3)

            cv2.line(im0, (door_array[0], low_border), (880, low_border), (214, 4, 54), 4)

            if VideoHandler.stop_writing(im0):
                # send_new_posts(video_name, action_occured)
                sock.sendall(bytes(VideoHandler.video_name + ":" + VideoHandler.action_occured, "utf-8"))
                data = sock.recv(100)
                print('Received', repr(data.decode("utf-8")))
                sent_videos.add(VideoHandler.video_name)
                with open('../data_files/logs2.txt', 'a', encoding="utf-8-sig") as wr:
                    wr.write('video {}, action {}, centroid {} '.format(VideoHandler.video_name, VideoHandler.action_occured, centroid_distance))

                VideoHandler = Writer()
                VideoHandler.set_fps(fps)

            else:
                VideoHandler.continue_writing(im0, flag_anyone_in_door)

            if view_img:
                cv2.imshow('image', im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            delta_time = (time.time() - t0)
            # t2_ds = time.time()
            # print('%s Torch:. (%.3fs)' % (s, t2 - t1))
            # print('Full pipe. (%.3fs)' % (t2_ds - t0_ds))
            if len(fpeses) < 30:
                fpeses.append(round(1 / delta_time))
            elif len(fpeses) == 30:
                # fps = round(np.median(np.array(fpeses)))
                fps = np.median(np.array(fpeses))
                # fps = 3
                print('fps set: ', fps)
                VideoHandler.set_fps(fps)
                counter.set_fps(fps)
                fpeses.append(fps)
                motion_detection = True
            else:
                print('\nflag writing video: ', VideoHandler.flag_writing_video)
                print('flag stop writing: ', VideoHandler.flag_stop_writing)
                print('flag anyone in door: ', flag_anyone_in_door)
                print('counter frames indoor: ', VideoHandler.counter_frames_indoor)
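
# ---------------------------------------------------------------------------
# Note (sketch, with assumptions): Rectangle, rect_square and find_centroid
# used by the door-counting example above are defined outside this snippet.
# Consistent with how they are called (rect_a & rect_b yields the overlap box
# or a falsy value, rect_square(*box) yields an area, find_centroid(box)
# yields a center point), they could look like this:
from collections import namedtuple


class Rectangle(namedtuple('Rectangle', 'xmin ymin xmax ymax')):
    __slots__ = ()

    def __and__(self, other):
        """Return the overlapping Rectangle, or None when there is no overlap."""
        xmin, ymin = max(self.xmin, other.xmin), max(self.ymin, other.ymin)
        xmax, ymax = min(self.xmax, other.xmax), min(self.ymax, other.ymax)
        if xmin < xmax and ymin < ymax:
            return Rectangle(xmin, ymin, xmax, ymax)
        return None


def rect_square(xmin, ymin, xmax, ymax):
    """Area of an axis-aligned rectangle given by its corner coordinates."""
    return (xmax - xmin) * (ymax - ymin)


def find_centroid(box):
    """Center point of an (xmin, ymin, xmax, ymax) box."""
    return (box[0] + box[2]) / 2, (box[1] + box[3]) / 2
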
Exemple #21
0
def detect(save_img=False):
    source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.endswith(
        '.txt') or source.lower().startswith(('rtsp://', 'rtmp://', 'http://'))

    ##### Initialize DeepSORT
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Directories
    save_dir = Path(
        increment_path(Path(opt.project) / opt.name,
                       exist_ok=opt.exist_ok))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(
        parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img
              ) if device.type != 'cpu' else None  # run once
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(
                ), dataset.count
            else:
                p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + (
                '' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh

            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f'{n} {names[int(c)]}s, '  # add to string

                ###### Add DeepSORT
                bbox_xywh = []
                confs = []

                # Adapt detections to deep sort input format
                for *xyxy, conf, cls in det:
                    x_c, y_c, bbox_w, bbox_h = bbox_rel(*xyxy)
                    obj = [x_c, y_c, bbox_w, bbox_h]
                    bbox_xywh.append(obj)
                    confs.append([conf.item()])

                xywhs = torch.Tensor(bbox_xywh)
                confss = torch.Tensor(confs)

                # Pass detections to deepsort
                outputs = deepsort.update(xywhs, confss, im0)

                # draw boxes for visualization
                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, -1]
                    draw_boxes(im0, bbox_xyxy, identities)

                ######


#                 # Write results
#                 for *xyxy, conf, cls in reversed(det):
#                     if save_txt:  # Write to file
#                         xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
#                         line = (cls, *xywh, conf) if opt.save_conf else (cls, *xywh)  # label format
#                         with open(txt_path + '.txt', 'a') as f:
#                             f.write(('%g ' * len(line)).rstrip() % line + '\n')

#                     if save_img or view_img:  # Add bbox to image
#                         label = f'{names[int(cls)]} {conf:.2f}'
#                         plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)

            else:
                deepsort.increment_ages()

            # Print time (inference + NMS)
            print(f'{s}Done. ({t2 - t1:.3f}s)')

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video'
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*fourcc), fps,
                            (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")

    print(f'Done. ({time.time() - t0:.3f}s)')
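
# ---------------------------------------------------------------------------
# Note (sketch, not the packaged helper): draw_boxes(img, bbox_xyxy,
# identities) is imported from the tracker utilities in these examples. A
# minimal version matching that three-argument usage draws one rectangle per
# track and labels it with the track id:
import cv2


def draw_boxes(img, bbox_xyxy, identities=None):
    for i, box in enumerate(bbox_xyxy):
        x1, y1, x2, y2 = [int(v) for v in box]
        track_id = int(identities[i]) if identities is not None else 0
        color = ((37 * track_id) % 255, (17 * track_id) % 255, (29 * track_id) % 255)
        cv2.rectangle(img, (x1, y1), (x2, y2), color, 2)
        cv2.putText(img, str(track_id), (x1, y1 - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
    return img
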
Exemple #22
0
    def __init__(self):
        self.count = 0

        self.root = tk.Tk()

        self.out = "inference/output"
        self.source = "inference/images"
        self.weights = "yolov5/weights/yolov5s.pt"
        self.view_img = False
        self.save_txt = False
        self.imgsz = 1088
        self.iou_thres = 0.5
        self.classes = [0, 1, 2, 3, 5, 7]
        self.conf_thres = 0.4
        self.fourcc = "mp4v"
        self.config_deepsort = "deep_sort_pytorch/configs/deep_sort.yaml"
        self.device = ""
        self.agnostic_nms = False
        self.augment = False
        self.two_w, self.three_w, self.four_w, self.truck, self.bus, self.total = (
            None,
            None,
            None,
            None,
            None,
            None,
        )
        self.count = 0
        self.fps = None
        font = ("Arial", 25)
        self.root.resizable(0, 0)
        self.panel = tk.Frame(self.root)
        self.panel.pack(side="top", padx=10, pady=10)
        self.canvas = tk.Label(self.panel,
                               text="loading...",
                               anchor="center",
                               font=font,
                               fg="blue")
        self.canvas.pack(side="left", padx=10, pady=10)
        self.counting_result = tk.Frame(self.root)
        self.counting_result.pack(side="bottom", padx=10, pady=10)
        self.Quit_btn = tk.Button(
            self.counting_result,
            text="Quit",
            font=("Arial", 12),
            command=self.onClose,
            bg="red",
            fg="white",
            width=6,
        )
        self.Quit_btn.grid(row=2, column=5)

        # set a callback to handle when the window is closed
        self.root.wm_title("Traffic")
        self.root.wm_protocol("WM_DELETE_WINDOW", self.onClose)

        # Open camera source
        # self.vid = oneCameraCapture.cameraCapture()
        self.vs = cv2.VideoCapture("traffic3.mp4")

        self.webcam = (self.source == "0" or self.source.startswith("rtsp")
                       or self.source.startswith("http")
                       or self.source.endswith(".txt"))

        # initialize deepsort
        cfg = get_config()
        cfg.merge_from_file(self.config_deepsort)
        self.deepsort = DeepSort(
            cfg.DEEPSORT.REID_CKPT,
            max_dist=cfg.DEEPSORT.MAX_DIST,
            min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
            nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
            max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
            max_age=cfg.DEEPSORT.MAX_AGE,
            n_init=cfg.DEEPSORT.N_INIT,
            nn_budget=cfg.DEEPSORT.NN_BUDGET,
            use_cuda=True,
        )

        # Initialize
        self.device = select_device(self.device)
        if os.path.exists(self.out):
            shutil.rmtree(self.out)  # delete output folder
        os.makedirs(self.out)  # make new output folder
        self.half = self.device.type != "cpu"  # half precision only supported on CUDA

        # Load model
        self.model = torch.hub.load("ultralytics/yolov5",
                                    "yolov5s",
                                    pretrained=True)
        self.source = "traffic3.mp4"
        self.save_path = str(Path(self.out))
        self.txt_path = str(Path(self.out)) + "/results.txt"
        self.points = [[5, 100], [400, 100], [730, 300], [5, 300]]
        self.pts = np.array(self.points, np.int32)
        self.pts_arr = self.pts.reshape((-1, 1, 2))
        self.isClosed = True
        self.delay = 100
        self.update()
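
    # -----------------------------------------------------------------------
    # Note (sketch, not part of the original example): __init__ above ends by
    # calling self.update(), which is not shown here. With Tkinter the usual
    # refresh loop reads one frame, renders it into the label and reschedules
    # itself via root.after(); a plausible version (named update_sketch so it
    # is not mistaken for the original method) could be:
    def update_sketch(self):
        import cv2
        from PIL import Image, ImageTk
        ret, frame = self.vs.read()
        if ret:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            photo = ImageTk.PhotoImage(Image.fromarray(frame))
            self.canvas.configure(image=photo)
            self.canvas.image = photo  # keep a reference so Tk does not drop it
        self.root.after(self.delay, self.update_sketch)
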
Exemple #23
0
    def frames():
        logger = Logger()
        print('Already initialized...')
        camera = cv2.VideoCapture(Camera.video_source)
        if not camera.isOpened():
            raise RuntimeError('Could not start camera.')

        out, weights, imgsz = \
        '.inference/output', 'weights/yolov5s.pt', 640
        source = "0"
        # print(source)
        # print(type(source))
        webcam = source.isnumeric()
        # print('webcam: {0}'.format(webcam))
        '''
        Initialize deepsort
        '''
        # initialize deepsort
        cfg = get_config()
        cfg.merge_from_file('deep_sort_pytorch/configs/deep_sort.yaml')
        deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                            max_dist=cfg.DEEPSORT.MAX_DIST,
                            min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                            nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                            max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                            max_age=cfg.DEEPSORT.MAX_AGE,
                            n_init=cfg.DEEPSORT.N_INIT,
                            nn_budget=cfg.DEEPSORT.NN_BUDGET,
                            use_cuda=True)

        device = torch_utils.select_device()
        # print(weights)
        # print(os.getcwd())
        if os.path.exists(out):
            shutil.rmtree(out)  # delete output folder
        os.makedirs(out)  # make new output folder
        #shutil.rmtree(out)
        # Load model
        # google_utils.attempt_download(weights)
        # model = torch.load(weights, map_location=device)['model']
        model = attempt_load(weights, map_location=device)  # load FP32 model
        model.to(device).eval()

        # Second-stage classifier
        classify = False
        if classify:
            modelc = load_classifier(name='resnet101', n=2)  # initialize
            modelc.load_state_dict(
                torch.load('weights/resnet101.pt',
                           map_location=device)['model'])  # load weights
            modelc.to(device).eval()

        # Half precision
        half = False and device.type != 'cpu'
        # print('half = ' + str(half))

        if half:
            model.half()

        # Set Dataloader
        vid_path, vid_writer = None, None
        # #if webcam:
        # view_img = True
        # cudnn.benchmark = True  # set True to speed up constant image size inference
        # dataset = LoadStreams(source, img_size=imgsz)
        # else:
        # save_img = True
        # #     # To display frames during video detection, add a line view_img = True here
        # view_img = True
        #     dataset = LoadImages(source, img_size=imgsz)
        # vid_path, vid_writer = None, None
        #dataset = LoadImages(source, img_size=imgsz)
        dataset = LoadStreams(source, img_size=imgsz)
        # print('dataset: {0}'.format(dataset))
        names = model.module.names if hasattr(model, 'module') else model.names
        # print('----')
        # print(names)
        colors = [[random.randint(0, 255) for _ in range(3)]
                  for _ in range(len(names))]

        # Run inference
        t0 = time.time()
        img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
        _ = model(img.half() if half else img
                  ) if device.type != 'cpu' else None  # run once
        for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
            # print('path:{0}'.format(path))
            # print('im0s:{0}'.format(im0s))
            # print('im0s type: {0}'.format(type(im0s)))
            img = torch.from_numpy(img).to(device)
            img = img.half() if half else img.float()  # uint8 to fp16/32
            img /= 255.0  # 0 - 255 to 0.0 - 1.0
            if img.ndimension() == 3:
                img = img.unsqueeze(0)

            # Inference
            t1 = torch_utils.time_synchronized()
            pred = model(img, augment=False)[0]

            # Apply NMS
            pred = non_max_suppression(pred,
                                       0.4,
                                       0.5,
                                       fast=True,
                                       classes=None,
                                       agnostic=False)
            t2 = torch_utils.time_synchronized()

            # Apply Classifier
            if classify:
                pred = apply_classifier(pred, modelc, img, im0s)

            for i, det in enumerate(pred):  # detections per image
                #p, s, im0 = path, '', im0s
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
                #save_path = str(Path(out) / Path(p).name)
                s += '%gx%g ' % img.shape[2:]  # print string
                #gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  #  normalization gain whwh
                if det is not None and len(det):
                    # Rescale boxes from img_size to im0 size
                    det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                              im0.shape).round()

                    #for c in det[:, -1].unique():  #probably error with torch 1.5
                    for c in det[:, -1].detach().unique():
                        n = (det[:, -1] == c).sum()  # detections per class
                        s += '%g %s, ' % (n, names[int(c)])  # add to string

                    # --- linjie
                    bbox_xywh = []
                    confs = []
                    clses = []
                    # for *xyxy, conf, cls in det:
                    #     label = '%s %.2f' % (names[int(cls)], conf)
                    #     plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)
                    # Write results
                    for *xyxy, conf, cls in reversed(det):
                        # -- linjie deepsort
                        x_c, y_c, bbox_w, bbox_h = Camera.bbox_rel(*xyxy)
                        obj = [x_c, y_c, bbox_w, bbox_h]
                        bbox_xywh.append(obj)
                        confs.append([conf.item()])
                        clses.append([cls.item()])

                        label = '%s %.2f' % (names[int(cls)], conf)
                        print('Label for this detection: {0}'.format(label))

                        #plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)
                        # check whether the detected label is a person --linjie
                        if label is not None:
                            if (label.split())[0] == 'person':
                                logger.info('Process {0}: person detected'.format(
                                    os.getpid()))
                                # print('label is a person')
                                # distancing(people_coords, im0, dist_thres_lim=(200, 250))
                                # people_coords.append(xyxy)
                                #plot_one_box(xyxy, im0, line_thickness=3)
                                plot_dots_on_people(xyxy, im0)
                # ---linjie deepsort
                xywhs = torch.Tensor(bbox_xywh)
                confss = torch.Tensor(confs)
                clses = torch.Tensor(clses)
                outputs = deepsort.update(xywhs, confss, clses, im0)
                # draw boxes for visualization
                if len(outputs) > 0:
                    bbox_tlwh = []
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, 4]
                    clses = outputs[:, 5]
                    scores = outputs[:, 6]
                    stays = outputs[:, 7]
                    Camera.draw_boxes(im0, bbox_xyxy,
                                      [names[i] for i in clses], scores,
                                      identities)

                print('%sDone. (%.3fs)' % (s, t2 - t1))
                # Stream results
                # if view_img:
                #     cv2.imshow(p, im0)
                #     if cv2.waitKey(1) == ord('q'):  # q to quit
                #         raise StopIteration
            yield cv2.imencode('.jpg', im0)[1].tobytes()
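
# ---------------------------------------------------------------------------
# Note (sketch, with assumptions): frames() above yields raw JPEG bytes, the
# shape expected by the common Flask MJPEG streaming pattern. Assuming the
# class that defines frames() is named Camera and is importable (hypothetical
# wiring, not part of the example), a consuming route could look like this:
from flask import Flask, Response
# from camera import Camera  # hypothetical import path for the class above

app = Flask(__name__)


def gen(camera):
    """Wrap each JPEG frame in a multipart boundary for MJPEG streaming."""
    for frame in camera.frames():
        yield (b'--frame\r\n'
               b'Content-Type: image/jpeg\r\n\r\n' + frame + b'\r\n')


@app.route('/video_feed')
def video_feed():
    return Response(gen(Camera()),
                    mimetype='multipart/x-mixed-replace; boundary=frame')
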