Code Example #1
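# Assumed imports for this and the similar snippets below (not shown in the originals;
# typical deep_sort_yolov3-style project layout):
# import time
# import cv2
# import numpy as np
# from PIL import Image
# from deep_sort import nn_matching, preprocessing
# from deep_sort.detection import Detection
# from deep_sort.tracker import Tracker
# from tools import generate_detections as gdet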
def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = True

    video_capture = cv2.VideoCapture(0)

    if writeVideo_flag:
        # Define the codec and create VideoWriter object
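        # VideoCapture.get(3)/get(4) correspond to CAP_PROP_FRAME_WIDTH / CAP_PROP_FRAME_HEIGHT.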
        w = int(video_capture.get(3))
        h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out = cv2.VideoWriter('output.avi', fourcc, 15, (w, h))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0
    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if not ret:
            break
        t1 = time.time()

        # image = Image.fromarray(frame)
        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
        boxs = yolo.detect_image(image)
        print("box_num", len(boxs))
        features = encoder(frame, boxs)

        # The detector does not return confidences in this snippet, so each Detection's score is set to 1.0.
        detections = [
            Detection(bbox, 1.0, feature)
            for bbox, feature in zip(boxs, features)
        ]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        print('indices', type(indices), indices)
        detections = [detections[i] for i in indices]
        print('detections', detections, type(detections))
        # print(detections[0])
        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        # Note: zip pairs tracks and detections by index only; they are not association-matched.
        for track, det in zip(tracker.tracks, detections):
            # Skip tracks that are not yet confirmed or were not updated in this frame.
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            trk_bbox = track.to_tlbr()
            det_bbox = det.to_tlbr()
            croppedImage = imcrop(frame, trk_bbox)
            cv2.imwrite("trk" + str(fps) + '.jpg', croppedImage)
            cv2.rectangle(frame, (int(trk_bbox[0]), int(trk_bbox[1])),
                          (int(trk_bbox[2]), int(trk_bbox[3])),
                          (255, 255, 255), 2)
            cv2.rectangle(frame, (int(det_bbox[0]), int(det_bbox[1])),
                          (int(det_bbox[2]), int(det_bbox[3])), (255, 0, 0), 2)
            cv2.putText(frame, str(track.track_id),
                        (int(trk_bbox[0]), int(trk_bbox[1])), 0, 5e-3 * 200,
                        (0, 255, 0), 2)

        # for det in detections:
        #     bbox = det.to_tlbr()
        #     croppedImage = imcrop(frame, bbox)
        #     cv2.imwrite("det"+str(fps)+'.jpg', croppedImage)
        #     cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        # for track in tracker.tracks:
        #     if not track.is_confirmed() or track.time_since_update > 1:
        #         continue
        #     bbox = track.to_tlbr()
        #     croppedImage = imcrop(frame, bbox)
        #     cv2.imwrite("trk"+str(fps) + '.jpg', croppedImage)
        #     cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
        #     cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200, (0, 255, 0), 2)
        #
        # for det in detections:
        #     bbox = det.to_tlbr()
        #     croppedImage = imcrop(frame, bbox)
        #     cv2.imwrite("det"+str(fps)+'.jpg', croppedImage)
        #     cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        cv2.imshow('', frame)

        if writeVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(boxs) != 0:
                for i in range(0, len(boxs)):
                    list_file.write(
                        str(boxs[i][0]) + ' ' + str(boxs[i][1]) + ' ' +
                        str(boxs[i][2]) + ' ' + str(boxs[i][3]) + ' ')
            list_file.write('\n')

        fps = (fps + (1. / (time.time() - t1))) / 2
        print("fps= %f" % (fps))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    video_capture.release()
    if writeVideo_flag:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
Code Example #2
    detections = [
        Detection(bbox, score, class_name,
                  feature) for bbox, score, class_name, feature in zip(
                      converted_boxes, scores[0], names, features)
    ]
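    # Note: this fork's Detection also stores the class name, unlike the stock
    # deep_sort Detection(tlwh, confidence, feature).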

    boxs = np.array([d.tlwh for d in detections])
    scores = np.array([d.confidence for d in detections])
    classes = np.array([d.class_name for d in detections])
    indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap,
                                                scores)
    detections = [detections[i] for i in indices]

    tracker.predict()
    tracker.update(detections)

    cmap = plt.get_cmap('tab20b')
    colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

    current_count = 0

    for track in tracker.tracks:
        if not track.is_confirmed() or track.time_since_update > 1:
            continue
        bbox = track.to_tlbr()
        class_name = track.get_class()
        color = colors[int(track.track_id) % len(colors)]
        color = [i * 255 for i in color]

        cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
Code Example #3
def main():
    ## cosine distance threshold
    max_cosine_distance = 0.9
    nn_budget = None
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric_left = nn_matching.NearestNeighborDistanceMetric(
        "cosine", max_cosine_distance, nn_budget)
    tracker_left = Tracker(metric_left)
    video_capture = cv2.VideoCapture(sys.argv[1])
    frame_count = -1
    tic = time.time()
    fps = 0.0
    sum = 0
    detections = None
    ## load YOLO model
    global metaMain, netMain, altNames
    darknet_image, metaMain, netMain, altNames = load_model(
        metaMain, netMain, altNames, version)

    if write_flag:
        folder_name = sys.argv[1][:-4] + "_tracked"  # strip the file extension, append suffix
        if not os.path.exists(folder_name):
            os.mkdir(folder_name)
        folder_name = folder_name + "/"
        print(folder_name)
        frame_width = int(video_capture.get(3))
        frame_height = int(video_capture.get(4))
        # Define the codec and create a VideoWriter object. The output is stored in 'output_tracked.avi'.
        out = cv2.VideoWriter(folder_name + 'output_tracked.avi',
                              cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 10,
                              (frame_width, frame_height))
        f = open(folder_name + "person_count_tracked.txt", "w+")
        f2 = open(folder_name + "fps_tracked.txt", "w+")
    else:
        f = None
    frame_counter = -1

    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if not ret:
            if write_flag:
                # average FPS over all processed frames (frame count / accumulated processing time)
                fps = frame_counter / sum
                f2.write(str(fps) + "\n")
            break
        visual_frame = frame.copy()
        t1 = time.time()
        frame_count = frame_count + 1
        frame_counter = frame_counter + 1
        tic = time.time()
        frame = cv2.resize(
            frame,
            (darknet.network_width(netMain), darknet.network_height(netMain)),
            interpolation=cv2.INTER_LINEAR)
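        # Run the detector only every step_frame frames; in between, the tracker coasts on Kalman predictions.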
        if frame_count % step_frame == 0:
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_resized = cv2.resize(frame_rgb,
                                       (darknet.network_width(netMain),
                                        darknet.network_height(netMain)),
                                       interpolation=cv2.INTER_LINEAR)
            darknet.copy_image_from_bytes(darknet_image,
                                          frame_resized.tobytes())
            detections = darknet.detect_image(netMain,
                                              metaMain,
                                              darknet_image,
                                              thresh=thresh)
            boxes = []
            for detection_left in detections:
                if "person" in str(detection_left[0]):
                    x, y, w, h = detection_left[2][0],\
                        detection_left[2][1],\
                        detection_left[2][2],\
                        detection_left[2][3]
                    xmin, ymin, xmax, ymax = convertBack(
                        int(x), int(y), int(w), int(h))
                    boxes.append((xmin, ymin, xmax - xmin, ymax - ymin))
            detections = get_detection(frame, boxes, encoder)
            frame_count = 0
        # Call the tracker
        tracker_left.predict()
        tracker_left.update(detections)
        toc = time.time()
        sum = sum + toc - tic
        # if write_flag and ((video3 and frame_counter % 30 == 0) or (video3 is False and frame_counter % 25 == 0)):
        if write_flag:
            visual_one_tracker(tracker_left, visual_frame, (255, 0, 0), f,
                               frame_counter, darknet.network_width(netMain),
                               darknet.network_height(netMain), step_frame)
        else:
            visual_one_tracker(tracker_left, visual_frame, (255, 0, 0), None,
                               frame_counter, darknet.network_width(netMain),
                               darknet.network_height(netMain), step_frame)
        fps_text = 'FPS: {:.2f}'.format(fps)
        cv2.putText(visual_frame, fps_text, (0, 30), cv2.FONT_HERSHEY_SIMPLEX,
                    1, (0, 0, 0), 3)
        cv2.imshow("demo", visual_frame)

        if write_flag:
            out.write(visual_frame)
        key = cv2.waitKey(1)
        if key == ord('q'):
            break
        elif key == 32:
            key = cv2.waitKey(0)
            if key == ord('s'):
                cv2.imwrite("frame_read.jpg", visual_frame)

    video_capture.release()
    cv2.destroyAllWindows()
Code Example #4
def YOLO(videopath):

    global metaMain, netMain, altNames
    configPath = "./configs/yolov4-helmet-detection.cfg"
    weightPath = "./configs/yolov4-helmet-detection.weights"
    metaPath = "./configs/yolov4-helmet-detection.data"

    if not os.path.exists(configPath):
        raise ValueError("Invalid config path `" +
                         os.path.abspath(configPath)+"`")
    if not os.path.exists(weightPath):
        raise ValueError("Invalid weight path `" +
                         os.path.abspath(weightPath)+"`")
    if not os.path.exists(metaPath):
        raise ValueError("Invalid data file path `" +
                         os.path.abspath(metaPath)+"`")
    if netMain is None:
        netMain = darknet.load_net_custom(configPath.encode(
            "ascii"), weightPath.encode("ascii"), 0, 1)  # batch size = 1
    if metaMain is None:
        metaMain = darknet.load_meta(metaPath.encode("ascii"))
    if altNames is None:
        try:
            with open(metaPath) as metaFH:
                metaContents = metaFH.read()
                import re
                match = re.search("names *= *(.*)$", metaContents,
                                  re.IGNORECASE | re.MULTILINE)
                if match:
                    result = match.group(1)
                else:
                    result = None
                try:
                    if os.path.exists(result):
                        with open(result) as namesFH:
                            namesList = namesFH.read().strip().split("\n")
                            altNames = [x.strip() for x in namesList]
                except TypeError:
                    pass
        except Exception:
            pass

    if not os.path.exists("outputs"):
        os.mkdir("outputs")
    """
    DeepSORT Parameters
    """
    max_cosine_distance = 0.5
    nn_budget = None

    # load DeepSORT model
    sort_model_file = "model_data/mars-small128.pb"
    encoder = gdet.create_box_encoder(sort_model_file, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    # load video file / streams
    cap = cv2.VideoCapture(videopath)
    original_fps = cap.get(cv2.CAP_PROP_FPS)
    original_dimension = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))

    print(original_fps, original_dimension)
    # create head detection result saving directory
    filename = videopath.split(".")[0].split("/")[1]
    directory = os.path.join(os.getcwd(), "outputs", filename)
    if not os.path.exists(directory):
        os.mkdir(directory)
    
    # create video output directory
    out_directory = os.path.join(os.getcwd(), "outputs", "video")
    if not os.path.exists(out_directory):
        os.mkdir(out_directory)
        
    # create VideoWriter for output video
    out_write = cv2.VideoWriter( os.path.join(out_directory, filename+"_processed.mp4")
                               , cv2.VideoWriter_fourcc(*'MP4V')
                               , original_fps
                               , original_dimension
                               )
    
    print("Starting the YOLO loop...")

    # Create an image we reuse for each detect
    darknet_image = darknet.make_image(darknet.network_width(netMain),
                                    darknet.network_height(netMain),3)
    # network image size (416*416, ...)
    network_image_size = (darknet.network_width(netMain),
                          darknet.network_height(netMain))
    fps = 0.0

    # head detection id array
    head_set = set()

    while True:
        ret, frame_read = cap.read()
        if ret:
            t1 = time.time()
            frame_rgb = cv2.cvtColor(frame_read, cv2.COLOR_BGR2RGB)
            frame_resized = cv2.resize(frame_rgb,
                                       network_image_size,
                                       interpolation=cv2.INTER_LINEAR)

            darknet.copy_image_from_bytes(darknet_image, frame_resized.tobytes())

            # get inference information from Yolov4 Model (class, probability, (x,y,width,height))
            detections = darknet.detect_image(netMain, metaMain, darknet_image, thresh=0.25)
            
            # deep sort inference
            bboxes = np.array([x[2] for x in detections])
            scores = np.array([x[1] for x in detections])
            classes = np.array([x[0].decode() for x in detections])
            features = encoder(frame_resized, bboxes)

            detections = [Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip(bboxes, scores, classes, features)]

            boxs = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            classes = np.array([d.class_name for d in detections])

            tracker.predict()
            tracker.update(detections)
            
            # color map used to assign a distinct color to each track id
            cmap = plt.get_cmap('tab20b')
            colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

            # Deep SORT results
            for track in tracker.tracks:
                
                class_name = track.get_class()
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                # skip 'helmet' boxes unless helmet drawing is enabled
                if not FLAGS['HELMET_DRAW_ENABLED'] and class_name == "helmet":
                    continue

                # deep sort results
                bbox = track.to_tlbr()
                color = colors[int(track.track_id) % len(colors)]
                color = [i * 255 for i in color]
                # resize bounding box to fit in original image
                xmin, ymin, xmax, ymax = resizeCoord(frame_read.shape, network_image_size, (bbox[0], bbox[1], bbox[2], bbox[3]))
                xmin = (xmin * 2 - xmax)
                ymin = (ymin * 2 - ymax)
                
                # draw class, id on image with opacity
                mask_frame = frame_rgb.copy()
                ALPHA = 0.4
                cv2.rectangle(mask_frame, (xmin, ymin-10), (xmin+(len(class_name)+len(str(track.track_id)))*9, ymin), color, -1)
                text_color = (255,255,255) if class_name == "helmet" else (0,0,0)
                cv2.putText(mask_frame, f"{class_name} - {track.track_id}", (xmin, ymin-4), cv2.FONT_HERSHEY_SIMPLEX, 0.25, text_color)
                frame_rgb = cv2.addWeighted(mask_frame, ALPHA, frame_rgb, 1 - ALPHA, 0)
                
                # draw bounding box
                cv2.rectangle(frame_rgb, (xmin, ymin), (xmax, ymax), color, 2)

                # if a new head track id appears, save a snapshot image
                if FLAGS['SAVE_ON_NEW_HEAD'] and class_name == 'head' and track.track_id not in head_set:
                    head_set.add(track.track_id)
                    print("new head detected")
                    savePath = os.path.join(os.getcwd(), "outputs", filename, f"{track.track_id}_{datetime.now().strftime('%Y_%m_%d %H_%M_%S')}.jpg")
                    print(savePath)
                    cv2.imwrite(savePath, cv2.hconcat([frame_read, cv2.cvtColor(frame_rgb, cv2.COLOR_BGR2RGB)]))

            # draw fps
            if FLAGS['SHOW_FPS']:
                fps = (fps + (1./(time.time() - t1))) / 2
                cv2.putText(frame_rgb, "FPS: {:.2f}".format(fps), (0, 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.75, (255, 0, 0), 2)

            # optionally show the original frame in its own window
            if FLAGS['SHOW_ORIGINAL_IMAGE']:
                cv2.imshow('Original', frame_read)

            result_frame = cv2.cvtColor(frame_rgb, cv2.COLOR_BGR2RGB)
            # show result video
            cv2.imshow('Video', result_frame)
            # save result video
            out_write.write(result_frame)
            # press 'q' to quit
            if cv2.waitKey(1) == ord('q'):
                break
        else:
            break
    cap.release()
    out_write.release()
    cv2.destroyAllWindows()
Code Example #5
def main(_argv):
    # set present path
    home = os.getcwd()

    # Definition of the parameters
    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 1.0

    #initialize deep sort
    # model_filename = 'weights/mars-small128.pb'
    model_filename = os.path.join(home, "weights", "arcface_weights.h5")
    encoder = gdet.create_box_encoder(model_filename, batch_size=128)
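    # Note: the box encoder here is built from ArcFace face-embedding weights rather than
    # the usual mars-small128 person-ReID model.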
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    times = []

    # Build the face database
    face_db = dict()

    db_path = FLAGS.database
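    # Build a per-person gallery of embeddings from reference images under FLAGS.database/<name>/*.jpg.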
    for name in os.listdir(db_path):
        name_path = os.path.join(db_path, name)
        name_db = []
        for i in os.listdir(name_path):
            if i.split(".")[1] != "jpg": continue
            id_path = os.path.join(name_path, i)
            img = cv2.imread(id_path)
            # img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            # img_in = tf.expand_dims(img_in, 0)
            # img_in = transform_images(img_in, FLAGS.size)
            # boxes, scores, classes, nums = yolo.predict(img_in)
            boxes = np.asarray([[0, 0, img.shape[0], img.shape[1]]])
            scores = np.asarray([[1]])
            converted_boxes = convert_boxes(img, boxes, scores)
            features = encoder(img, converted_boxes)

            if features.shape[0] == 0: continue

            for f in range(features.shape[0]):
                name_db.append(features[f, :])
        name_db = np.asarray(name_db)
        face_db[name] = dict({"used": False, "db": name_db})

    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except:
        vid = cv2.VideoCapture(FLAGS.video)

    out = None

    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0
    count = 0

    detection_list = []

    while True:
        _, img = vid.read()

        if img is None:
            logging.warning("Empty Frame")
            time.sleep(0.1)
            count += 1
            if count < 3:
                continue
            else:
                break

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, FLAGS.size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in)

        # print(boxes, scores, classes, nums)
        # time.sleep(5)
        t2 = time.time()
        times.append(t2 - t1)
        print(f'yolo predict time : {t2-t1}')
        times = times[-20:]

        t3 = time.time()
        #############
        classes = classes[0]
        names = []
        for i in range(len(classes)):
            names.append(class_names[int(classes[i])])
        names = np.array(names)
        converted_boxes = convert_boxes(img, boxes[0], scores[0])
        features = encoder(img, converted_boxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                converted_boxes, scores[0], names, features)
        ]

        t4 = time.time()
        print(f'feature generation time : {t4-t3}')

        #initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppresion
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes,
                                                    nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        t5 = time.time()
        # Call the tracker
        tracker.predict()
        # tracker.update(detections)
        tracker.update(detections, face_db, FLAGS.max_face_threshold)
        t6 = time.time()
        print(f'tracking time : {t6-t5}')

        frame_index = frame_index + 1
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()
            face_name = track.get_face_name()
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), color, 2)
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1] - 30)),
                          (int(bbox[0]) +
                           (len(class_name) + len(str(track.track_id)) +
                            len(str(face_name))) * 23, int(bbox[1])), color,
                          -1)
            # cv2.putText(img, class_name + face_name + "-" + str(track.track_id),(int(bbox[0]), int(bbox[1]-10)),0, 0.75, (255,255,255),2)
            cv2.putText(
                img, class_name + "-" + str(track.track_id) + "-" + face_name,
                (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2)
            # cv2.putText(img, class_name + "-" + str(track.track_id),(int(bbox[0]), int(bbox[1]-10)),0, 0.75, (255,255,255),2)
            # print(class_name + "-" + str(track.track_id))

            # detection_list.append(dict({"frame_no": str(frame_index), "id": str(track.track_id), "x": str(int(bbox[0])), "y": str(int(bbox[1])), "width": str(int(bbox[2])-int(bbox[0])), "height": str(int(bbox[3])-int(bbox[1]))}))
            if face_name != "":
                detection_list.append(
                    dict({
                        "frame_no": str(frame_index),
                        "id": str(face_name),
                        "x": str(int(bbox[0])),
                        "y": str(int(bbox[1])),
                        "width": str(int(bbox[2]) - int(bbox[0])),
                        "height": str(int(bbox[3]) - int(bbox[1]))
                    }))
        #######
        fps = (fps + (1. / (time.time() - t1))) / 2
        # img = draw_outputs(img, (boxes, scores, classes, nums), class_names)
        # img = cv2.putText(img, "Time: {:.2f}ms".format(sum(times)/len(times)*1000), (0, 30),
        #                   cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        img = cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30),
                          cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (20, 20, 255), 2)
        if FLAGS.output:
            out.write(img)
            # frame_index = frame_index + 1
            # list_file.write(str(frame_index)+' ')
            # if len(converted_boxes) != 0:
            #     for i in range(0,len(converted_boxes)):
            #         list_file.write(str(converted_boxes[i][0]) + ' '+str(converted_boxes[i][1]) + ' '+str(converted_boxes[i][2]) + ' '+str(converted_boxes[i][3]) + ' ')
            # list_file.write('\n')
        cv2.imshow('output', img)
        if cv2.waitKey(1) == ord('q'):
            break

    cv2.destroyAllWindows()

    frame_list = sorted(detection_list,
                        key=lambda x: (int(x["frame_no"]), int(x["id"])))
    # pprint.pprint(frame_list)

    f = open(FLAGS.eval, "w")
    for a in frame_list:
        f.write(a["frame_no"] + " " + a["id"] + " " + a["x"] + " " + a["y"] +
                " " + a["width"] + " " + a["height"] + "\n")
    # close the evaluation output file
    f.close()
Code Example #6
def Object_tracking(YoloV3,
                    video_path,
                    output_path,
                    input_size=416,
                    show=False,
                    CLASSES=YOLO_COCO_CLASSES,
                    score_threshold=0.3,
                    iou_threshold=0.45,
                    rectangle_colors='',
                    Track_only=[]):
    # Definition of the parameters
    max_cosine_distance = 0.7
    nn_budget = None

    #initialize deep sort object
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    times = []

    if video_path:
        vid = cv2.VideoCapture(video_path)  # detect on video
    else:
        vid = cv2.VideoCapture(0)  # detect from webcam

    # by default VideoCapture returns float instead of int
    width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(vid.get(cv2.CAP_PROP_FPS))
    codec = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(output_path, codec, fps,
                          (width, height))  # output_path must be .mp4

    NUM_CLASS = read_class_names(CLASSES)
    key_list = list(NUM_CLASS.keys())
    val_list = list(NUM_CLASS.values())
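    # key_list / val_list allow a reverse lookup from class name back to class index when building tracked_bboxes.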
    while True:
        _, img = vid.read()

        try:
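            # Note: applying COLOR_BGR2RGB twice swaps the channels back, so original_image ends up in BGR order.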
            original_image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
        except:
            break
        image_data = image_preprocess(np.copy(original_image),
                                      [input_size, input_size])
        image_data = tf.expand_dims(image_data, 0)

        t1 = time.time()
        pred_bbox = YoloV3.predict(image_data)
        t2 = time.time()

        times.append(t2 - t1)
        times = times[-20:]

        pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
        pred_bbox = tf.concat(pred_bbox, axis=0)

        bboxes = postprocess_boxes(pred_bbox, original_image, input_size,
                                   score_threshold)
        bboxes = nms(bboxes, iou_threshold, method='nms')

        # extract bboxes to boxes (x, y, width, height), scores and names
        boxes, scores, names = [], [], []
        for bbox in bboxes:
            if len(Track_only) == 0 or NUM_CLASS[int(bbox[5])] in Track_only:
                boxes.append([
                    bbox[0].astype(int), bbox[1].astype(int),
                    bbox[2].astype(int) - bbox[0].astype(int),
                    bbox[3].astype(int) - bbox[1].astype(int)
                ])
                scores.append(bbox[4])
                names.append(NUM_CLASS[int(bbox[5])])

        # Obtain all the detections for the given frame.
        boxes = np.array(boxes)
        names = np.array(names)
        scores = np.array(scores)
        features = np.array(encoder(original_image, boxes))
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                boxes, scores, names, features)
        ]

        # Pass detections to the deepsort object and obtain the track information.
        tracker.predict()
        tracker.update(detections)

        # Obtain info from the tracks
        tracked_bboxes = []
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 5:
                continue
            bbox = track.to_tlbr()  # Get the corrected/predicted bounding box
            class_name = track.get_class()  # Get the class name of this object
            tracking_id = track.track_id  # Get the ID for this particular track
            index = key_list[val_list.index(class_name)]  # Get the class index from the class name
            tracked_bboxes.append(bbox.tolist() + [tracking_id, index])  # Structure the data for draw_bbox

        ms = sum(times) / len(times) * 1000
        fps = 1000 / ms

        # draw detection on frame
        image = draw_bbox(original_image,
                          tracked_bboxes,
                          CLASSES=CLASSES,
                          tracking=True)
        image = cv2.putText(image, "Time: {:.1f} FPS".format(fps), (0, 30),
                            cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)

        # draw original yolo detection
        #image = draw_bbox(image, bboxes, CLASSES=CLASSES, show_label=False, rectangle_colors=rectangle_colors, tracking=True)

        #print("Time: {:.2f}ms, {:.1f} FPS".format(ms, fps))
        if output_path != '': out.write(image)
        if show:
            cv2.imshow('output', image)

            if cv2.waitKey(25) & 0xFF == ord("q"):
                cv2.destroyAllWindows()
                break

    cv2.destroyAllWindows()
Code Example #7
def main(yolo):

    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # Deep SORT
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = True
    asyncVideo_flag = False
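    # asyncVideo_flag switches to VideoCaptureAsync, which reads frames on a background thread.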

    file_path = 'video.webm'
    if asyncVideo_flag:
        video_capture = VideoCaptureAsync(file_path)
    else:
        video_capture = cv2.VideoCapture(file_path)

    if asyncVideo_flag:
        video_capture.start()

    if writeVideo_flag:
        if asyncVideo_flag:
            w = int(video_capture.cap.get(3))
            h = int(video_capture.cap.get(4))
        else:
            w = int(video_capture.get(3))
            h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter('output_yolov4.avi', fourcc, 30, (w, h))
        frame_index = -1

    fps = 0.0
    fps_imutils = imutils.video.FPS().start()

    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if not ret:
            break

        t1 = time.time()

        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
        boxs, confidence = yolo.detect_image(image)

        features = encoder(frame, boxs)

        detections = [
            Detection(bbox, confidence, feature)
            for bbox, confidence, feature in zip(boxs, confidence, features)
        ]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, str(track.track_id),
                        (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200,
                        (0, 255, 0), 2)

        for det in detections:
            bbox = det.to_tlbr()
            score = "%.2f" % round(det.confidence * 100, 2)
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)
            cv2.putText(frame, score + '%', (int(bbox[0]), int(bbox[3])), 0,
                        5e-3 * 130, (0, 255, 0), 2)

        cv2.imshow('', frame)

        if writeVideo_flag:  # and not asyncVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1

        fps_imutils.update()

        if not asyncVideo_flag:
            fps = (fps + (1. / (time.time() - t1))) / 2
            print("FPS = %f" % (fps))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    fps_imutils.stop()
    print('imutils FPS: {}'.format(fps_imutils.fps()))

    if asyncVideo_flag:
        video_capture.stop()
    else:
        video_capture.release()

    if writeVideo_flag:
        out.release()

    cv2.destroyAllWindows()
Code Example #8
File: main_static.py  Project: Suke0/DeepSORT
def main(yolo):

    start = time.time()
    #Definition of the parameters
    max_cosine_distance = 0.5  # cosine distance matching threshold
    nn_budget = None
    nms_max_overlap = 0.3  # non-maximum suppression threshold

    counter = []
    #deep_sort
    model_filename = './re_id/mars-small128.pb'
    encoder = create_box_encoder(model_filename, batch_size=32)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = True
    #video_path = "./output/output.avi"
    video_capture = cv2.VideoCapture(args["input"])

    if writeVideo_flag:
        # Define the codec and create VideoWriter object
        w = int(video_capture.get(3))
        h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out = cv2.VideoWriter(
            './output/' + args["input"][43:57] + "_" + args["class"] +
            '_output.avi', fourcc, 15, (w, h))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0

    while True:

        ret, frame = video_capture.read()  # frame shape 640*480*3
        if not ret:
            break
        t1 = time.time()

        # image = Image.fromarray(frame)
        image = Image.fromarray(frame[..., ::-1])  #bgr to rgb
        t_detect_image = time.time()
        boxs, class_names = yolo.detect_image(image)
        t_detect_image_ = time.time()
        print("t_detect_image" + str(t_detect_image_ - t_detect_image))

        t_detect_image = time.time()
        features = encoder(frame, boxs)
        t_detect_image_ = time.time()
        print("t_encoder" + str(t_detect_image_ - t_detect_image))
        # score to 1.0 here).
        detections = [
            Detection(bbox, 1.0, feature)
            for bbox, feature in zip(boxs, features)
        ]
        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)
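        # Assumed: 'pts' and 'COLORS' are module-level globals in this project, e.g. a deque of
        # recent center points per track id (for the motion trail) and a random color table.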

        i = 0
        indexIDs = []
        c = []
        boxes = []
        for det in detections:
            bbox = det.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            #boxes.append([track[0], track[1], track[2], track[3]])
            indexIDs.append(int(track.track_id))
            counter.append(int(track.track_id))
            bbox = track.to_tlbr()
            color = [int(c) for c in COLORS[indexIDs[i] % len(COLORS)]]

            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (color), 3)
            cv2.putText(frame, str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 50)), 0, 5e-3 * 150,
                        (color), 2)
            if len(class_names) > 0:
                class_name = class_names[0]
                cv2.putText(frame, str(class_names[0]),
                            (int(bbox[0]), int(bbox[1] - 20)), 0, 5e-3 * 150,
                            (color), 2)

            i += 1
            #bbox_center_point(x,y)
            center = (int(
                ((bbox[0]) + (bbox[2])) / 2), int(((bbox[1]) + (bbox[3])) / 2))
            #track_id[center]
            pts[track.track_id].append(center)
            thickness = 5
            #center point
            cv2.circle(frame, (center), 1, color, thickness)

            #draw motion path
            for j in range(1, len(pts[track.track_id])):
                if pts[track.track_id][j - 1] is None or pts[
                        track.track_id][j] is None:
                    continue
                thickness = int(np.sqrt(64 / float(j + 1)) * 2)
                cv2.line(frame, (pts[track.track_id][j - 1]),
                         (pts[track.track_id][j]), (color), thickness)
                #cv2.putText(frame, str(class_names[j]),(int(bbox[0]), int(bbox[1] -20)),0, 5e-3 * 150, (255,255,255),2)

        count = len(set(counter))
        cv2.putText(frame, "Total Object Counter: " + str(count),
                    (int(20), int(120)), 0, 5e-3 * 200, (0, 255, 0), 2)
        cv2.putText(frame, "Current Object Counter: " + str(i),
                    (int(20), int(80)), 0, 5e-3 * 200, (0, 255, 0), 2)
        cv2.putText(frame, "FPS: %f" % (fps), (int(20), int(40)), 0,
                    5e-3 * 200, (0, 255, 0), 3)
        cv2.namedWindow("YOLO3_Deep_SORT", 0)
        cv2.resizeWindow('YOLO3_Deep_SORT', 1024, 768)
        cv2.imshow('YOLO3_Deep_SORT', frame)

        if writeVideo_flag:
            #save a frame
            out.write(frame)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(boxs) != 0:
                for i in range(0, len(boxs)):
                    list_file.write(
                        str(boxs[i][0]) + ' ' + str(boxs[i][1]) + ' ' +
                        str(boxs[i][2]) + ' ' + str(boxs[i][3]) + ' ')
            list_file.write('\n')
        fps = (fps + (1. / (time.time() - t1))) / 2
        #print(set(counter))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    print(" ")
    print("[Finish]")
    end = time.time()

    if len(counter) != 0:  # at least one object was tracked
        print(args["input"][43:57] + ": " + str(count) + " " +
              str(class_name) + ' Found')

    else:
        print("[None Found]")

    video_capture.release()

    if writeVideo_flag:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
Code Example #9
File: doorman.py  Project: alievilya/yolov4-doorman
def main(yolo):
    # Definition of the parameters
    with open("cfg/detection_tracker_cfg.json") as detection_config:
        detect_config = json.load(detection_config)
    with open("cfg/doors_info.json") as doors_config:
        doors_config = json.load(doors_config)
    with open("cfg/around_doors_info.json") as around_doors_config:
        around_doors_config = json.load(around_doors_config)
    model_filename = detect_config["tracking_model"]
    input_folder, output_folder = detect_config["input_folder"], detect_config[
        "output_folder"]
    meta_folder = detect_config["meta_folder"]
    output_format = detect_config["output_format"]

    # Deep SORT
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    show_detections = True
    asyncVideo_flag = False

    check_gpu()

    # from here should start loop to process videos from folder
    # for video_name in os.listdir(input_folder):

    HOST = "localhost"
    PORT = 8075
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
        sock.bind((HOST, PORT))
        sock.listen()
        conn, addr = sock.accept()
        with conn:
            print('Connected by', addr)
            #  loop over all videos
            while True:
                data = conn.recv(1000)
                video_motion_list = data.decode("utf-8").split(';')
                videos_que = deque()
                for video_motion in video_motion_list:
                    videos_que.append(video_motion)
                video_name = videos_que.popleft()

                if not video_name.endswith(output_format):
                    continue

                print('elements in que', len(videos_que))
                print("opening video: {}".format(video_name))
                full_video_path = join(input_folder, video_name)
                # full_video_path = "rtsp://*****:*****@192.168.1.52:554/1/h264major"

                meta_name = meta_folder + video_name[:-4] + ".json"
                with open(meta_name) as meta_config_json:
                    meta_config = json.load(meta_config_json)
                camera_id = meta_config["camera_id"]
                if not os.path.exists(output_folder + str(camera_id)):
                    os.mkdir(output_folder + str(camera_id))

                output_name = output_folder + camera_id + '/out_' + video_name
                counter = Counter(counter_in=0, counter_out=0, track_id=0)
                tracker = Tracker(metric)

                if asyncVideo_flag:
                    video_capture = VideoCaptureAsync(full_video_path)
                    video_capture.start()
                    w = int(video_capture.cap.get(3))
                    h = int(video_capture.cap.get(4))
                else:
                    video_capture = cv2.VideoCapture(full_video_path)
                    w = int(video_capture.get(3))
                    h = int(video_capture.get(4))

                fourcc = cv2.VideoWriter_fourcc(*'XVID')
                out = cv2.VideoWriter(output_name, fourcc, 25, (w, h))

                door_array = doors_config["{}".format(camera_id)]
                around_door_array = tuple(
                    around_doors_config["{}".format(camera_id)])
                rect_door = Rectangle(door_array[0], door_array[1],
                                      door_array[2], door_array[3])
                border_door = door_array[3]
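                #  door_array is the door ROI as (x1, y1, x2, y2); border_door is its bottom edge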
                #  loop over video
                save_video_flag = False
                while True:
                    fps_imutils = imutils.video.FPS().start()
                    ret, frame = video_capture.read()
                    if not ret:
                        with open('videos_saved/log_results.txt', 'a') as log:
                            log.write(
                                'processed (ret). Time: {}, camera id: {}\n'.
                                format(video_name, camera_id))
                        break
                    t1 = time.time()
                    # lost_ids = counter.return_lost_ids()
                    image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
                    # image = image.crop(around_door_array)
                    boxes, confidence, classes = yolo.detect_image(image)

                    features = encoder(frame, boxes)
                    detections = [
                        Detection(bbox, confidence, cls, feature)
                        for bbox, confidence, cls, feature in zip(
                            boxes, confidence, classes, features)
                    ]

                    # Run non-maxima suppression.
                    boxes = np.array([d.tlwh for d in detections])
                    scores = np.array([d.confidence for d in detections])
                    classes = np.array([d.cls for d in detections])
                    indices = preprocessing.non_max_suppression(
                        boxes, nms_max_overlap, scores)
                    detections = [detections[i] for i in indices]

                    # Call the tracker
                    tracker.predict()
                    tracker.update(detections)

                    cv2.rectangle(frame,
                                  (int(door_array[0]), int(door_array[1])),
                                  (int(door_array[2]), int(door_array[3])),
                                  (23, 158, 21), 3)
                    if len(detections) != 0:
                        counter.someone_inframe()
                        for det in detections:
                            bbox = det.to_tlbr()
                            if show_detections and len(classes) > 0:
                                score = "%.2f" % (det.confidence * 100) + "%"
                                cv2.rectangle(frame,
                                              (int(bbox[0]), int(bbox[1])),
                                              (int(bbox[2]), int(bbox[3])),
                                              (255, 0, 0), 3)
                    else:
                        if counter.need_to_clear():
                            counter.clear_all()
                    # identities = [track.track_id for track in tracker.tracks]
                    # counter.update_identities(identities)

                    for track in tracker.tracks:
                        if not track.is_confirmed(
                        ) or track.time_since_update > 1:
                            continue
                        bbox = track.to_tlbr()

                        if track.track_id not in counter.people_init or counter.people_init[
                                track.track_id] == 0:
                            # counter.obj_initialized(track.track_id)
                            ratio_init = find_ratio_ofbboxes(
                                bbox=bbox, rect_compare=rect_door)

                            if ratio_init > 0:
                                if ratio_init >= 0.5:  # and bbox[3] < door_array[3]:
                                    counter.people_init[
                                        track.track_id] = 2  # man in the door
                                elif ratio_init < 0.5:  # and bbox[3] > door_array[3]:  # initialized in the outside
                                    counter.people_init[track.track_id] = 1
                            else:
                                counter.people_init[track.track_id] = 1
                            counter.people_bbox[track.track_id] = bbox
                        counter.cur_bbox[track.track_id] = bbox

                        adc = "%.2f" % (track.adc * 100
                                        ) + "%"  # Average detection confidence
                        cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                      (int(bbox[2]), int(bbox[3])),
                                      (255, 255, 255), 2)
                        cv2.putText(frame, "ID: " + str(track.track_id),
                                    (int(bbox[0]), int(bbox[1]) + 50), 0,
                                    1e-3 * frame.shape[0], (0, 255, 0), 3)

                        if not show_detections:
                            track_cls = track.cls
                            cv2.putText(frame, str(track_cls),
                                        (int(bbox[0]), int(bbox[3])), 0,
                                        1e-3 * frame.shape[0], (0, 255, 0), 3)
                            cv2.putText(frame, 'ADC: ' + adc,
                                        (int(bbox[0]),
                                         int(bbox[3] + 2e-2 * frame.shape[1])),
                                        0, 1e-3 * frame.shape[0], (0, 255, 0),
                                        3)
                        # if track.time_since_update >= 15:
                        #     id_get_lost.append(track.track_id)
                    id_get_lost = [
                        track.track_id for track in tracker.tracks
                        if track.time_since_update >= 15
                    ]
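                    # Tracks that started inside the door zone (state 2) and get lost outside it count as "out";
                    # tracks that started outside (state 1) and get lost inside it count as "in".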

                    for val in counter.people_init.keys():
                        ratio = 0
                        cur_c = find_centroid(counter.cur_bbox[val])
                        init_c = find_centroid(counter.people_bbox[val])
                        if val in id_get_lost and counter.people_init[
                                val] != -1:
                            ratio = find_ratio_ofbboxes(
                                bbox=counter.cur_bbox[val],
                                rect_compare=rect_door)
                            if counter.people_init[val] == 2 \
                                    and ratio < 0.6:  # and counter.people_bbox[val][3] > border_door \
                                counter.get_out()
                                save_video_flag = True
                                print(counter.people_init[val], ratio)
                            elif counter.people_init[val] == 1 \
                                    and ratio >= 0.6:
                                counter.get_in()
                                save_video_flag = True
                                print(counter.people_init[val], ratio)
                            counter.people_init[val] = -1

                    ins, outs = counter.return_counter()
                    cv2.rectangle(frame, (frame.shape[1] - 150, 0),
                                  (frame.shape[1], 50), (0, 0, 0), -1, 8)
                    cv2.putText(frame, "in: {}, out: {} ".format(ins, outs),
                                (frame.shape[1] - 140, 20), 0,
                                1e-3 * frame.shape[0], (255, 255, 255), 3)
                    out.write(frame)
                    fps_imutils.update()
                    if not asyncVideo_flag:
                        pass
                        # fps = (1. / (time.time() - t1))
                        # print("FPS = %f" % fps)

                        # if len(fpeses) < 15:
                        #     fpeses.append(round(fps, 2))
                        #
                        # elif len(fpeses) == 15:
                        #     # fps = round(np.median(np.array(fpeses)))
                        #     median_fps = float(np.median(np.array(fpeses)))
                        #     fps = round(median_fps, 1)
                        #     print('max fps: ', fps)
                        #     # fps = 20
                        #     counter.fps = fps
                        #     fpeses.append(fps)

                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break

                if asyncVideo_flag:
                    video_capture.stop()
                    del video_capture
                else:
                    video_capture.release()

                if save_video_flag:
                    with open('videos_saved/log_results.txt', 'a') as log:
                        log.write(
                            'detected!!! time: {}, camera id: {}, detected move in: {}, out: {}\n'
                            .format(video_name, camera_id, ins, outs))
                        log.write('video written {}\n\n'.format(output_name))
                    out.release()
                else:
                    if out.isOpened():
                        out.release()
                        if os.path.isfile(output_name):
                            os.remove(output_name)

                if os.path.isfile(full_video_path):
                    os.remove(full_video_path)
                if os.path.isfile(meta_name):
                    os.remove(meta_name)
                save_video_flag = False
                cv2.destroyAllWindows()
Code Example #10
def deepsort(yolo, args):
    #nms_max_overlap = 0.3 #nms threshold

    images_input = True if os.path.isdir(args.input) else False
    if images_input:
        # get images list
        jpeg_files = glob.glob(os.path.join(args.input, '*.jpeg'))
        jpg_files = glob.glob(os.path.join(args.input, '*.jpg'))
        frame_capture = jpeg_files + jpg_files
        frame_capture.sort()
    else:
        # create video capture stream
        frame_capture = cv2.VideoCapture(0 if args.input ==
                                         '0' else args.input)
        if not frame_capture.isOpened():
            raise IOError("Couldn't open webcam or video")

    # create video save stream if needed
    save_output = True if args.output != "" else False
    if save_output:
        if images_input:
            raise ValueError("image folder input could be saved to video file")

        # here we encode the video to MPEG-4 for better compatibility, you can use ffmpeg later
        # to convert it to x264 to reduce file size:
        # ffmpeg -i test.mp4 -vcodec libx264 -f mp4 test_264.mp4
        #
        #video_FourCC    = cv2.VideoWriter_fourcc(*'XVID') if args.input == '0' else int(frame_capture.get(cv2.CAP_PROP_FOURCC))
        video_FourCC = cv2.VideoWriter_fourcc(
            *'XVID') if args.input == '0' else cv2.VideoWriter_fourcc(*"mp4v")
        video_fps = frame_capture.get(cv2.CAP_PROP_FPS)
        video_size = (int(frame_capture.get(cv2.CAP_PROP_FRAME_WIDTH)),
                      int(frame_capture.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        out = cv2.VideoWriter(args.output, video_FourCC,
                              (5. if args.input == '0' else video_fps),
                              video_size)

    if args.tracking_classes_path:
        # load the object classes used for tracking, if provided; any other
        # classes returned by the detector will be ignored
        tracking_class_names = get_classes(args.tracking_classes_path)
    else:
        tracking_class_names = None

    #create deep_sort box encoder
    encoder = create_box_encoder(args.deepsort_model_path, batch_size=1)

    #create deep_sort tracker
    max_cosine_distance = 0.5  #threshold for cosine distance
    nn_budget = None
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    # alloc a set of queues to record motion trace
    # for each track id
    motion_traces = [deque(maxlen=30) for _ in range(9999)]
    total_obj_counter = []

    # initialize a list of colors to represent each possible class label
    np.random.seed(100)
    COLORS = np.random.randint(0, 255, size=(200, 3), dtype="uint8")

    i = 0
    fps = 0.0
    while True:
        ret, frame = get_frame(frame_capture, i, images_input)
        if ret != True:
            break
        #time.sleep(0.2)
        i += 1

        start_time = time.time()
        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb

        # detect object from image
        _, out_boxes, out_classnames, out_scores = yolo.detect_image(image)
        # get tracking objects and convert bbox from (xmin,ymin,xmax,ymax) to (x,y,w,h)
        boxes, class_names, scores = get_tracking_object(
            out_boxes, out_classnames, out_scores, tracking_class_names)

        # get encoded features of bbox area image
        features = encoder(frame, boxes)

        # form up detection records
        detections = [
            Detection(bbox, score, feature, class_name)
            for bbox, score, class_name, feature in zip(
                boxes, scores, class_names, features)
        ]

        # Run non-maximum suppression.
        #nms_boxes = np.array([d.tlwh for d in detections])
        #nms_scores = np.array([d.confidence for d in detections])
        #indices = preprocessing.non_max_suppression(nms_boxes, nms_max_overlap, nms_scores)
        #detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        # show all detection result as white box
        for det in detections:
            bbox = det.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, str(det.class_name),
                        (int(bbox[0]), int(bbox[1] - 20)), 0, 5e-3 * 150,
                        (255, 255, 255), 2)

        track_indexes = []
        track_count = 0
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue

            # record tracking info and get bbox
            track_indexes.append(int(track.track_id))
            total_obj_counter.append(int(track.track_id))
            bbox = track.to_tlbr()

            # show all tracking result as color box
            color = [
                int(c)
                for c in COLORS[track_indexes[track_count] % len(COLORS)]
            ]
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (color), 3)
            cv2.putText(frame, str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 20)), 0, 5e-3 * 150,
                        (color), 2)
            if track.class_name:
                cv2.putText(frame, str(track.class_name),
                            (int(bbox[0] + 30), int(bbox[1] - 20)), 0,
                            5e-3 * 150, (color), 2)

            track_count += 1

            # get center point (x,y) of current track bbox and record in queue
            center = (int(
                ((bbox[0]) + (bbox[2])) / 2), int(((bbox[1]) + (bbox[3])) / 2))
            motion_traces[track.track_id].append(center)

            # draw current center point
            thickness = 5
            cv2.circle(frame, (center), 1, color, thickness)
            #draw motion trace
            motion_trace = motion_traces[track.track_id]
            for j in range(1, len(motion_trace)):
                if motion_trace[j - 1] is None or motion_trace[j] is None:
                    continue
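                # segment thickness shrinks roughly as sqrt(64/(j+1))*2, so later
                # points in the trace are drawn with thinner line segments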
                thickness = int(np.sqrt(64 / float(j + 1)) * 2)
                cv2.line(frame, (motion_trace[j - 1]), (motion_trace[j]),
                         (color), thickness)

        # show tracking statistics
        total_obj_num = len(set(total_obj_counter))
        cv2.putText(frame, "Total Object Counter: " + str(total_obj_num),
                    (int(20), int(120)), 0, 5e-3 * 200, (0, 255, 0), 2)
        cv2.putText(frame, "Current Object Counter: " + str(track_count),
                    (int(20), int(80)), 0, 5e-3 * 200, (0, 255, 0), 2)
        cv2.putText(frame, "FPS: %f" % (fps), (int(20), int(40)), 0,
                    5e-3 * 200, (0, 255, 0), 3)

        # refresh window
        cv2.namedWindow("DeepSORT", 0)
        cv2.resizeWindow('DeepSORT', 1024, 768)
        cv2.imshow('DeepSORT', frame)

        if save_output:
            #save a frame
            out.write(frame)

        end_time = time.time()
        fps = (fps + (1. / (end_time - start_time))) / 2
        # Press q to stop video
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # Release everything if job is finished
    if not images_input:
        frame_capture.release()
    if save_output:
        out.release()
    cv2.destroyAllWindows()
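The motion traces above are pre-allocated as a list of 9999 bounded deques indexed by track id. An alternative sketch (not what the example uses) that only allocates a trace when a track id is first seen:

from collections import defaultdict, deque

# one bounded trace per track id, created lazily on first use
motion_traces = defaultdict(lambda: deque(maxlen=30))

def record_center(track_id, bbox_tlbr):
    # store the center point of a (x_min, y_min, x_max, y_max) box
    x = int((bbox_tlbr[0] + bbox_tlbr[2]) / 2)
    y = int((bbox_tlbr[1] + bbox_tlbr[3]) / 2)
    motion_traces[track_id].append((x, y))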
def main():
##########################################################################################################
    #preparation part
    args = arg_parse()
    confidence = float(args.confidence)
    nms_thesh = float(args.nms_thresh)
    start = 0
    CUDA = torch.cuda.is_available()
    
    num_classes = 80
    
    model = Darknet(cfgfile)
    model.load_weights(weightsfile)
    
    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])
    
    assert inp_dim % 32 == 0                   # assert aborts the program when the condition is false
    assert inp_dim > 32

    if CUDA:
        model.cuda()
            
    model.eval()
    
    global confirm
    global person
    
    fps = 0.0
    count = 0
    frame = 0    
    person = []
    confirm = False
    reconfirm = False
    count_yolo = 0
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename,batch_size=1) 
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)
    #record the video
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    #out = cv2.VideoWriter('output/testwrite_normal.avi',fourcc, 15.0, (640,480),True)

    cap = cv2.VideoCapture(0)

    detect_time = []
    recogn_time = []
    kalman_time = []
    aux_time = []
    while True:
        start = time.time()  
        ret, color_image = cap.read()
        '''
        frames = pipeline.wait_for_frames()
        color_frame = frames.get_color_frame()
        color_image = np.asanyarray(color_frame.get_data())
        '''
        if color_image is None:
            break
        img, orig_im, dim = prep_image(color_image, inp_dim)
        
        im_dim = torch.FloatTensor(dim).repeat(1,2)             
##########################################################################################################
        #people detection part                
        if CUDA:
            im_dim = im_dim.cuda()
            img = img.cuda()
        time_a = time.time()
        if count_yolo %3 == 0:                                                               #detect people every 3 frames
            output = model(Variable(img), CUDA)                         # feed the preprocessed image into the YOLO network to get detections
            output = write_results(output, confidence, num_classes, nms = True, nms_conf = nms_thesh)         


            if type(output) == int:
                fps  = ( fps + (1./(time.time()-start)) ) / 2
                print("fps= %f"%(fps))
                cv2.imshow("frame", orig_im)
                key = cv2.waitKey(1)
                if key & 0xFF == ord('q'):
                    break
                continue
        
            output[:,1:5] = torch.clamp(output[:,1:5], 0.0, float(inp_dim))/inp_dim                # clamp the box coordinates to [0, inp_dim] and normalize
        
            #im_dim = im_dim.repeat(output.size(0), 1)
            output[:,[1,3]] *= color_image.shape[1]
            output[:,[2,4]] *= color_image.shape[0]
            output = output.cpu().numpy() 
            output = sellect_person(output)                                       # drop detections whose label is not 'person' to reduce computation
            output = np.array(output)
            output_update = output
        elif count_yolo %3 != 0:
            output = output_update
        count_yolo += 1
        list(map(lambda x: write(x, orig_im), output))                # draw the detection results on the original image
        # columns 1:5 of output hold the top-left and bottom-right corners of each box
        detect_time.append(time.time() - time_a)
##########################################################################################################
        time_a = time.time()
        #kalman filter part
        outputs_tlwh = to_tlwh(output)                             # convert the detections to the tlwh format expected by the deep_sort / Kalman update
        features = encoder(orig_im,outputs_tlwh)
        detections = [Detection(output_tlwh, 1.0, feature) for output_tlwh, feature in zip(outputs_tlwh, features)]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue 
            box = track.to_tlbr()
            cv2.rectangle(orig_im, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])),(255,255,255), 2)
            cv2.putText(orig_im, str(track.track_id),(int(box[0]), int(box[1])),0, 5e-3 * 200, (0,255,0),2)  
        
        kalman_time.append(time.time() - time_a)
##########################################################################################################
        #face recognition part
        time_a = time.time()
        if confirm == False:
            saved_model = './ArcFace/model/068.pth'
            name_list = os.listdir('./users')
            path_list = [os.path.join('./users',i,'%s.txt'%(i)) for i in name_list]
            total_features = np.empty((128,),np.float32)

            for i in path_list:
                temp = np.loadtxt(i)
                total_features = np.vstack((total_features,temp))
            total_features = total_features[1:]

            #threshold = 0.30896     # this threshold did not work well, probably because of the gap between training and test data
            threshold = 0.5
            model_facenet = mobileFaceNet()
            model_facenet.load_state_dict(torch.load(saved_model)['backbone_net_list'])
            model_facenet.eval()
            #use_cuda = torch.cuda.is_available() and True
            #device = torch.device("cuda" if use_cuda else "cpu")
            device = torch.device("cuda")

            # is_cuda_avilable
            trans = transforms.Compose([
                transforms.Resize((112,112)),
                transforms.ToTensor(),
                transforms.Normalize([0.5,0.5,0.5],[0.5,0.5,0.5])
            ])
            model_facenet.to(device)

            img = Image.fromarray(color_image)
            bboxes, landmark = detect_faces(img)                                                                  # detect faces first

            if len(bboxes) == 0:
                print('detect no people')
            else:
                for bbox in bboxes:
                    loc_x_y = [bbox[2], bbox[1]]
                    person_img = color_image[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[2])].copy()              # crop the face box from the image
                    feature = np.squeeze(get_feature(person_img, model_facenet, trans, device))                               # compute the feature for the cropped face
                    cos_distance = cosin_metric(total_features, feature)
                    index = np.argmax(cos_distance)
                    if  cos_distance[index] <= threshold:
                        continue
                    person = name_list[index]  
                    # draw the bounding box and label text here
                    orig_im = draw_ch_zn(orig_im,person,font,loc_x_y)                                                                    # draw the name
                    cv2.rectangle(orig_im,(int(bbox[0]),int(bbox[1])),(int(bbox[2]),int(bbox[3])),(0,0,255))           # draw the box
            #cv2.imshow("frame", orig_im)

##########################################################################################################
            # confirmation part
            print('confirmation rate: {} %'.format(count*10))
            cv2.putText(orig_im, 'confirmation rate: {} %'.format(count*10), (10,30),cv2.FONT_HERSHEY_PLAIN, 2, [0,255,0], 2)
            if len(bboxes)!=0 and len(output)!=0:
                if bboxes[0,0]>output[0,1] and bboxes[0,1]>output[0,2] and bboxes[0,2]<output[0,3] and bboxes[0,3]<output[0,4] and person:
                    count+=1
            frame+=1
            if count>=10 and frame<=30:
                confirm = True
                print('confirm the face is belong to that people')
            elif  frame >= 30:
                print('fail confirm, and start again')
                reconfirm = True
                count = 0
                frame = 0
            if reconfirm == True:
                cv2.putText(orig_im, 'fail confirm, and start again', (10,60),cv2.FONT_HERSHEY_PLAIN, 2, [0,255,0], 2)                   
##########################################################################################################
        recogn_time.append(time.time() - time_a)
        time_a = time.time()
        #show the final output result
        if not confirm:
            cv2.putText(orig_im, 'still not confirm', (output[0,1].astype(np.int32)+100,output[0,2].astype(np.int32)+20),
                                     cv2.FONT_HERSHEY_PLAIN, 2, [0,0,255], 2)
        # overlay the recognized name
        if confirm:  
            for track in tracker.tracks:
                bbox = track.to_tlbr()
                if track.track_id == 1:
                    cv2.putText(orig_im, person, (int(bbox[0])+100,int(bbox[1])+20),
                                            cv2.FONT_HERSHEY_PLAIN, 2, [0,255,0], 2)
                
                    #rate.sleep()
        cv2.imshow("frame", orig_im)
        #out.write(orig_im)
        key = cv2.waitKey(1)
        if key & 0xFF == ord('q'):
            break
        
        aux_time.append(time.time()-time_a)
        fps  = ( fps + (1./(time.time()-start)) ) / 2
        print("fps= %f"%(fps))
    #calculate how long each part takes
    avg_detect_time = np.mean(detect_time)
    avg_recogn_time = np.mean(recogn_time)
    avg_kalman_time = np.mean(kalman_time)
    avg_aux_time = np.mean(aux_time)
    print("avg detect: {}".format(avg_detect_time))
    print("avg recogn: {}".format(avg_recogn_time))
    print("avg kalman: {}".format(avg_kalman_time))
    print("avg aux: {}".format(avg_aux_time))
    print("avg fps: {}".format(1/(avg_detect_time + avg_recogn_time + avg_kalman_time + avg_aux_time)))
コード例 #12
0
def main():
    signal.signal(signal.SIGINT, signal_handler)
    print('Running. Press Ctrl + C to exit.')
    print (DEFAULT_LABEL_MAP_PATH)

    # parse arguments
    parser = argparse.ArgumentParser(description='--- Raspbery Pi Urban Mobility Tracker ---')
    parser.add_argument('-modelpath', dest='model_path', type=str, required=False, help='specify path of a custom detection model')
    parser.add_argument('-labelmap', dest='label_map_path', default=DEFAULT_LABEL_MAP_PATH, type=str, required=False, help='specify the label map text file')
    parser.add_argument('-imageseq', dest='image_path', type=str, required=False, help='specify an image sequence')
    parser.add_argument('-video', dest='video_path', type=str, required=False, help='specify video file')
    parser.add_argument('-camera', dest='camera', default=True, action='store_true', help='specify this when using the rpi camera as the input')
    parser.add_argument('-threshold', dest='threshold', type=float, default=0.5, required=False, help='specify a custom inference threshold')
    parser.add_argument('-tpu', dest='tpu', required=False, default=True, action='store_true', help='add this when using a coral usb accelerator')
    parser.add_argument('-nframes', dest='nframes', type=int, required=False, default=10, help='specify number of frames to process')
    parser.add_argument('-display', dest='live_view', required=False, default=True, action='store_true', help='add this flag to view a live display. note, that this will greatly slow down the fps rate.')
    parser.add_argument('-save', dest='save_frames', required=False, default=False, action='store_true', help='add this flag if you want to persist the image output. note, that this will greatly slow down the fps rate.')
    args = parser.parse_args()
    
    # basic checks
    if args.model_path: assert args.label_map_path, "when specifying a custom model, you must also specify a label map path using: '-labelmap <path to labelmap.txt>'"
    if args.model_path: assert os.path.exists(args.model_path)==True, "can't find the specified model..."
    if args.label_map_path: assert os.path.exists(args.label_map_path)==True, "can't find the specified label map..."
    if args.video_path: assert os.path.exists(args.video_path)==True, "can't find the specified video file..."


    print('> INITIALIZING UMT...')
    print('   > THRESHOLD:',args.threshold)

	# parse label map
    labels = parse_label_map(args, DEFAULT_LABEL_MAP_PATH)
    
    # initialize detector
    interpreter = initialize_detector(args)

    # create output directory
    if not os.path.exists('output') and args.save_frames: os.makedirs('output')
 
 	# initialize deep sort tracker   
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", MAX_COSINE_DIST, NN_BUDGET)
    tracker = Tracker(metric) 

    # initialize image source
    img_generator = initialize_img_source(args)

    # initialize plot colors (if necessary)
    if args.live_view or args.save_frames: COLORS = (np.random.rand(32, 3) * 255).astype(int)

    # main tracking loop
    print('\n> TRACKING...')
    #with open(TRACKER_OUTPUT_TEXT_FILE, 'w') as out_file:

    for i, pil_img in enumerate(img_generator(args)):
    
        f_time = int(time.time())
        print('> FRAME:', i)
        
        # add header to trajectory file
        '''
        if i == 0:
            header = (f'frame_num, rpi_time, obj_class, obj_id, obj_age,'
                'obj_t_since_last_update, obj_hits,'
                'xmin, ymin, xmax, ymax')

            tracked_list.append(header)
        '''
        # get detections
        detections = generate_detections(pil_img, interpreter, args.threshold)
        
        # proceed to updating state
        if len(detections) == 0: print('> no detections...')
        else:
        
            # update tracker
            tracker.predict()
            tracker.update(detections)
            
            # save object locations
            if len(tracker.tracks) > 0:
                for track in tracker.tracks:
                    bbox = track.to_tlbr()
                    class_name = labels[track.get_class()]
                    row = (f'{i},{f_time},{class_name},'
                        f'{track.track_id},{int(track.age)},'
                        f'{int(track.time_since_update)},{str(track.hits)},'
                        f'{int(bbox[0])},{int(bbox[1])},'
                        f'{int(bbox[2])},{int(bbox[3])}')
                    tracked_list.append(row)
            
        # only for live display
        if args.live_view or args.save_frames:
        
            # convert pil image to cv2
            cv2_img = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
        
            # cycle through actively tracked objects
            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                
                # draw detections and label
                bbox = track.to_tlbr()
                class_name = labels[track.get_class()]
                color = COLORS[int(track.track_id) % len(COLORS)].tolist()
                cv2.rectangle(cv2_img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2)
                cv2.rectangle(cv2_img, (int(bbox[0]), int(bbox[1]-30)), (int(bbox[0])+(len(str(class_name))+len(str(track.track_id)))*17, int(bbox[1])), color, -1)
                cv2.putText(cv2_img, str(class_name) + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1]-10)), cv2.FONT_HERSHEY_PLAIN, 1, (255,255,255), 1)

            # live view
            if args.live_view:
                cv2.imshow("tracker output", cv2_img)
                cv2.waitKey(1)
                
            # persist frames
            if args.save_frames: cv2.imwrite(f'output/frame_{i}.jpg', cv2_img)
            
    cv2.destroyAllWindows()         
    pass
コード例 #13
0
def main(yolo):

   # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0
    
   # deep_sort 
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename,batch_size=1)
    
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = True 
    
    #video_capture = cv2.VideoCapture(0)
    #video_capture = cv2.VideoCapture('videos/soccer_01.mp4')
    video_capture = cv2.VideoCapture('videos/M0902.avi')
    # video_capture = cv2.VideoCapture('videos/uav123_car6.avi')
    #video_capture = cv2.VideoCapture('videos/car/car_11.mp4')


    if writeVideo_flag:
    # Define the codec and create VideoWriter object
        w = int(video_capture.get(3))
        h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out = cv2.VideoWriter('output.avi', fourcc, 15, (w, h))
        list_file = open('detection.txt', 'w')
        frame_index = -1 
        
    fps = 0.0
    i=0;
    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if ret != True:
            break;
        t1 = time.time()
        # i+=1
        # if i%2!=1:
            # continue;
        image = Image.fromarray(frame)
        boxs, out_classes = yolo.detect_image(image)
       # print("box_num",len(boxs))
        features = encoder(frame,boxs)
        
        # score to 1.0 here).
        detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)]
        
        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]
        print("indices:",indices)
        print("detection: ",detections[indices[0]]);
        # Call the tracker
        tracker.predict()
        tracker.update(detections)
        
        fps  = ( fps + (1./(time.time()-t1)) ) / 2
        #print("fps= %f"%(fps))  

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue 
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(255,255,255), 2)
            cv2.putText(frame, str(track.track_id),(int(bbox[0]), int(bbox[1])),0, 5e-3 * 200, (0,255,0),2)


        for j,det in enumerate(detections):
            bbox = det.to_tlbr()
            cv2.rectangle(frame,(int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(255,0,0), 2)
            cv2.putText(frame,str(out_classes[j]),(int(bbox[0]),int(bbox[1])-35),0,5e-3*200,(143,17,86),2)
        cv2.namedWindow("track result", 0)
        cv2.resizeWindow("track result", 1280, 720)  
        cv2.imshow('track result', frame)
        
        if writeVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1
            list_file.write(str(frame_index)+' ')
            if len(boxs) != 0:
                for i in range(0,len(boxs)):
                    list_file.write(str(boxs[i][0]) + ' '+str(boxs[i][1]) + ' '+str(boxs[i][2]) + ' '+str(boxs[i][3]) + ' ')
            list_file.write('\n')
            
       
        
        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    video_capture.release()
    if writeVideo_flag:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
コード例 #14
0
    def detect_yolo(self, input):
        try:
            this_dir = os.path.dirname(__file__)
            yolo = YOLO()

            max_cosine_distance = 0.3
            nn_budget = None
            nms_max_overlap = 1.0

            # deep_sort
            model_filename = os.path.join(this_dir, 'models/mars-small128.pb')
            encoder = gdet.create_box_encoder(model_filename, batch_size=1)

            metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
            tracker = Tracker(metric)
            writeVideo_flag = True

            video_capture = cv2.VideoCapture(input)
            if writeVideo_flag:
                # Define the codec and create VideoWriter object
                w = int(video_capture.get(3))
                h = int(video_capture.get(4))
                fourcc = cv2.VideoWriter_fourcc(*'MJPG')
                out = cv2.VideoWriter(os.path.join(this_dir, 'data/output.avi'), fourcc, 15, (w, h))
                list_file = open(os.path.join(this_dir, 'data/detection.txt'), 'w')
                frame_index = -1

            fps = 0.0
            n = 0
            skip_frame = 5
            while True:
                ret, frame = video_capture.read()  # frame shape 640*480*3
                if ret != True:
                    break

                if int(skip_frame) != n:
                    n += 1
                    continue
                n = 0

                t1 = time.time()

                image = Image.fromarray(frame)
                boxs, classes = yolo.detect_image(image)

                for idb, box in enumerate(boxs):
                    # cv2.rectangle(frame, (int(box[0]), int(box[1])), (int(box[0]) + int(box[2]), int(box[1]) + int(box[3])),
                    #               (255, 255, 255), 2)
                    cv2.putText(frame, str(classes[idb]), (int(box[0]), int(box[1])), 0, 5e-3 * 100, (0, 255, 0), 2)

                features = encoder(frame, boxs)

                # score to 1.0 here).
                detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)]

                # Run non-maxima suppression.
                boxes = np.array([d.tlwh for d in detections])
                scores = np.array([d.confidence for d in detections])
                indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
                detections = [detections[i] for i in indices]

                # Call the tracker
                tracker.predict()
                tracker.update(detections)

                for track in tracker.tracks:
                    if not track.is_confirmed() or track.time_since_update > 1:
                        continue
                    bbox = track.to_tlbr()
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
                    cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200, (0, 255, 0), 2)

                for idb, det in enumerate(detections):
                    bbox = det.to_tlbr()
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)


                # cv2.imshow('gallery', frame)

                if writeVideo_flag:
                    # save a frame
                    out.write(frame)
                    frame_index = frame_index + 1
                    list_file.write(str(frame_index) + ' ')
                    if len(boxs) != 0:
                        for i in range(0, len(boxs)):
                            list_file.write(str(boxs[i][0]) + ' ' + str(boxs[i][1]) + ' ' + str(boxs[i][2]) + ' ' + str(
                                boxs[i][3]) + ';')
                    list_file.write('\n')

                fps = (fps + (1. / (time.time() - t1))) / 2
                print("fps= %f" % (fps))

                # Press Q to stop!
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break

            video_capture.release()
            if writeVideo_flag:
                out.release()
                list_file.close()
            cv2.destroyAllWindows()
            msg = "process finished!!!"
        except Exception as e:
            print(e)
            msg = "process error!!!"
        return msg
コード例 #15
0
class DeepSort(object):
    def __init__(self,
                 max_age=30,
                 nms_max_overlap=1.0,
                 max_cosine_distance=0.2,
                 nn_budget=None,
                 override_track_class=None,
                 clock=None,
                 half=True):
        '''
        Input Params:
            - nms_max_overlap: Non-maxima suppression threshold: Maximum detection overlap
            - max_cosine_distance: Gating threshold for cosine distance
            - nn_budget: Maximum size of the appearance descriptors, if None, no budget is enforced
        '''
        print('Initialising DeepSort..')
        # self.video_info = video_info
        # assert clock is not None
        self.nms_max_overlap = nms_max_overlap
        metric = nn_matching.NearestNeighborDistanceMetric(
            "cosine", max_cosine_distance, nn_budget)
        self.tracker = Tracker(metric,
                               max_age=max_age,
                               override_track_class=override_track_class,
                               clock=clock)
        self.embedder = Embedder(half=half)
        print('DeepSort Tracker initialised!')

    def update_tracks(self, frame, raw_detections):
        """Run multi-target tracker on a particular sequence.

        Parameters
        ----------
        frame : ndarray
            Current BGR frame, used to crop detection patches for the appearance embedder.
        raw_detections : list
            List of triples ( [left,top,w,h] , confidence, detection_class)

        Returns
        -------
        List of track objects (see track.py for details, or the "main" section below in this script for a simple example)

        """

        results = []

        raw_detections = [
            d for d in raw_detections if d[0][2] > 0 and d[0][3] > 0
        ]

        embeds = self.generate_embeds(frame, raw_detections)
        # Proper deep sort detection objects that consist of bbox, confidence and embedding.
        detections = self.create_detections(frame, raw_detections, embeds)

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes,
                                                    self.nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]

        # Update tracker.
        self.tracker.predict()
        self.tracker.update(detections)

        return self.tracker.tracks

    def generate_embeds(self, frame, raw_dets):
        crops = []
        im_height, im_width = frame.shape[:2]
        for detection in raw_dets:
            if detection is None:
                continue
            l, t, w, h = [int(x) for x in detection[0]]
            r = l + w
            b = t + h
            crop_l = max(0, l)
            crop_r = min(im_width, r)
            crop_t = max(0, t)
            crop_b = min(im_height, b)
            crops.append(frame[crop_t:crop_b, crop_l:crop_r])
        return self.embedder.predict(crops)

    def create_detections(self, frame, raw_dets, embeds):
        detection_list = []
        for i in range(len(embeds)):
            detection_list.append(
                Detection(raw_dets[i][0], raw_dets[i][1], embeds[i]))
        return detection_list

    def refresh_track_ids(self):
        self.tracker._next_id
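A minimal usage sketch for the `DeepSort` wrapper above, assuming the class is in scope and an external detector returns ([left, top, w, h], confidence, detection_class) triples per frame as described in the `update_tracks` docstring; `my_detector` and the input path are placeholders:

import cv2

tracker = DeepSort(max_age=30, nn_budget=100)
cap = cv2.VideoCapture('input.mp4')          # illustrative path
while True:
    ok, frame = cap.read()
    if not ok:
        break
    # raw_detections: list of ([left, top, w, h], confidence, detection_class)
    raw_detections = my_detector(frame)       # hypothetical detector call
    for track in tracker.update_tracks(frame, raw_detections):
        if not track.is_confirmed() or track.time_since_update > 1:
            continue
        l, t, r, b = map(int, track.to_tlbr())
        cv2.rectangle(frame, (l, t), (r, b), (255, 255, 255), 2)
cap.release()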
コード例 #16
0
def main(_argv):
    region = load_ROI()


    # Definition of the parameters
    max_cosine_distance = 0.3  #Default = 0.5
    nn_budget = None
    nms_max_overlap = 0.8      #Default = 0.5 

    #initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    video_name = os.path.splitext(FLAGS.video)[-2]

    weights = 'weights/yolov3_sang.tf'
    yolo.load_weights(weights)
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')


    #WRITE RESULT
    
    result = "tracking_result/{}_track.txt".format(video_name)
    file_out = open(result,'w')
    path = os.getcwd()
    path = str(os.path.split(os.path.split(path)[0])[0])
    #vid_path = os.path.join(path,"Data/{}/{}.mp4".format(video_name,video_name))
    vid_path = os.path.join(path,"data/test_data/{}.mp4".format(video_name))
    vid = cv2.VideoCapture(vid_path)
    out = None

    
    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))
    
    frame_index = -1 
    
    fps = 0.0
    count = 0 
    while True:
        _, img = vid.read()

        if img is None:
            break

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, FLAGS.size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in)
        classes = classes[0]
        names = []
        for i in range(len(classes)):
            names.append(class_names[int(classes[i])])
        names = np.array(names)
        converted_boxes = convert_boxes(img, boxes[0])
        features = encoder(img, converted_boxes)    
        detections = [Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip(converted_boxes, scores[0], names, features)]
        
        #initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppresion
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]        

        # Call the tracker
        tracker.predict()
        tracker.update(detections)
        frame_index = frame_index + 1
        if frame_index % 100 == 0: 
            print('FRAME: ',frame_index)
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue 
            bbox = track.to_tlbr()
            class_name = track.get_class()
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 1)
            #cv2.rectangle(img, (int(bbox[0]), int(bbox[1]-30)), (int(bbox[0])+(len(class_name)+len(str(track.track_id)))*17, int(bbox[1])), color, -1)
            #cv2.putText(img, class_name + "-" + str(track.track_id),(int(bbox[0]), int(bbox[1]-10)),0, 0.75, (255,255,255),2)
            x_cen = int((int(bbox[2]) + int(bbox[0]))/2)
            y_cen = int((int(bbox[3]) + int(bbox[1]))/2)

            if is_in_region((int(bbox[0]), int(bbox[1])),(int(bbox[2]), int(bbox[3])),region) == False:  # outside the ROI: delete the track
                track.delete_track()

            cv2.putText(img,"FRAME: "+ str(frame_index),(0,45),cv2.FONT_HERSHEY_COMPLEX_SMALL,1,(0,255,0),2)
            
            # write the tracking result file in the CountMovement format
            bb_width = int(bbox[2]) - int(bbox[0])
            bb_height = int(bbox[3]) - int(bbox[1])
            diagonal = math.sqrt(bb_height**2 + bb_width**2)
            file_out.write("{},{},{},{},{},{},{},{},{}\n".format(frame_index,track.track_id,x_cen,y_cen,diagonal,-1.0,class_to_classNumber(str(class_name)),bb_width,bb_height))

        ### UNCOMMENT BELOW IF YOU WANT CONSTANTLY CHANGING YOLO DETECTIONS TO BE SHOWN ON SCREEN
        for det in detections:
            bbox = det.to_tlbr() 
            cv2.rectangle(img,(int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(0,255,0), 1)
        
        # print fps on screen 
        fps  = ( fps + (1./(time.time()-t1)) ) / 2
        cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30),
                          cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        cv2.imshow('output', img)
        if FLAGS.output:
            out.write(img)

        # press q to quit
        if cv2.waitKey(1) == ord('q'):
            break
    vid.release()
    if FLAGS.output:
        out.release()
    cv2.destroyAllWindows()
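The ROI filtering above relies on `load_ROI` and `is_in_region`, neither of which is shown. A sketch of one plausible `is_in_region`, under the assumption that the region is a list of (x, y) polygon vertices, using a ray-casting point-in-polygon test:

def point_in_polygon(x, y, polygon):
    # polygon: list of (x, y) vertices; standard ray-casting test
    inside = False
    n = len(polygon)
    for i in range(n):
        x1, y1 = polygon[i]
        x2, y2 = polygon[(i + 1) % n]
        if (y1 > y) != (y2 > y):
            x_cross = x1 + (y - y1) * (x2 - x1) / (y2 - y1)
            if x < x_cross:
                inside = not inside
    return inside

def is_in_region(top_left, bottom_right, region):
    # treat the box as inside only if both corners fall inside the polygon
    return (point_in_polygon(top_left[0], top_left[1], region)
            and point_in_polygon(bottom_right[0], bottom_right[1], region))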
コード例 #17
0
def main(_argv):
    # Definition of the parameters
    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 1.0

    #initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except:
        vid = cv2.VideoCapture(FLAGS.video)

    out = None

    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0
    count = 0
    while True:
        _, img = vid.read()

        if img is None:
            logging.warning("Empty Frame")
            time.sleep(0.1)
            count += 1
            if count < 3:
                continue
            else:
                break

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, FLAGS.size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in)
        classes = classes[0]
        names = []
        for i in range(len(classes)):
            names.append(class_names[int(classes[i])])
        names = np.array(names)
        converted_boxes = convert_boxes(img, boxes[0])
        features = encoder(img, converted_boxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                converted_boxes, scores[0], names, features)
        ]

        #initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppresion
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes,
                                                    nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), color, 2)
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1] - 30)),
                          (int(bbox[0]) +
                           (len(class_name) + len(str(track.track_id))) * 17,
                           int(bbox[1])), color, -1)
            cv2.putText(img, class_name + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                        (255, 255, 255), 2)

        ### UNCOMMENT BELOW IF YOU WANT CONSTANTLY CHANGING YOLO DETECTIONS TO BE SHOWN ON SCREEN
        #for det in detections:
        #    bbox = det.to_tlbr()
        #    cv2.rectangle(img,(int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(255,0,0), 2)

        # print fps on screen
        fps = (fps + (1. / (time.time() - t1))) / 2
        cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        cv2.imshow('output', img)
        if FLAGS.output:
            out.write(img)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(converted_boxes) != 0:
                for i in range(0, len(converted_boxes)):
                    list_file.write(
                        str(converted_boxes[i][0]) + ' ' +
                        str(converted_boxes[i][1]) + ' ' +
                        str(converted_boxes[i][2]) + ' ' +
                        str(converted_boxes[i][3]) + ' ')
            list_file.write('\n')

        # press q to quit
        if cv2.waitKey(1) == ord('q'):
            break
    vid.release()
    if FLAGS.output:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
コード例 #18
0
def detect_video_with_deepsort(yolo,
                               video_path,
                               rot_number,
                               output_path="",
                               deepsort_model_filename=None):

    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    vid = cv2.VideoCapture(video_path)
    if not vid.isOpened():
        raise IOError("Couldn't open webcam or video")
    video_FourCC = int(vid.get(cv2.CAP_PROP_FOURCC))
    video_fps = vid.get(cv2.CAP_PROP_FPS)
    video_size = (int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)),
                  int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    isOutput = True if output_path != "" else False
    if isOutput:
        print("!!! TYPE:", type(output_path), type(video_FourCC),
              type(video_fps), type(video_size))
        out = cv2.VideoWriter(output_path, video_FourCC, video_fps, video_size)
    accum_time = 0
    curr_fps = 0
    fps = "FPS: ??"
    prev_time = timer()

    # load deep_sort
    encoder = gdet.create_box_encoder(deepsort_model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    while True:
        return_value, frame_bgr = vid.read()

        # object detection uses the RGB frame because the model was trained on RGB images;
        # deep_sort uses the BGR frame, as in the original code
        # 1. OpenCV reads frames as BGR, so convert to RGB before detection
        frame_rbg = cv2.cvtColor(frame_bgr.copy(), cv2.COLOR_BGR2RGB)

        # 2. rotate the image
        frame_rbg = np.rot90(frame_rbg, rot_number)

        # 3. YOLO detection; the output boxes are in tlbr format
        frame_rbg_Image = Image.fromarray(frame_rbg)
        out_boxes_tlbr, out_scores, out_classes = yolo.get_detect_boxes(
            frame_rbg_Image)

        # 4. convert the detector's tlbr boxes to tlwh
        out_boxes_tlwh = []
        out_boxes_tlbr_1 = copy.deepcopy(out_boxes_tlbr)  # deep copy is required because the list contains nested lists
        if len(out_boxes_tlbr_1) != 0:
            for bbox in out_boxes_tlbr_1:
                bbox[2:] -= bbox[:2]
                out_boxes_tlwh.append(bbox)
                # print('out_boxes:',out_boxes[i])

        features = encoder(frame_bgr, out_boxes_tlwh)
        # score to 1.0 here).
        detections = [
            Detection(bbox, 1.0, feature)
            for bbox, feature in zip(out_boxes_tlwh, features)
        ]
        # 5. Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]
        # 6. deep_sort tracking
        tracker.predict()
        tracker.update(detections)

        # 7. draw the deep_sort tracking boxes
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            # convert tlwh back to tlbr for drawing
            bbox = track.to_tlbr()
            cv2.rectangle(frame_bgr, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame_bgr, str(track.track_id),
                        (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200,
                        (0, 255, 0), 2)

        # 8. draw the raw detection boxes
        detections = out_boxes_tlbr
        for bbox in detections:
            cv2.rectangle(frame_bgr, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        #
        curr_time = timer()
        exec_time = curr_time - prev_time
        prev_time = curr_time
        accum_time = accum_time + exec_time
        curr_fps = curr_fps + 1
        if accum_time > 1:
            accum_time = accum_time - 1
            fps = "FPS: " + str(curr_fps)
            curr_fps = 0
        cv2.putText(frame_bgr,
                    text=fps,
                    org=(3, 15),
                    fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                    fontScale=0.50,
                    color=(255, 0, 0),
                    thickness=2)
        # cv2.namedWindow("result", cv2.WINDOW_NORMAL)

        # if isOutput:
        #     out.write(result)
        cv2.imshow('', frame_bgr)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    yolo.close_session()
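Steps 4 and 7 above shuffle by hand between (x_min, y_min, x_max, y_max) and (x, y, w, h) box formats; a self-contained sketch of the two conversions for a single box:

import numpy as np

def tlbr_to_tlwh(box):
    # (x_min, y_min, x_max, y_max) -> (x, y, w, h)
    box = np.array(box, dtype=float)
    box[2:] -= box[:2]
    return box

def tlwh_to_tlbr(box):
    # (x, y, w, h) -> (x_min, y_min, x_max, y_max)
    box = np.array(box, dtype=float)
    box[2:] += box[:2]
    return box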
コード例 #19
0
def main(yolo, url, CreateBoxEncoder, q):
    producer = None
    if KAFKA_ON:
        ip_port = '{}:{}'.format(KAFKA_IP, KAFKA_PORT)
        producer = KafkaProducer(bootstrap_servers=ip_port)
        logger.debug('open kafka')
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)
    door = get_door(url)
    #    init   var
    center_mass = {}
    miss_ids = []
    disappear_box = {}
    person_list = []
    in_house = {}
    in_out_door = {"out_door_per": 0, "into_door_per": 0}
    only_id = str(uuid.uuid4())
    logger.debug('rtmp: {} load finish'.format(url))
    last_person_num = 0
    last_monitor_people = 0
    while True:
        t1 = time.time()
        if q.empty():
            continue
        frame = q.get()
        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
        boxs, scores_ = yolo.detect_image(image)
        t2 = time.time()
        # print('5====={}======{}'.format(os.getpid(), round(t2 - t1, 4)))
        now = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
        logger.debug("box_num: {}".format(len(boxs)))
        features = CreateBoxEncoder.encoder(frame, boxs)
        # score to 1.0 here).
        # detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)]
        detections = [
            Detection(bbox, scores_, feature)
            for bbox, scores_, feature in zip(boxs, scores_, features)
        ]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)
        # ids of people currently being tracked
        track_id_list = []

        cv2.rectangle(frame, (door[0], door[1]), (door[2], door[3]),
                      (0, 0, 255), 2)
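        # horizontal reference line at the door's vertical midpoint scaled by DOOR_HIGH
        # (assumed here to be a global tuning constant defined elsewhere)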
        door_half_h = int(int((door[1] + door[3]) / 2) * DOOR_HIGH)
        cv2.line(frame, (0, door_half_h), (111111, door_half_h), (0, 255, 0),
                 1, 1)
        high_score_ids = {}
        for track in tracker.tracks:
            # if a tracked target has not reappeared within 20 frames (MAX_AGE), treat it as lost and record its id among the disappeared
            if track.time_since_update == MAX_AGE:
                miss_id = str(track.track_id)
                miss_ids.append(miss_id)
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            # if the person id already exists, append its box coordinates to center_mass;
            # otherwise create a new center_mass entry keyed by the id
            track_id = str(track.track_id)
            bbox = track.to_tlbr()
            near_door = is_near_door({track_id: bbox}, door)
            if track.score >= 0.92 and not near_door:
                high_score_ids[track_id] = [[
                    int(bbox[0]),
                    int(bbox[1]),
                    int(bbox[2]),
                    int(bbox[3])
                ]]

            track_id_list.append(track_id)

            if track_id in center_mass:
                center_ = center_mass.get(track_id)
                if len(center_) > 49:
                    center_.pop(0)
                center_.append(
                    [int(bbox[0]),
                     int(bbox[1]),
                     int(bbox[2]),
                     int(bbox[3])])
            else:
                center_mass[track_id] = [[
                    int(bbox[0]),
                    int(bbox[1]),
                    int(bbox[2]),
                    int(bbox[3])
                ]]

            # # --------------------------------------------
            # # logger.debug('box1:{}'.format([int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])]))
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)
            cv2.putText(frame, str(track.track_id),
                        (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200,
                        (0, 255, 0), 2)
            x0, y0 = int((bbox[0] + bbox[2]) / 2), int((bbox[1] + bbox[3]) / 2)
            cv2.putText(frame, str(round(track.score, 3)), (x0, y0), 0, 0.6,
                        (0, 255, 0), 2)
            # cv2.circle(frame, (x0, y0), 2, (0, 255, 255), thickness=2, lineType=1, shift=0)
            # # --------------------------------------------

            # x0, y0 = int((bbox[0] + bbox[2]) / 2), int((bbox[1] + bbox[3]) / 2)
            # w = abs(int(bbox[3]) - int(bbox[1]))
            # h = abs(int(bbox[2]) - int(bbox[0]))
            logger.info('id:{}, score:{}'.format(track_id, track.score))

        for id in miss_ids:
            if id in center_mass.keys():
                disappear_box[id] = center_mass[id]
                del center_mass[id]
        miss_ids.clear()

        # decide whether people entered or left through the door
        out_or_in(center_mass, door, in_house, disappear_box, in_out_door)
        # near_door = is_near_door(center_mass, door, disappear_id)

        # relatively high-confidence detections, used to report the current head count in real time
        box_score_person = [scores for scores in scores_ if scores > 0.72]
        person_sum = in_out_door['into_door_per'] - in_out_door['out_door_per']
        # if person_sum <= len(high_score_ids) and not near_door:
        if person_sum <= len(high_score_ids):
            # when the high-confidence count exceeds the in/out difference, correct the entering count and reset the leaving count to 0
            if person_sum == len(high_score_ids) == 1:
                pass
                # print('person_sum == len(high_score_ids) == 1')
            else:
                logger.warning('reset in_out_door person')
                in_out_door['out_door_per'] = 0
                in_out_door['into_door_per'] = len(high_score_ids)
                in_house.update(high_score_ids)
                # print('high score:{}'.format(high_score_ids))
                logger.warning('22222222-id: {} after into of door: {}'.format(
                    in_house.keys(), in_out_door['into_door_per']))
                person_sum = len(high_score_ids)
        if in_out_door['into_door_per'] == in_out_door['out_door_per'] > 0:
            in_out_door['into_door_per'] = in_out_door['out_door_per'] = 0
        if len(person_list) > 100:
            person_list.pop(0)
        person_list.append(person_sum)
        # extract the camera id from the url
        pattern = str(url)[7:].split(r"/")
        logger.debug('pattern {}'.format(pattern[VIDEO_CONDE]))
        video_id = pattern[VIDEO_CONDE]
        logger.info('object tracking cost {}'.format(time.time() - t1))
        # when the recent counts are all zero, reset the in/out counters and all dict state
        if person_list.count(0) == len(person_list) == 101:
            logger.debug('long time person is 0')
            in_out_door['into_door_per'] = 0
            in_out_door['out_door_per'] = 0
            in_house.clear()
            logger.warning('All Clear')
        cv2.putText(frame, "person: " + str(person_sum), (40, 40), 0,
                    5e-3 * 200, (0, 255, 0), 2)
        cv2.putText(frame, "now_per: " + str(len(box_score_person)), (280, 40),
                    0, 5e-3 * 200, (0, 255, 0), 2)

        # when the conditions are met, send the people info to the front-end module
        if (last_person_num != person_sum
                or last_monitor_people != len(box_score_person)) and producer:
            monitor_people_num = len(box_score_person)
            logger.debug("person-sum:{} monitor-people_num:{}".format(
                person_sum, monitor_people_num))
            # if int(time.time()) - last_time >= 1:
            cv2.imwrite(
                "/opt/code/deep_sort_yolov3/image/{}.jpg".format(
                    str(uuid.uuid4())), frame)
            # print('save img success')
            save_to_kafka(TOPIC_SHOW, now, person_sum, url, producer, video_id,
                          monitor_people_num, only_id)
            if last_person_num > 0 and person_sum == 0:
                only_id = str(uuid.uuid4())

            if last_person_num == 0 and person_sum > 0:
                save_to_kafka(TOPIC_NVR, now, person_sum, url, producer,
                              video_id, len(box_score_person), only_id)

            # last_time = int(time.time())
            last_person_num = person_sum
            last_monitor_people = len(box_score_person)
        # When the condition is met, send info to the NVR module

        logger.info('url:{} into_door_per: {}'.format(
            url, in_out_door['into_door_per']))
        logger.info('url:{} out_door_per: {}'.format(
            url, in_out_door['out_door_per']))
        logger.info('url:{} in_house: {}'.format(url, in_house))
        logger.info('url:{} monitor_people_num: {}'.format(
            url, len(box_score_person)))
        logger.info('url:{} person_sum: {}'.format(url, person_sum))
        logger.info('GPU image load cost {}'.format(time.time() - t1))
        t3 = time.time()
        fps = round(1 / (t3 - t1), 3)
        # print('pid:{}===fps:{}===time:{}'.format(os.getpid(), fps, round(t3 - t1, 4)))
        # print('*' * 30)
        logger.debug("fps= %f" % fps)
        cv2.imshow('', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
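The entry/exit counters in in_out_door above are incremented inside out_or_in, whose body is not part of this excerpt. A minimal sketch of such a check, assuming door is an axis-aligned (left, top, right, bottom) rectangle and center_mass / disappear_box map track ids to (x, y) centres, is shown below; the helper is hypothetical and not the project's actual implementation.

def out_or_in(center_mass, door, in_house, disappear_box, in_out_door):
    # Hypothetical sketch only; the real out_or_in is defined elsewhere in the project.
    x1, y1, x2, y2 = door  # assumed (left, top, right, bottom) door rectangle

    def inside(point):
        return x1 <= point[0] <= x2 and y1 <= point[1] <= y2

    # A visible track whose centre is inside the door region counts as an entry
    for track_id, centre in center_mass.items():
        if inside(centre) and track_id not in in_house:
            in_house[track_id] = centre
            in_out_door['into_door_per'] += 1

    # A track that disappeared while inside the door region counts as an exit
    for track_id, centre in list(disappear_box.items()):
        if inside(centre) and track_id in in_house:
            del in_house[track_id]
            in_out_door['out_door_per'] += 1
        del disappear_box[track_id]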
コード例 #20
0
ファイル: deepdish.py プロジェクト: mor1/deepdish
class Pipeline:
    """Object detection and tracking pipeline"""
    def __init__(self, args, input=None):
        self.args = args

        # Initialise camera & camera viewport
        self.init_camera(input)
        # Initialise output
        self.init_output(self.args.output)

        # Initialise object detector (for some reason it has to happen
        # here & not within detect_objects(), or else the inference engine
        # gets upset and starts throwing NaNs at me. Thanks, Python.)
        self.object_detector = SSD_MOBILENET(wanted_label='person',
                                             model_file=self.args.model,
                                             label_file=self.args.labels,
                                             num_threads=self.args.num_threads)

        # Initialise feature encoder
        if self.args.encoder_model is None:
            model_filename = '{}/mars-64x32x3.pb'.format(
                self.args.deepsorthome)
        else:
            model_filename = self.args.encoder_model

        self.encoder = gdet.create_box_encoder(
            model_filename, batch_size=self.args.encoder_batch_size)

        # Initialise tracker
        nn_budget = None
        metric = nn_matching.NearestNeighborDistanceMetric(
            "cosine", self.args.max_cosine_distance, nn_budget)
        self.tracker = Tracker(metric,
                               max_iou_distance=self.args.max_iou_distance,
                               max_age=self.args.max_age)

        # Initialise database
        self.db = {}
        self.delcount = 0
        self.intcount = 0
        self.poscount = 0
        self.negcount = 0

        self.loop = asyncio.get_event_loop()

    def init_camera(self, input):
        if input is None:
            self.input = self.args.input
        else:
            self.input = input
        self.cap = cv2.VideoCapture(self.input)

        # Configure the 'counting line' in the camera viewport
        if self.args.line is None:
            w, h = self.args.camera_width, self.args.camera_height
            self.countline = np.array([[w / 2, 0], [w / 2, h]], dtype=int)
        else:
            self.countline = np.array(list(
                map(int,
                    self.args.line.strip().split(','))),
                                      dtype=int).reshape(2, 2)
        self.cameracountline = self.countline.astype(float)

    def init_output(self, output):
        self.color_mode = None  # fixme
        fourcc = cv2.VideoWriter_fourcc(*'MP4V')
        fps = self.cap.get(cv2.CAP_PROP_FPS)
        (w, h) = (self.args.camera_width, self.args.camera_height)
        self.backbuf = Image.new("RGBA", (w, h), (0, 0, 0, 0))
        self.draw = ImageDraw.Draw(self.backbuf)
        self.output = cv2.VideoWriter(self.args.output, fourcc, fps, (w, h))

    def read_frame(self):
        ret, frame = self.cap.read()
        return (frame, time.time())

    async def capture(self, q):
        try:
            with concurrent.futures.ThreadPoolExecutor() as pool:
                while self.running:
                    (frame, t_frame) = await self.loop.run_in_executor(
                        pool, self.read_frame)
                    #print(frame)
                    if frame is None:
                        print('Frame is None')
                        break
                    await q.put((frame, t_frame))
                    await asyncio.sleep(1.0 / 30.0)
        finally:
            self.cap.release()

    def run_object_detector(self, image):
        t1 = time.time()
        boxes = self.object_detector.detect_image(image)
        t2 = time.time()
        return (boxes, t2 - t1)

    async def detect_objects(self, q_in, q_out):
        # Initialise background subtractor
        backSub = cv2.createBackgroundSubtractorMOG2()

        frameCount = 0
        with concurrent.futures.ThreadPoolExecutor() as pool:
            while self.running:
                frameCount += 1

                # Obtain next video frame
                (frame, t_frame) = await q_in.get()

                if self.args.camera_flip:
                    # If we need to flip the image vertically
                    frame = cv2.flip(frame, 0)

                # Apply background subtraction to find image-mask of areas of motion
                fgMask = backSub.apply(frame)

                # Convert to PIL Image
                image = Image.fromarray(
                    cv2.cvtColor(frame, cv2.COLOR_BGRA2RGBA))

                # Run object detection engine within a Thread Pool
                (boxes0, delta_t) = await self.loop.run_in_executor(
                    pool, self.run_object_detector, image)

                # Filter object detection boxes, including only those with areas of motion
                boxes = []
                for (x, y, w, h) in boxes0:
                    x, y, w, h = int(x), int(y), int(w), int(h)
                    # Check if the box includes any detected motion
                    # (fgMask is indexed [row, col], i.e. [y, x])
                    if np.any(fgMask[y:y + h, x:x + w]):
                        boxes.append((x, y, w, h))

                # Send results to next step in pipeline
                elements = [
                    FrameInfo(t_frame, frameCount),
                    CameraImage(image),
                    CameraCountLine(self.cameracountline),
                    TimingInfo('Object detection latency', 'objd', delta_t)
                ]
                await q_out.put((frame, boxes, elements))

    async def encode_features(self, q_in, q_out):
        with concurrent.futures.ThreadPoolExecutor() as pool:
            while self.running:
                # Obtain next video frame and object detection boxes
                (frame, boxes, elements) = await q_in.get()

                # Run feature encoder within a Thread Pool
                features = await self.loop.run_in_executor(
                    pool, self.encoder, frame, boxes)

                # Build list of 'Detection' objects and send them to next step in pipeline
                detections = [
                    Detection(bbox, 1.0, feature)
                    for bbox, feature in zip(boxes, features)
                ]
                await q_out.put((detections, elements))

    async def track_objects(self, q_in, q_out):
        while self.running:
            (detections, elements) = await q_in.get()
            boxes = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            indices = preprocessing.non_max_suppression(
                boxes, self.args.nms_max_overlap, scores)
            detections = [detections[i] for i in indices]
            self.tracker.predict()
            self.tracker.update(detections)
            await q_out.put((detections, elements))

    async def process_results(self, q_in, q_out):
        while self.running:
            (detections, elements) = await (q_in.get())

            for track in self.tracker.deleted_tracks:
                i = track.track_id
                if track.is_deleted():
                    self.check_deleted_track(track.track_id)

            for track in self.tracker.tracks:
                i = track.track_id
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                if i not in self.db:
                    self.db[i] = []

                bbox = track.to_tlbr()

                # Find the bottom-centre of the bounding box & add it to the tracking database
                bottomCentre = np.array([(bbox[0] + bbox[2]) / 2.0, bbox[3]])
                self.db[i].append(bottomCentre)

                if len(self.db[i]) > 1:
                    # If we have more than one datapoint for this tracked object
                    pts = (np.array(self.db[i]).reshape(
                        (-1, 1, 2))).reshape(-1)
                    elements.append(TrackedPath(pts))

                    p1 = self.cameracountline[0]
                    q1 = self.cameracountline[1]
                    p2 = np.array(self.db[i][-1])
                    q2 = np.array(self.db[i][-2])
                    cp = np.cross(q1 - p1, q2 - p2)
                    if intersection(p1, q1, p2, q2):
                        self.intcount += 1
                        print(
                            "track_id={} just intersected camera countline; cross-prod={}; intcount={}"
                            .format(i, cp, self.intcount))
                        elements.append(TrackedPathIntersection(pts[-4:]))
                        if cp >= 0:
                            self.poscount += 1
                        else:
                            self.negcount += 1
                        # send_mqtt_msg(frameCapTime)

                elements.append(TrackedObject(bbox, str(track.track_id)))

            for det in detections:
                bbox = det.to_tlbr()
                elements.append(DetectedObject(bbox))

            elements.append(CountingStats(self.negcount, self.poscount))

            await q_out.put(elements)

    def graphical_output(self, render: RenderInfo, elements,
                         output_wh: (int, int)):
        (output_w, output_h) = output_wh

        # Clear screen
        self.draw.rectangle([0, 0, output_w, output_h], fill=0, outline=0)

        # Sort elements by display priority
        elements.sort(key=lambda e: e.priority)

        # Draw elements
        for e in elements:
            if hasattr(e, 'do_render'):
                e.do_render(render)

        # Copy backbuf to output
        backarray = np.array(self.backbuf)
        if self.color_mode is not None:
            outputrgba = cv2.cvtColor(backarray, self.color_mode)
        else:
            outputrgba = backarray
        outputrgb = cv2.cvtColor(outputrgba, cv2.COLOR_RGBA2RGB)
        self.output.write(outputrgb)
        #cv2.imshow('main', outputrgb)

    def text_output(self, handle, elements):
        # Sort elements by priority
        elements.sort(key=lambda e: e.priority)

        for e in elements:
            if hasattr(e, 'do_text'):
                e.do_text(handle, elements)

    async def render_output(self, q_in):
        (output_w, output_h) = (self.args.camera_width,
                                self.args.camera_height)
        ratio = 1  #fixme
        render = RenderInfo(ratio, FontLib(output_w), self.draw, self.backbuf)

        try:
            while self.running:
                elements = await q_in.get()

                self.graphical_output(render, elements, (output_w, output_h))

                for e in elements:
                    if isinstance(e, FrameInfo):
                        t_frame = e.t_frame
                        break
                elements.append(
                    TimingInfo('Overall latency', 'overall',
                               time.time() - t_frame))

                self.text_output(sys.stdout, elements)

                await asyncio.sleep(1.0 / 30.0)  # FIXME
        finally:
            self.output.release()

    def check_deleted_track(self, i):
        if i in self.db and len(self.db[i]) > 1:
            if any_intersection(self.cameracountline[0],
                                self.cameracountline[1], np.array(self.db[i])):
                self.delcount += 1
                print("delcount={}".format(self.delcount))
            self.db[i] = []

    async def start(self):
        self.running = True
        cameraQueue = FreshQueue()
        objectQueue = asyncio.Queue(maxsize=1)
        detectionQueue = asyncio.Queue(maxsize=1)
        resultQueue = asyncio.Queue(maxsize=1)
        drawQueue = asyncio.Queue(maxsize=1)

        asyncio.ensure_future(self.render_output(drawQueue))
        asyncio.ensure_future(self.process_results(resultQueue, drawQueue))
        asyncio.ensure_future(self.track_objects(detectionQueue, resultQueue))
        asyncio.ensure_future(self.encode_features(objectQueue,
                                                   detectionQueue))
        asyncio.ensure_future(self.detect_objects(cameraQueue, objectQueue))
        await self.capture(cameraQueue)
        self.running = False
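The counting logic in process_results and check_deleted_track relies on intersection and any_intersection helpers that live elsewhere in the deepdish project. A minimal sketch of such segment-intersection tests, using the standard orientation (CCW) trick and assuming each point is an (x, y) pair or 2-element array, might look like this (names and signatures are assumptions based on how they are called above):

def _ccw(a, b, c):
    # True when the points a, b, c make a counter-clockwise turn
    return (c[1] - a[1]) * (b[0] - a[0]) > (b[1] - a[1]) * (c[0] - a[0])


def intersection(p1, q1, p2, q2):
    # Segments p1-q1 and p2-q2 intersect when the endpoints of each segment
    # lie on opposite sides of the other segment
    return _ccw(p1, p2, q2) != _ccw(q1, p2, q2) and _ccw(p1, q1, p2) != _ccw(p1, q1, q2)


def any_intersection(p, q, points):
    # True when the tracked path through `points` crosses the segment p-q anywhere
    return any(intersection(p, q, points[i], points[i + 1])
               for i in range(len(points) - 1))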
コード例 #21
0
def main(argv):
    # print("location recieved in main as: ", e)
    ###################################
    global VIOLATION_PERCENTAGE, PROCESSING_STATUS, VIOLATION_FRAME
    violator_count_list = list()
    ###################################
    # Definition of the parameters
    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 1.0

    #initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    yolo = YoloV3(classes=80)

    yolo.load_weights('./weights/yolov3.tf')
    logging.info('weights loaded')

    class_names = [c.strip() for c in open('./coco.names').readlines()]
    logging.info('classes loaded')
    video_path = 'test.mkv'

    try:
        vid = cv2.VideoCapture(int(FILE_URL))
    except:
        vid = cv2.VideoCapture(FILE_URL)
    time.sleep(1.0)

    out = None

    width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    print("height: ", height)
    print("width: ", width)
    fps = int(vid.get(cv2.CAP_PROP_FPS))
    codec = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter('./result.avi', codec, fps, (width, height))
    frame_index = -1
    fps = 0.0
    count = 0
    PROCESSING_STATUS = True
    while True:
        _, img = vid.read()
        if img is None:
            logging.warning("Empty Frame")
            time.sleep(0.1)
            count += 1
            if count < 3:
                continue
            else:
                break
        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, 416)
        temp_violators = set()
        temp_total_people = set()
        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in)
        classes = classes[0]
        names = []
        for i in range(len(classes)):
            names.append(class_names[int(classes[i])])
        names = np.array(names)
        converted_boxes = convert_boxes(img, boxes[0])
        features = encoder(img, converted_boxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                converted_boxes, scores[0], names, features)
        ]

        #initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes,
                                                    nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            class_name1 = track.get_class()
            if class_name1 == "person":
                temp_total_people.add(track.track_id)
                bbox1 = track.to_tlbr()
                x1_c = int(bbox1[0] + (bbox1[2] - bbox1[0]) / 2)
                y1_c = int(bbox1[1] + (bbox1[3] - bbox1[1]) / 2)
                r1 = int(abs(bbox1[3] - bbox1[1]))
                color = (255, 0, 0)
                cv2.line(img, (x1_c, y1_c), (x1_c, y1_c + r1 // 2),
                         (0, 255, 0), 2)
                cv2.circle(img, (x1_c, y1_c), 5, (255, 20, 200), -1)
                scale = (r1) / 100
                transparentOverlay(img,
                                   dst_circle, (x1_c, y1_c - 5),
                                   alphaVal=110,
                                   color=(0, 200, 20),
                                   scale=scale)
                for other in tracker.tracks:
                    if not other.is_confirmed() or other.time_since_update > 1:
                        continue
                    if track.track_id == other.track_id:
                        continue

                    class_name2 = other.get_class()
                    if class_name2 == "person":
                        temp_total_people.add(other.track_id)
                        bbox2 = other.to_tlbr()
                        x2_c = int(bbox2[0] + (bbox2[2] - bbox2[0]) / 2)
                        y2_c = int(bbox2[1] + (bbox2[3] - bbox2[1]) / 2)
                        r2 = int(abs(bbox2[3] - bbox2[1]))
                        if int_circle(x1_c, y1_c, x2_c, y2_c, r1 // 2, r2 //
                                      2) >= 0 and abs(y1_c - y2_c) < r1 // 4:
                            temp_violators.add(track.track_id)
                            temp_violators.add(other.track_id)
                            cv2.line(img, (x1_c, y1_c), (x2_c, y2_c),
                                     (0, 0, 255), 2)
                            scale1 = (r1) / 100
                            transparentOverlay(img,
                                               dst_circle, (x1_c, y1_c - 5),
                                               alphaVal=110,
                                               color=(0, 0, 255),
                                               scale=scale1)
                            scale2 = (r2) / 100
                            transparentOverlay(img,
                                               dst_circle, (x2_c, y2_c - 5),
                                               alphaVal=110,
                                               color=(0, 0, 255),
                                               scale=scale2)

        # print fps on screen
        ### Comment out the 3 lines below to disable the live output window
        fps = (fps + (1. / (time.time() - t1))) / 2
        cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        cv2.imshow('output', img)

        ### Violators calculation
        violators_for_frame = len(temp_violators)
        VIOLATION_PERCENTAGE = violators_for_frame
        print("Violation percentage: ", violators_for_frame)
        violator_count_list.append(int(violators_for_frame))
        ###
        ### Call to firebase upload function
        # if violators_for_frame > 20:
        #     social_dist_violation_frame_handler(img)
        #     cv2.imwrite("temp.png",img)
        #     firebase_upload("temp.png")
        #     os.remove("temp.png")

        frame_index = frame_index + 1

        # press q to quit
        if cv2.waitKey(1) == ord('q'):
            break
    vid.release()
    if len(violator_count_list) == 0:
        mean_violation = 0
    else:
        mean_violation = sum(violator_count_list) / len(violator_count_list)
    PROCESSING_STATUS = False
    out.release()
    cv2.destroyAllWindows()
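The social-distancing check in this example calls int_circle, which is not included in the snippet (transparentOverlay is likewise external). A plausible sketch, assuming int_circle returns a negative value when the two circles with centres (x1, y1), (x2, y2) and radii r1, r2 are disjoint and a non-negative value when they touch or overlap, consistent with the >= 0 test above:

import math


def int_circle(x1, y1, x2, y2, r1, r2):
    # Hypothetical sketch: zero or positive when the circles touch or overlap,
    # negative when their centres are further apart than the sum of the radii.
    distance = math.hypot(x2 - x1, y2 - y1)
    return (r1 + r2) - distance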
コード例 #22
0
    def run(self, catch):
        def intersect(A, B, C, D):
            return ccw(A, C, D) != ccw(B, C, D) and ccw(A, B, C) != ccw(
                A, B, D)

        def ccw(A, B, C):
            return (C[1] - A[1]) * (B[0] - A[0]) > (B[1] - A[1]) * (C[0] -
                                                                    A[0])

        def vector_angle(midpoint, previous_midpoint):
            x = midpoint[0] - previous_midpoint[0]
            y = midpoint[1] - previous_midpoint[1]
            return math.degrees(math.atan2(y, x))

        global truck
        global car
        titik1 = (100, 511)
        titik2 = (551, 511)
        # Definition of the parameters
        max_cosine_distance = 0.4
        nn_budget = None
        nms_max_overlap = 1.0

        # initialize deep sort
        model_filename = 'model_data/mars-small128.pb'
        encoder = gdet.create_box_encoder(model_filename, batch_size=1)
        # calculate cosine distance metric
        metric = nn_matching.NearestNeighborDistanceMetric(
            "cosine", max_cosine_distance, nn_budget)
        # initialize tracker
        tracker = Tracker(metric)

        # initialize counting variables
        count_dict = {}  # initiate dict for storing counts
        total_counter = 0
        up_count = 0
        down_count = 0
        from collections import Counter
        class_counter = Counter()  # store counts of each detected class
        from collections import deque
        already_counted = deque(
            maxlen=50)  # temporary memory for storing counted IDs
        intersect_info = []  # initialise intersection list
        memory = {}

        # load configuration for object detector
        config = ConfigProto()
        config.gpu_options.allow_growth = True
        session = InteractiveSession(config=config)
        input_size = 416
        video_path = 'C:/Users/MSI Laptop/Pictures/overpass.mp4'  # here it is

        saved_model_loaded = tf.saved_model.load('./checkpoints/yolov4-416',
                                                 tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']

        # begin video capture
        try:
            vid = cv2.VideoCapture(int(catch))
        except:
            vid = cv2.VideoCapture(catch)

        frame_num = 0
        # while video is running
        while True:
            return_value, frame = vid.read()
            if return_value:
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                image = Image.fromarray(frame)
            else:
                print(
                    'Video has ended or failed to load, try another video!'
                )
                break
            frame_num += 1
            # print('Frame #: ', frame_num)
            frame_size = frame.shape[:2]
            image_data = cv2.resize(frame, (input_size, input_size))
            image_data = image_data / 255.
            image_data = image_data[np.newaxis, ...].astype(np.float32)
            start_time = time.time()

            batch_data = tf.constant(image_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

            boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
                boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
                scores=tf.reshape(
                    pred_conf,
                    (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
                max_output_size_per_class=50,
                max_total_size=50,
                iou_threshold=0.45,
                score_threshold=0.50)

            # convert data to numpy arrays and slice out unused elements
            num_objects = valid_detections.numpy()[0]
            bboxes = boxes.numpy()[0]
            bboxes = bboxes[0:int(num_objects)]
            scores = scores.numpy()[0]
            scores = scores[0:int(num_objects)]
            classes = classes.numpy()[0]
            classes = classes[0:int(num_objects)]

            # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height
            original_h, original_w, _ = frame.shape
            bboxes = utils.format_boxes(bboxes, original_h, original_w)

            # store all predictions in one parameter for simplicity when calling functions
            pred_bbox = [bboxes, scores, classes, num_objects]

            # read in all class names from config
            class_names = utils.read_class_names(cfg.YOLO.CLASSES)

            # by default allow all classes in .names file
            allowed_classes = list(class_names.values())

            # custom allowed classes (uncomment line below to customize tracker for only people)
            # allowed_classes = ['person']

            # loop through objects and use class index to get class name, allow only classes in allowed_classes list
            names = []
            deleted_indx = []
            for i in range(num_objects):
                class_indx = int(classes[i])
                class_name = class_names[class_indx]
                if class_name not in allowed_classes:
                    deleted_indx.append(i)
                else:
                    names.append(class_name)
            names = np.array(names)
            count = len(names)

            bboxes = np.delete(bboxes, deleted_indx, axis=0)
            scores = np.delete(scores, deleted_indx, axis=0)

            # encode yolo detections and feed to tracker
            features = encoder(frame, bboxes)
            detections = [
                Detection(bbox, score, class_name, feature)
                for bbox, score, class_name, feature in zip(
                    bboxes, scores, names, features)
            ]

            #initialize color map
            cmap = plt.get_cmap('tab20b')
            colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

            # run non-maxima suppression
            boxs = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            classes = np.array([d.class_name for d in detections])
            indices = preprocessing.non_max_suppression(
                boxs, classes, nms_max_overlap, scores)
            detections = [detections[i] for i in indices]

            # Call the tracker
            tracker.predict()
            tracker.update(detections)
            # draw the counting line
            cv2.line(frame, titik1, titik2, (0, 255, 255), 2)

            # update tracks
            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                bbox = track.to_tlbr(
                )  # Get current position in bounding box format (min x, min y, max x, max y)
                #track_cls = track.cls  # most common detection class for track
                class_name = track.get_class()

                #Object counting
                midpoint = track.tlbr_midpoint(
                    bbox)  # Finds midpoint of a box in tlbr format.
                origin_midpoint = (midpoint[0], frame.shape[0] - midpoint[1]
                                   )  # get midpoint relative to the bottom-left origin

                if track.track_id not in memory:
                    memory[track.track_id] = deque(maxlen=2)

                memory[track.track_id].append(midpoint)
                previous_midpoint = memory[track.track_id][0]

                origin_previous_midpoint = (previous_midpoint[0],
                                            frame.shape[0] -
                                            previous_midpoint[1])

                if intersect(midpoint, previous_midpoint, titik1,
                             titik2) and track.track_id not in already_counted:
                    class_counter[class_name] += 1
                    total_counter += 1
                    cv2.line(frame, titik1, titik2, (255, 0, 0),
                             2)  # highlight the counting line when crossed
                    already_counted.append(
                        track.track_id)  # Set already counted for ID to true.
                    angle = vector_angle(origin_midpoint,
                                         origin_previous_midpoint)

                    if angle > 0:
                        up_count += 1
                    if angle < 0:
                        down_count += 1
                # draw bbox on screen
                color = colors[int(track.track_id) % len(colors)]
                color = [i * 255 for i in color]
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), color, 2)
                cv2.rectangle(
                    frame, (int(bbox[0]), int(bbox[1] - 30)),
                    (int(bbox[0]) +
                     (len(class_name) + len(str(track.track_id))) * 17,
                     int(bbox[1])), color, -1)
                cv2.putText(frame, class_name + "-" + str(track.track_id),
                            (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                            (255, 255, 255), 2)

            if len(memory) > 50:
                del memory[list(memory)[0]]

            fps = 1.0 / (time.time() - start_time)
            # Draw total count.
            text = ("FPS: %.2f" % fps)
            frame = ps.putBText(frame,
                                text,
                                text_offset_x=int(frame.shape[1] - 185),
                                text_offset_y=int(0.05 * frame.shape[0]),
                                vspace=10,
                                hspace=10,
                                font_scale=1.0,
                                background_RGB=(228, 20, 222),
                                text_RGB=(255, 255, 255))
            text = "Total: {}".format(str(total_counter))
            frame = ps.putBText(frame,
                                text,
                                text_offset_x=int(10),
                                text_offset_y=int(0.05 * frame.shape[0]),
                                vspace=10,
                                hspace=10,
                                font_scale=1.0,
                                background_RGB=(10, 20, 222),
                                text_RGB=(255, 255, 255))

            # display counts for each class as they appear
            y = 0.12 * frame.shape[0]
            for cls in class_counter:
                class_count = class_counter[cls]
                text = str(cls) + " " + str(class_count)
                if str(cls) == 'car':
                    car = str(class_count)
                elif str(cls) == 'truck':
                    truck = str(class_count)
                frame = ps.putBText(frame,
                                    text,
                                    text_offset_x=int(10),
                                    text_offset_y=int(y),
                                    vspace=5,
                                    hspace=10,
                                    font_scale=1.0,
                                    background_RGB=(20, 210, 4),
                                    text_RGB=(255, 255, 255))
                y += 0.05 * frame.shape[0]
            # self.ui.label_2.setText(text)
            # calculate frames per second of running detections
            # fps = 1.0 / (time.time() - start_time)
            # print("FPS: %.2f" % fps)
            result = np.asarray(frame)
            result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

            # cv2.imshow("Output Video", result)
            # self.ui.label.setPixmap(QPixmap.fromImage(result))
            self.display_frame(result)
            if cv2.waitKey(1) & 0xFF == ord('q'): break
        cv2.destroyAllWindows()
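The up/down decision in this example hinges on flipping each midpoint to a bottom-left origin before calling vector_angle, so that a positive angle corresponds to upward motion. A quick standalone check of that logic, reusing the same formula with an assumed frame height of 720:

import math


def vector_angle(midpoint, previous_midpoint):
    # Same formula as in the example above
    x = midpoint[0] - previous_midpoint[0]
    y = midpoint[1] - previous_midpoint[1]
    return math.degrees(math.atan2(y, x))


frame_height = 720        # assumed frame height
previous = (300, 500)     # image coordinates (y grows downward)
current = (300, 450)      # the object moved up on screen

origin_previous = (previous[0], frame_height - previous[1])
origin_current = (current[0], frame_height - current[1])

print(vector_angle(origin_current, origin_previous))  # 90.0 -> counted as 'up'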
コード例 #23
0
ファイル: demo.py プロジェクト: lzq603/PedestriansDetection
def detect(yolo, videoChoice, site, ip):

    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = True

    video_capture = cv2.VideoCapture(videoChoice)
    starttime = time.time()

    if writeVideo_flag:
        # Define the codec and create VideoWriter object
        w = int(video_capture.get(3))
        h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out = cv2.VideoWriter('output%d.avi' % (site), fourcc, 30, (w, h))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0
    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if ret != True:
            break
        t1 = time.time()

        # image = Image.fromarray(frame)
        image = Image.fromarray(frame[..., ::-1])  #bgr to rgb
        boxs = yolo.detect_image(image)

        print("box_num", len(boxs))

        # Send the count over HTTP
        import requests
        # exception handling is needed here
        try:
            url = ip + '/peoplecount/insert?num=%d&site=%d' % (len(boxs), site)
            print(url)
            req = requests.get(url)
            print(req.text)
        except requests.exceptions.RequestException as e:
            print('error')

        features = encoder(frame, boxs)
        # score to 1.0 here).
        detections = [
            Detection(bbox, 1.0, feature)
            for bbox, feature in zip(boxs, features)
        ]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, str(track.track_id),
                        (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200,
                        (0, 255, 0), 2)

        for det in detections:
            bbox = det.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        cv2.imshow('', frame)

        if writeVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(boxs) != 0:
                for i in range(0, len(boxs)):
                    list_file.write(
                        str(boxs[i][0]) + ' ' + str(boxs[i][1]) + ' ' +
                        str(boxs[i][2]) + ' ' + str(boxs[i][3]) + ' ')
            list_file.write('\n')

        fps = (fps + (1. / (time.time() - t1))) / 2
        print("fps= %f" % (fps))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    video_capture.release()
    if writeVideo_flag:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
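One practical caveat with this example: the per-frame requests.get call has no timeout, so a slow or unreachable reporting endpoint can stall the whole capture loop. A hedged variant of the same reporting step is sketched below; the timeout value is an arbitrary choice, not something from the original project.

import requests


def report_count(ip, site, num, timeout_s=0.5):
    # Same URL format as the example; the short timeout keeps a slow endpoint
    # from blocking the video loop for more than timeout_s seconds.
    url = ip + '/peoplecount/insert?num=%d&site=%d' % (num, site)
    try:
        return requests.get(url, timeout=timeout_s).text
    except requests.exceptions.RequestException:
        return None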
コード例 #24
0
ファイル: object_reg4.py プロジェクト: Kanatip-P/ds-reg
def main(_argv):
    avg = []
    # Definition of the parameters
    max_cosine_distance = 0.4
    nn_budget = None
    nms_max_overlap = 1.0
    #regression model load
    weight_path = './2_input_model_2-3.5%/'
    loaded_model = tf.keras.models.load_model(weight_path)

    # initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    # calculate cosine distance metric
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    # initialize tracker
    tracker = Tracker(metric)

    # load configuration for object detector
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size
    video_path = FLAGS.video

    # load tflite model if flag is set
    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
    # otherwise load standard tensorflow saved model
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights,
                                                 tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']

    # begin video capture
    try:
        vid = cv2.VideoCapture(int(video_path))
    except:
        vid = cv2.VideoCapture(video_path)

    out = None

    # get video ready to save locally if flag is set
    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    frame_num = 0
    # while video is running
    while True:
        return_value, frame = vid.read()
        if return_value:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(frame)
        else:
            print('Video has ended or failed, try a different video format!')
            break
        frame_num += 1
        print('Frame #: ', frame_num)
        frame_size = frame.shape[:2]
        image_data = cv2.resize(frame, (input_size, input_size))
        image_data = image_data / 255.
        image_data = image_data[np.newaxis, ...].astype(np.float32)
        start_time = time.time()

        # run detections on tflite if flag is set
        if FLAGS.framework == 'tflite':
            interpreter.set_tensor(input_details[0]['index'], image_data)
            interpreter.invoke()
            pred = [
                interpreter.get_tensor(output_details[i]['index'])
                for i in range(len(output_details))
            ]
            # run detections using yolov3 if flag is set
            if FLAGS.model == 'yolov3' and FLAGS.tiny == True:
                boxes, pred_conf = filter_boxes(pred[1],
                                                pred[0],
                                                score_threshold=0.25,
                                                input_shape=tf.constant(
                                                    [input_size, input_size]))
            else:
                boxes, pred_conf = filter_boxes(pred[0],
                                                pred[1],
                                                score_threshold=0.25,
                                                input_shape=tf.constant(
                                                    [input_size, input_size]))
        else:
            batch_data = tf.constant(image_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(
                pred_conf,
                (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=FLAGS.iou,
            score_threshold=FLAGS.score)

        # convert data to numpy arrays and slice out unused elements
        num_objects = valid_detections.numpy()[0]
        bboxes = boxes.numpy()[0]
        bboxes = bboxes[0:int(num_objects)]
        scores = scores.numpy()[0]
        scores = scores[0:int(num_objects)]
        classes = classes.numpy()[0]
        classes = classes[0:int(num_objects)]

        # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height
        original_h, original_w, _ = frame.shape
        bboxes = utils.format_boxes(bboxes, original_h, original_w)

        # store all predictions in one parameter for simplicity when calling functions
        pred_bbox = [bboxes, scores, classes, num_objects]
        #print("pred_bbox: ",pred_bbox[0])
        #print("scores: ",pred_bbox[1])
        #print("classes :",pred_bbox[2])
        #print("num :",pred_bbox[3])
        #print("width :",width)
        #print("height :",height)
        # read in all class names from config
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)

        # by default allow all classes in .names file
        allowed_classes = list(class_names.values())

        # custom allowed classes (uncomment line below to customize tracker for only people)
        #allowed_classes = ['person']

        # loop through objects and use class index to get class name, allow only classes in allowed_classes list
        names = []
        deleted_indx = []
        for i in range(num_objects):
            class_indx = int(classes[i])
            class_name = class_names[class_indx]
            if class_name not in allowed_classes:
                deleted_indx.append(i)
            else:
                names.append(class_name)
        names = np.array(names)
        count = len(names)
        if FLAGS.count:
            cv2.putText(frame, "Objects being tracked: {}".format(count),
                        (5, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2,
                        (0, 255, 0), 2)
            print("Objects being tracked: {}".format(count))
        # delete detections that are not in allowed_classes
        bboxes = np.delete(bboxes, deleted_indx, axis=0)
        scores = np.delete(scores, deleted_indx, axis=0)

        # encode yolo detections and feed to tracker
        features = encoder(frame, bboxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                bboxes, scores, names, features)
        ]

        #initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        #print("boxs ",boxs)
        #print("scores ",scores)
        #print("classes ",classes)
        indices = preprocessing.non_max_suppression(boxs, classes,
                                                    nms_max_overlap, scores)
        #print("indices ",indices)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)
        cv2.putText(frame, "using regress", (5, 35),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (255, 0, 255), 2)
        #cv2.putText(frame, "Objects being detected: {}".format(count), (5, 350), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 0, 255), 2)
        cv2.putText(frame, "frame# {}".format(frame_num), (750, 35),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (255, 0, 255), 2)

        # update tracks
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()
            if 'entrance' not in classes:
                if len(classes) > 1:
                    if (contains_duplicates(classes) == False):
                        color = (50, 89, 170)
                        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
                        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        x1, y1, x2, y2 = convert2(
                            width, height, int(boxs[0][0]), int(boxs[0][1]),
                            int(boxs[0][0] + boxs[0][2]),
                            int(boxs[0][1] +
                                boxs[0][3]))  #xywh to xmin ymin xmax ymax
                        x3, y3, x4, y4 = convert2(
                            width, height,
                            int(bboxes[1][0]), int(bboxes[1][1]),
                            int(bboxes[1][0] + bboxes[1][2]),
                            int(bboxes[1][1] +
                                bboxes[1][3]))  #xywh to xmin ymin xmax ymax
                        reg_input = np.array([[
                            class_index(classes[0]), x1, y1, x2, y2,
                            class_index(classes[1]), x3, y3, x4, y4
                        ]])
                        predictions = loaded_model.predict(reg_input)
                        a1_pred = predictions[0]
                        b1_pred = predictions[1]
                        c1_pred = predictions[2]
                        d1_pred = predictions[3]
                        xmin, xmax, ymin, ymax = convert(
                            width, height, a1_pred, b1_pred, c1_pred, d1_pred)
                        start_point = (xmin, ymin)
                        end_point = (xmax, ymax)
                        rect1 = xmax - xmin
                        rect2 = ymax - ymin
                        check_rect = rect2 / rect1
                        print("check_rect:{}".format(check_rect))
                        if check_rect > 1:
                            blk = np.zeros(frame.shape, np.uint8)
                            cv2.rectangle(blk, start_point, end_point, color,
                                          cv2.FILLED)
                            frame = cv2.addWeighted(frame, 1.0, blk, 0.5, 1)
                            print(
                                "predict_BBox Coords (xmin, ymin, xmax, ymax): {}"
                                .format((xmin, ymin, xmax, ymax)))

            ########
            #      select one entrance
            ########
            #if classes.count('entrance')>1:
            #    entrance_num=[]
            #    iou_list=[]
            #    iou_check=[]
            #    for i in range(len(classes)):
            #        if classes[i]=='entrance'
            #        entrance_num.append(i)
            #        if len(classes)>1:
            #            if(contains_duplicates(classes)==False):
            #                color = (50, 89, 170)
            #                width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
            #                height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
            #                x1,y1,x2,y2=convert2(width,height,int(boxs[0][0]),int(boxs[0][1]),int(boxs[0][0]+boxs[0][2]),int(boxs[0][1]+boxs[0][3]))#xywh to xmin ymin xmax ymax
            #                x3,y3,x4,y4=convert2(width,height,int(bboxes[1][0]),int(bboxes[1][1]),int(bboxes[1][0]+bboxes[1][2]),int(bboxes[1][1]+bboxes[1][3]))#xywh to xmin ymin xmax ymax
            #                reg_input=np.array([[class_index(classes[0]),x1,y1,x2,y2,class_index(classes[1]),x3,y3,x4,y4]])
            #                predictions = loaded_model.predict(reg_input)
            #                a1_pred = predictions[0]
            #                b1_pred = predictions[1]
            #                c1_pred = predictions[2]
            #                d1_pred = predictions[3]
            #                xmin,xmax,ymin,ymax=convert(width,height,a1_pred,b1_pred,c1_pred,d1_pred)
            #                ###IOU###
            #                GT_bbox_area = (xmax -  xmin + 1) * (  ymax -ymin + 1)
            #                ###########
            #                ##check entrace##
            #                Pred_bbox_area =(x_bottomright_p - x_topleft_p + 1 ) * ( y_bottomright_p -y_topleft_p + 1)
            #                x_top_left =np.max([x_topleft_gt, x_topleft_p])
            #                y_top_left = np.max([y_topleft_gt, y_topleft_p])
            #                x_bottom_right = np.min([x_bottomright_gt, x_bottomright_p])
            #                y_bottom_right = np.min([y_bottomright_gt, y_bottomright_p])
            #
            #                intersection_area = (x_bottom_right- x_top_left + 1) * (y_bottom_right-y_top_left  + 1)
            #
            #                union_area = (GT_bbox_area + Pred_bbox_area - intersection_area)
            #
            #                iou_check.append(intersection_area/union_area)
            #
            #        for j in len(iou_check):
            #           if entrance_num[j]<iou_check.max:
            #               track.delete
            #

            # draw bbox on screen

            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            if (class_name == 'entrance'):
                print("RED Tracker ID: {}, Class: {}".format(
                    str(track.track_id), class_name))
                blk = np.zeros(frame.shape, np.uint8)
                cv2.rectangle(blk, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), (255, 0, 0),
                              cv2.FILLED)
                frame = cv2.addWeighted(frame, 1.0, blk, 0.5, 1)
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), color, 2)
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1] - 30)),
                          (int(bbox[0]) +
                           (len(class_name) + len(str(track.track_id))) * 17,
                           int(bbox[1])), color, -1)
            cv2.putText(frame, class_name + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                        (255, 255, 255), 2)

            # if enable info flag then print details about each track
            if FLAGS.info:
                print(
                    "Tracker ID: {}, Class: {},  BBox Coords (xmin, ymin, xmax, ymax): {}"
                    .format(str(track.track_id), class_name, (int(
                        bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))))

        # calculate frames per second of running detections
        fps = 1.0 / (time.time() - start_time)
        print("FPS: %.2f" % fps)
        avg.append(fps)
        print("avg fps {}".format(statistics.mean(avg)))
        cv2.putText(frame, "FPS: %.2f" % fps, (50, 500),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (66, 245, 141), 2)
        result = np.asarray(frame)
        result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

        if not FLAGS.dont_show:
            cv2.imshow("Output Video", result)

        # if output flag is set, save video file
        if FLAGS.output:
            out.write(result)
        if cv2.waitKey(1) & 0xFF == ord('q'): break
    cv2.destroyAllWindows()
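This example leans on several helpers that are not shown: contains_duplicates, convert2 and convert (plus class_index, which presumably maps a class name to an integer). Their exact behaviour is unknown; a plausible sketch, assuming convert2 normalises pixel corners into [0, 1] and convert maps the regression outputs back to pixels in the (xmin, xmax, ymin, ymax) order the example unpacks, would be:

def contains_duplicates(names):
    # True when any class name appears more than once (assumed behaviour)
    return len(set(names)) != len(names)


def convert2(width, height, xmin, ymin, xmax, ymax):
    # Pixel corners -> normalised corners in [0, 1] (assumed behaviour)
    return xmin / width, ymin / height, xmax / width, ymax / height


def convert(width, height, a, b, c, d):
    # Normalised regression outputs -> pixel values; the example unpacks the
    # result as (xmin, xmax, ymin, ymax), so this sketch keeps that ordering
    return int(a * width), int(b * width), int(c * height), int(d * height)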
コード例 #25
0
def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 0.3  # maximum cosine distance
    nn_budget = None  # ??
    nms_max_overlap = 1.0  # non-maximum suppression ??

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    #  model_filename = 'model_data/darknet_yolov3_model.pb'
    encoder = gdet.create_box_encoder(model_filename,
                                      batch_size=1)  # feature encoder

    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)  # cosine distance metric
    tracker = Tracker(metric)  # tracker

    writeVideo_flag = True

    #video_capture = cv2.VideoCapture(0)  # capture from the camera
    video_capture = cv2.VideoCapture('test5.mp4')  # read from a video file

    if writeVideo_flag:
        # Define the codec and create VideoWriter object
        w = int(video_capture.get(3))
        h = int(video_capture.get(4))
        print("video的w:", w, "。video的h:", h)
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')  # 使用MJPG将视频提取成图片

        out = cv2.VideoWriter('out.avi', fourcc, 15, (w, h))
        out_m = cv2.VideoWriter('out_m.avi', fourcc, 15, (w, h))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0
    while True:
        print("当前是第", int(video_capture.get(1)), "帧")

        ret, frame = video_capture.read()  # frame is a 3-D array
        # print("frame:", frame)
        if ret != True:
            break
        t1 = time.time()

        # image = Image.fromarray(frame)
        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb

        # each frame read from the video (image) is a PIL.Image.Image
        img = np.asarray(image)  # convert the PIL Image to a numpy.ndarray

        boxs = yolo.detect_image(image)  # run yolo detection on the converted image
        # print("type of boxs:", type(boxs))  # boxs is a <class 'list'>
        # print("boxs=", boxs, "\n\n")  # same as boxes below
        features = encoder(frame, boxs)  # extract features with the encoder

        # score to 1.0 here.
        detections = [
            Detection(bbox, 1.0, feature)
            for bbox, feature in zip(boxs, features)
        ]
        # memory addresses of the deep_sort Detection objects
        # print("detections", detections)  #  deep_sort.detection.Detection object at 0x000002A8FBC6D7B8

        # Run non-maxima suppression.
        # boxes holds the top-left (x, y) coordinates
        boxes = np.array([d.tlwh for d in detections
                          ])  # tlwh is the box (x, y, width, height), as an ndarray

        # print("boxes", boxes)
        # print("shape of boxes:", boxes.shape, "\nnumber of dims:", len(boxes.shape))
        # print("shape of boxes:", boxes.shape, "\n")
        a = boxes.shape[0]
        if len(boxes.shape) == 1:
            b = 0
        else:
            b = boxes.shape[1]
        # print("(a,b)=", a, b)  # (a,b)即为boxes的shape

        i = 1
        if b != 0:
            # print("into !=0")
            while i <= a:
                box_temp = boxes[i - 1, :]  # top-left coordinates of each detection
                # print("row", i, "of boxes:", box_temp)
                x_center, y_center = travel(box_temp)  # call travel() to get the box centre
                anchor_width = box_temp[2]  # anchor box width
                anchor_height = box_temp[3]  # anchor box height
                #  print("x_center=", x_center, "y_center=", y_center)
                i += 1
            print("one frame detection end\n")

        scores = np.array([d.confidence for d in detections])  # detection confidence
        # run NMS on the detections to reduce overlapping boxes and keep the surviving indices
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices
                      ]  # keep only the detections selected by NMS
        # print("detections:", detections)  # Detection objects (memory addresses)

        # inspect each track's state before the update
        for track in tracker.tracks:
            a = track.track_id
            print("current track id:", a)
            name = track.age
            print("current track age:", name)
            cov = track.covariance
            print("current track covariance:", cov)
            fea = track.features
            print("current track features:", fea)
            hit = track.hits
            print("current track hits:", hit)
            mea = track.mean
            print("current track mean:", mea)
            sta = track.state
            print("current track state:", sta)
            tsu = track.time_since_update
            print("current track time_since_update:", tsu)

        # Call the tracker 调用追踪器
        tracker.predict()
        tracker.update(detections)

        follow_id = []
        follow = []

        # draw the threshold lines (horizontal and vertical centre lines)
        cv2.rectangle(img, (0, int(h / 2)), (int(w), int(h / 2)),
                      (255, 0, 255), 1)
        cv2.rectangle(img, (int(w / 2), 0), (int(w / 2), int(h)),
                      (255, 0, 255), 1)
        # number of discarded (wasted) ids
        cv2.putText(img, "waste id is " + str(count), (660, 100), 0, 1,
                    (125, 255, 125), 2)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            # print("tlbr之前bbox", bbox)
            bbox = track.to_tlbr()
            print("bbox是:", bbox)

            print("在添加之前,follow_id当前是:", follow_id)
            print("在添加之前,follow当前是:", follow)
            follow_id.append(track.track_id)
            follow.append((bbox[0], bbox[1], bbox[2], bbox[3]))
            # 添加之后follow_id和follow数组的输出,在compare函数里
            follow, follow_id = compare(follow, follow_id, bbox, track, w,
                                        h)  # 进入此函数比较位置信息
            length2 = len(follow_id)  # 计算follow_id数组长度
            print("length2", length2)
            print("follow", follow)
            print("follow_id", follow_id)
            print("follow_id[length2-1]", follow_id[length2 - 1])
            print("follow[follow_id.index(follow_id[length2 - 1])][0]",
                  follow[follow_id.index(follow_id[length2 - 1])][0])
            print("type follow[follow_id.index(follow_id[length2 - 1])][0]",
                  type(follow[follow_id.index(follow_id[length2 - 1])][0]))
            # white box (tracking box), colour in BGR
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            # cv2.rectangle(img, (
            #     int(follow[follow_id.index(follow_id[length2 - 1])][0]),
            #     int(follow[follow_id.index(follow_id[length2 - 1])][1])),
            #               (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)

            # green text (track ID): putText(image, text, position, font, scale, colour, thickness)
            # cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200, (255, 255, 0), 2)
            cv2.putText(frame, str(track.track_id),
                        (int(bbox[0]) + 8, int(bbox[1]) + 8), 0, 1,
                        (0, 255, 0), 2)
            # shift the displayed id so it reflects the number actually tracked
            a1 = int(follow_id[length2 - 1]) - 1
            # a1 = int(follow_id[length2 - 1])
            cv2.putText(img, str(a1), (int(bbox[0]) + 8, int(bbox[1]) + 8), 0,
                        1, (0, 255, 0), 2)
            print("track ID:", track.track_id)

            # print the current ID above the detection box
            cv2.putText(frame, "current id is " + str(track.track_id),
                        (int(bbox[0]), 40), 0, 1, (125, 155, 125), 2)

            cv2.putText(img, "current id is " + str(a1), (int(bbox[0]), 80), 0,
                        1, (125, 255, 0), 2)

            print("one frame track end")

        for fol in follow:
            print("fol是", fol)
            cv2.rectangle(img, (int(fol[0]), int(fol[1])),
                          (int(fol[2]), int(fol[3])), (255, 255, 255), 2)

        for det in detections:
            bbox = det.to_tlbr()
            # blue box (detection box); OpenCV colours are BGR, not RGB
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        # h, w, l = np.shape(frame)
        # print("hwl:", h, w, l)

        # display each frame
        cv2.imshow('origin Frame', frame)
        # cv2.rectangle(img, (400, 200), (500, 600), (255, 255, 0), 2)  # debug box at a fixed position; remove later
        cv2.imshow('modify frame', img)  # show the modified frame

        if writeVideo_flag:
            # save a frame
            out.write(frame)  # write to out.avi
            out_m.write(img)  # write the modified view to out_m.avi
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')  # write to detection.txt (list_file)
            if len(boxs) != 0:
                for i in range(0, len(boxs)):
                    list_file.write(
                        str(boxs[i][0]) + ' ' + str(boxs[i][1]) + ' ' +
                        str(boxs[i][2]) + ' ' + str(boxs[i][3]) + ' ')

            list_file.write('\n')

        # report fps on the console
        fps = (fps + (1. / (time.time() - t1))) / 2

        print("fps= %f" % (fps))
        print("end of frame\n\n")

        # Press Q to stop and close the window
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # release the video stream
    video_capture.release()
    if writeVideo_flag:
        out.release()
        out_m.release()
        list_file.close()
    cv2.destroyAllWindows()
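The loop above relies on two helpers that are not included in this snippet: travel(), used to get the centre of a detection box, and compare(), which updates the follow/follow_id lists. compare() cannot be reconstructed from the call alone, but a minimal sketch of travel() is given below under the assumption that it converts a (top-left x, top-left y, width, height) row into a centre point; it is an illustration, not the author's code.

def travel(box_tlwh):
    """Return the centre (x, y) of a box given as (top-left x, top-left y, w, h)."""
    x, y, w, h = box_tlwh
    x_center = x + w / 2.0
    y_center = y + h / 2.0
    return x_center, y_center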
コード例 #26
0
def main(_argv):
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    ppe_input_size = FLAGS.ppe_size
    helmet_input_size = FLAGS.helmet_size
    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except:
        vid = cv2.VideoCapture(FLAGS.video)

    times = []
    if FLAGS.output:
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    ppe_detector = create_ppe_detector(ppe_input_size)
    helmet_detector = create_helmet_detector(helmet_input_size)

    nacho_image1 = face_recognition.load_image_file("./data/faces/nacho1.jpg")
    nacho_image2 = face_recognition.load_image_file("./data/faces/nacho2.jpg")
    nacho_image3 = face_recognition.load_image_file("./data/faces/nacho3.jpg")

    nacho_face_encoding1 = face_recognition.face_encodings(nacho_image1)[0]
    nacho_face_encoding2 = face_recognition.face_encodings(nacho_image2)[0]
    nacho_face_encoding3 = face_recognition.face_encodings(nacho_image3)[0]

    known_face_encodings = [
        nacho_face_encoding1, nacho_face_encoding2, nacho_face_encoding3
    ]
    known_face_names = ["Nacho", "Nacho", "Nacho"]
    face_locations = []
    face_encodings = []
    face_names = []

    max_cosine_distance = 0.7  # 0.5 / 0.7
    nn_budget = None
    model_filename = './weights/tracker/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)
    Track_only = []

    logging.info("Models loaded!")
    while True:
        return_value, frame = vid.read()
        if not return_value:
            logging.warning("Empty Frame")
            break

        frame_size = frame.shape[:2]
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # small_frame = cv2.resize(frame, (0, 0), fx=0.25, fy=0.25)

        img_in = tf.expand_dims(frame, 0)
        img_in = transform_images(img_in, helmet_input_size)

        image_data = utils.image_preprocess(np.copy(frame),
                                            [ppe_input_size, ppe_input_size])
        image_data = image_data[np.newaxis, ...].astype(np.float32)

        t1 = time.time()
        if FLAGS.framework == 'tf':
            ppe_pred_bbox = ppe_detector.predict(image_data)
        elif FLAGS.framework == 'trt':
            batched_input = tf.constant(image_data)
            ppe_pred_bbox = []
            result = ppe_detector(batched_input)
            for _, value in result.items():
                value = value.numpy()
                ppe_pred_bbox.append(value)

        helmet_pred_bbox = helmet_detector.predict(img_in)

        # face_locations = face_recognition.face_locations(small_frame)
        face_locations = face_recognition.face_locations(frame)
        face_encodings = face_recognition.face_encodings(frame, face_locations)
        face_names = []
        for face_encoding in face_encodings:
            matches = face_recognition.compare_faces(known_face_encodings,
                                                     face_encoding)
            name = "Unknown"

            # if True in matches:
            #     first_match_index = matches.index(True)
            #     name = known_face_names[first_match_index]
            face_distances = face_recognition.face_distance(
                known_face_encodings, face_encoding)
            best_match_index = np.argmin(face_distances)
            if matches[best_match_index]:
                name = known_face_names[best_match_index]
            face_names.append(name)

        t2 = time.time()
        times.append(t2 - t1)
        times = times[-20:]
        ms = sum(times) / len(times) * 1000
        fps = 1000 / ms

        ppe_bboxes = post_process_boxes(ppe_pred_bbox, 'yolov4', frame_size,
                                        ppe_input_size)
        helmet_bboxes = post_process_boxes(helmet_pred_bbox, 'yolov3',
                                           frame_size, helmet_input_size)

        face_bboxes = []
        for (top, right, bottom, left), name in zip(face_locations,
                                                    face_names):
            # top *= 4
            # left *= 4
            # right *= 4
            # bottom *= 4
            face_bboxes.append([left, top, right, bottom, name])
        bboxes = utils.calculate_status(ppe_bboxes, helmet_bboxes, [])

        boxes, safety_scores, site_roles, face_names = [], [], [], []
        for bbox in bboxes:
            boxes.append([
                bbox[0].astype(int), bbox[1].astype(int),
                bbox[2].astype(int) - bbox[0].astype(int),
                bbox[3].astype(int) - bbox[1].astype(int)
            ])
            safety_scores.append(bbox[4])
            site_roles.append(bbox[5])
            face_names.append("None")
        for bbox in face_bboxes:
            boxes.append(
                [bbox[0], bbox[1], bbox[2] - bbox[0], bbox[3] - bbox[1]])
            safety_scores.append(0)
            site_roles.append(-1)
            face_names.append(bbox[4])

        boxes = np.array(boxes)
        safety_scores = np.array(safety_scores)
        site_roles = np.array(site_roles)
        face_names = np.array(face_names)
        features = np.array(encoder(frame, boxes))
        detections = [
            Detection(bbox, 0.9, 0, feature, safety_score, site_role,
                      face_name)
            for bbox, feature, safety_score, site_role, face_name in zip(
                boxes, features, safety_scores, site_roles, face_names)
        ]

        tracker.predict()
        tracker.update(detections)

        tracked_bboxes = []
        for track in tracker.tracks:
            if not track.is_confirmed(
            ) or track.time_since_update > 1:  # 1 / 5
                continue
            bbox = track.to_tlbr()

            tracking_id = track.track_id
            safety_score = track.get_safety_score()
            site_role = track.get_site_role()
            face_name = track.get_face_name()
            if site_role == -1:
                to_add = [face_name, site_role, tracking_id]
            else:
                to_add = [safety_score, site_role, tracking_id]

            tracked_bboxes.append(bbox.tolist() + to_add)
        image = utils.draw_demo(frame, tracked_bboxes)
        image = cv2.putText(image, "Time: {:.2f} FPS".format(fps), (0, 24),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        cv2.namedWindow("Detections", cv2.WINDOW_AUTOSIZE)
        cv2.imshow("Detections", image)
        if FLAGS.output:
            out.write(image)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    vid.release()
    if FLAGS.output:
        out.release()
    cv2.destroyAllWindows()
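The example above registers three reference photos of the same person by appending their encodings to known_face_encodings and the matching labels to known_face_names. A small helper along the same lines (register_face is a hypothetical name, not part of the original code) shows how further identities could be added, assuming one clearly visible face per reference photo:

import face_recognition

def register_face(known_face_encodings, known_face_names, image_path, name):
    """Append the encoding of the first face found in image_path under the given name."""
    image = face_recognition.load_image_file(image_path)
    encodings = face_recognition.face_encodings(image)
    if not encodings:
        raise ValueError("no face detected in {}".format(image_path))
    known_face_encodings.append(encodings[0])
    known_face_names.append(name)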
コード例 #27
0
def main(_argv):
    # Definition of the parameters
    max_cosine_distance = 0.4
    nn_budget = None
    nms_max_overlap = 1.0

    # initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    # calculate cosine distance metric
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    # initialize tracker
    tracker = Tracker(metric)

    # load configuration for object detector
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size
    video_path = FLAGS.video

    # load tflite model if flag is set
    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
    # otherwise load standard tensorflow saved model
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights,
                                                 tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']

    # begin video capture
    try:
        vid = cv2.VideoCapture(int(video_path))
    except:
        vid = cv2.VideoCapture(video_path)

    out = None

    # get video ready to save locally if flag is set
    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    frame_num = 0

    # total persons tracked
    trackedPersons = {}

    # while video is running
    while True:
        return_value, frame = vid.read()
        if return_value:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(frame)
        else:
            print('Video has ended or failed, try a different video format!')
            break
        frame_num += 1
        print('Frame #: ', frame_num)
        frame_size = frame.shape[:2]
        image_data = cv2.resize(frame, (input_size, input_size))
        image_data = image_data / 255.
        image_data = image_data[np.newaxis, ...].astype(np.float32)
        start_time = time.time()

        # run detections on tflite if flag is set
        if FLAGS.framework == 'tflite':
            interpreter.set_tensor(input_details[0]['index'], image_data)
            interpreter.invoke()
            pred = [
                interpreter.get_tensor(output_details[i]['index'])
                for i in range(len(output_details))
            ]
            # run detections using yolov3 if flag is set
            if FLAGS.model == 'yolov3' and FLAGS.tiny == True:
                boxes, pred_conf = filter_boxes(pred[1],
                                                pred[0],
                                                score_threshold=0.25,
                                                input_shape=tf.constant(
                                                    [input_size, input_size]))
            else:
                boxes, pred_conf = filter_boxes(pred[0],
                                                pred[1],
                                                score_threshold=0.25,
                                                input_shape=tf.constant(
                                                    [input_size, input_size]))
        else:
            batch_data = tf.constant(image_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(
                pred_conf,
                (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=FLAGS.iou,
            score_threshold=FLAGS.score)

        # convert data to numpy arrays and slice out unused elements
        num_objects = valid_detections.numpy()[0]
        bboxes = boxes.numpy()[0]
        bboxes = bboxes[0:int(num_objects)]
        scores = scores.numpy()[0]
        scores = scores[0:int(num_objects)]
        classes = classes.numpy()[0]
        classes = classes[0:int(num_objects)]

        # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height
        original_h, original_w, _ = frame.shape
        bboxes = utils.format_boxes(bboxes, original_h, original_w)

        # store all predictions in one parameter for simplicity when calling functions
        pred_bbox = [bboxes, scores, classes, num_objects]

        # read in all class names from config
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)

        # by default allow all classes in .names file
        allowed_classes = list(class_names.values())

        # custom allowed classes (use the flags below to restrict the tracker, e.g. to people only)
        if FLAGS.person_only:
            allowed_classes = ['person']

        if FLAGS.person_bags:
            allowed_classes = ['person', 'backpack', 'handbag', 'suitcase']

        # loop through objects and use class index to get class name, allow only classes in allowed_classes list
        names = []
        deleted_indx = []
        for i in range(num_objects):
            class_indx = int(classes[i])
            class_name = class_names[class_indx]
            if class_name not in allowed_classes:
                deleted_indx.append(i)
            else:
                names.append(class_name)
        names = np.array(names)
        count = len(names)

        if FLAGS.count:
            cv2.putText(frame, "Objects being tracked: {}".format(count),
                        (5, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2,
                        (0, 255, 0), 2)
            print("Objects being tracked: {}".format(count))
        # delete detections that are not in allowed_classes
        bboxes = np.delete(bboxes, deleted_indx, axis=0)
        scores = np.delete(scores, deleted_indx, axis=0)

        # encode yolo detections and feed to tracker
        features = encoder(frame, bboxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                bboxes, scores, names, features)
        ]

        #initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima supression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes,
                                                    nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        # show background if show waiting time
        if FLAGS.waiting_time and len(tracker.tracks) > 0:
            cv2.rectangle(frame, (0, 0), (200, 200), (255, 255, 255), -1)
            cv2.putText(frame, "time lapsed", (5, 35),
                        cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.3, (0, 0, 0), 2)

        # update tracks
        for (t, track) in enumerate(tracker.tracks):
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()

            # draw bbox on screen
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), color, 2)
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1] - 30)),
                          (int(bbox[0]) +
                           (len(class_name) + len(str(track.track_id))) * 17,
                           int(bbox[1])), color, -1)
            cv2.putText(frame, class_name + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                        (255, 255, 255), 2)

            # if enable info flag then print details about each track
            if FLAGS.info:
                print(
                    "Tracker ID: {}, Class: {},  BBox Coords (xmin, ymin, xmax, ymax): {}"
                    .format(str(track.track_id), class_name, (int(
                        bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))))

            # show waiting time of each person
            if FLAGS.waiting_time and len(tracker.tracks) > 0:
                if track.track_id not in trackedPersons:
                    trackedPersons[track.track_id] = 1
                trackedPersons[track.track_id] = (
                    trackedPersons[track.track_id] + 1)
                waitedTime = round(trackedPersons[track.track_id] / 30, 1)
                cv2.putText(
                    frame,
                    "pers-" + str(track.track_id) + ": " + str(waitedTime),
                    (5, (t * 20) + 70), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1,
                    color, 2)

        # calculate frames per second of running detections
        fps = 1.0 / (time.time() - start_time)
        print("FPS: %.2f" % fps)
        result = np.asarray(frame)
        result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

        if not FLAGS.dont_show:
            cv2.imshow("Output Video", result)

        # if output flag is set, save video file
        if FLAGS.output:
            out.write(result)
        if cv2.waitKey(1) & 0xFF == ord('q'): break
    cv2.destroyAllWindows()
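The waiting-time display above counts one tick per processed frame and divides by a hard-coded 30 to convert the count into seconds, which only matches videos that really run at 30 FPS. A small sketch of deriving the divisor from the capture itself (seconds_waited is a hypothetical helper, not from the original script):

import cv2

def seconds_waited(frame_count, capture, fallback_fps=30.0):
    """Convert a per-track frame count into seconds using the video's reported FPS."""
    fps = capture.get(cv2.CAP_PROP_FPS)
    if not fps or fps <= 0:
        fps = fallback_fps  # some streams do not report a frame rate
    return round(frame_count / fps, 1)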
コード例 #28
0
ファイル: tracker.py プロジェクト: obodroid/darknet
class DeepSort(Process):
    def __init__(self, video_serial, isStop, gpuIndex, trackingQueue,
                 resultQueue):
        Process.__init__(self)
        self.daemon = True
        self.encoder = None
        self.tracker = None
        self.isStop = isStop
        self.isDisplay = False
        self.gpuIndex = gpuIndex
        self.video_serial = video_serial
        self.trackingQueue = trackingQueue
        self.resultQueue = resultQueue

    def run(self):
        setproctitle.setproctitle("Tracker {}".format(self.video_serial))
        print('Tracker {}'.format(self.video_serial))

        max_cosine_distance = 0.45
        nn_budget = 100

        self.encoder = gdet.create_box_encoder(imgEncPath,
                                               batch_size=1,
                                               gpu_index=self.gpuIndex)
        metric = nn_matching.NearestNeighborDistanceMetric(
            "cosine", max_cosine_distance, nn_budget)
        self.tracker = Tracker(metric,
                               max_iou_distance=0.7,
                               max_age=50,
                               n_init=5)

        while not self.isStop.value:
            while not self.trackingQueue.empty():
                robotId, videoId, msg, frame, bboxes, confidences, objectTypes, targetObjects = self.trackingQueue.get(
                )
                video_serial = robotId + "-" + videoId
                print('Tracker {} at keyframe {}'.format(
                    video_serial, msg['keyframe']))

                features = self.encoder(frame, bboxes)
                detections = [
                    Detection(bbox, confidence, feature, objectType)
                    for bbox, confidence, feature, objectType in zip(
                        bboxes, confidences, features, objectTypes)
                ]

                indices = [i for i in np.arange(len(detections)) \
                    if detections[i].confidence > 0.8 and detections[i].objectType in targetObjects]

                detections = [detections[i] for i in indices]
                msg['detectedObjects'] = [
                    msg['detectedObjects'][i] for i in indices
                ]
                print("detection indices: {}".format(indices))

                # Call the tracker
                self.tracker.predict()
                self.tracker.update(detections)

                if self.isDisplay:
                    displayFrame = frame.copy()

                for detection_id, detectedObject in zip(
                        np.arange(len(msg['detectedObjects'])),
                        msg['detectedObjects']):
                    for track in self.tracker.tracks:
                        if not track.is_confirmed(
                        ) or track.time_since_update > 0:
                            print("Tracker {} at keyframe {} track {} missed x {} y {}".format( \
                                self.video_serial, msg['keyframe'], str(track.track_id), int(track.to_tlwh()[0]), int(track.to_tlwh()[1])))
                            bbox = track.to_tlbr()
                            if self.isDisplay:
                                cv2.rectangle(displayFrame,
                                              (int(bbox[0]), int(bbox[1])),
                                              (int(bbox[2]), int(bbox[3])),
                                              (127, 127, 127), 2)
                                cv2.putText(displayFrame,
                                            "{}".format(str(track.track_id)),
                                            (int(bbox[0]), int(bbox[1]) - 20),
                                            0, 5e-3 * 100, (0, 127, 0), 2)
                            continue

                        if track.detection_id == detection_id:
                            print("Tracker {} at keyframe {} track {} {} x {} y {}".format( \
                                self.video_serial, msg['keyframe'], detectedObject["objectType"] \
                                , str(track.track_id), int(track.to_tlwh()[0]), int(track.to_tlwh()[1])))

                            detectedObject["track_id"] = str(track.track_id)
                            tracking_bbox = track.to_tlwh()
                            detectedObject["tracking_bbox"] = {
                                "x": tracking_bbox[0],
                                "y": tracking_bbox[1],
                                "w": tracking_bbox[2],
                                "h": tracking_bbox[3],
                            }

                            if self.isDisplay:
                                bbox = track.to_tlbr()
                                cv2.rectangle(displayFrame,
                                              (int(bbox[0]), int(bbox[1])),
                                              (int(bbox[2]), int(bbox[3])),
                                              (255, 255, 255), 2)
                                cv2.putText(displayFrame, "{} {}".format(detectedObject["objectType"], \
                                    str(track.track_id)),(int(bbox[0]), int(bbox[1]) - 20), 0, 5e-3 * 100, (0,255,0), 2)
                            break

                self.resultQueue.put([robotId, videoId, msg])

                if self.isDisplay:
                    print("Tracker {} show frame".format(self.video_serial))
                    title = "track : {}".format(self.video_serial)
                    cv2.putText(displayFrame,
                                "keyframe {}".format(msg['keyframe']),
                                (30, 100), 0, 5e-3 * 100, (0, 0, 255), 2)
                    cv2.imshow(title, displayFrame)
                    cv2.waitKey(1)

                cv2.waitKey(1)
                sys.stdout.flush()

        cv2.waitKey(1)
        sys.stdout.flush()

        print("Tracker {} Stopped".format(self.video_serial))
コード例 #29
0
def main(yolo):
    os.chdir('..')
    send_to_GUI = 0
    video_record = 1
    source = 'RPi'  # 0 for the webcam, 'RPi' for the Raspberry Pi stream, or a video filename
    FLAGScsv = 0
    dict_prof = {}

    if FLAGScsv:
        csv_obj = save_csv()
    id_stay_old = [[], []]
    colors = {
        "male": (0, 0, 255),
        "female": (255, 0, 0),
        "None": (255, 255, 255)
    }

    device_obj = device_register()

    if send_to_GUI:
        # send video to note's GUI
        gst_out = cv2.VideoWriter(
            'appsrc ! videoconvert ! jpegenc quality=12 ! tcpserversink host=0.0.0.0 port=6007 sync=false',
            0, 15, (416, 416))

# Definition of the parameters
    max_cosine_distance = 1.5
    nn_budget = None
    nms_max_overlap = 1.0

    # deep_sort
    model_filename = 'deep_sort/model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=8)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric,
                      max_iou_distance=0.7,
                      max_age=50,
                      n_init=3,
                      _next_id=1)

    if source == 'RPi':
        video_capture = connect_RPi()
    else:
        video_capture = cv2.VideoCapture(source)
        video_capture.set(cv2.CAP_PROP_BUFFERSIZE, 1)

    print('video source : ', source)
    if video_record:
        out = cv2.VideoWriter()
        out.open('output.mp4', cv2.VideoWriter_fourcc(*'H264'), 25,
                 (1920, 1080), True)


#  ___________________________________________________________________________________________________________________________________________MAIN LOOP
    t_fps = [time.time()]
    while True:
        for i in range(round(20 / 8)):
            video_capture.grab()

        ret, frame = video_capture.read()
        if not ret:
            if source == 'RPi':
                print('[ INFO ] No frame received from RPi: wait for 5 sec')
                time.sleep(5)
                video_capture = connect_RPi()
                continue
            else:
                video_capture = cv2.VideoCapture(source)
                video_capture.set(cv2.CAP_PROP_BUFFERSIZE, 1)
                continue

        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # ______________________________________________________________________________________________________________________________DETECT WITH YOLO

        [gen_things, dev_things
         ] = yolo.detect_image(frame,
                               boxes_only=True)  # main detect function HERE
        features_gen = encoder(frame, gen_things[0])
        detections_gen = [
            Detection(bbox, 1.0, feature_gen)
            for bbox, feature_gen in zip(gen_things[0], features_gen)
        ]

        features_dev = encoder(frame, dev_things[0])
        detections_dev = [
            Detection(bbox, 1.0, feature_dev)
            for bbox, feature_dev in zip(dev_things[0], features_dev)
        ]

        device_obj.startframe(detections_dev)

        # ______________________________________________________________________________________________________________________________DRAW DEVICE

        for i in range(0, len(detections_dev)):
            bbox = detections_dev[i].to_tlbr()
            label = dev_things[1][i]

            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, label, (int(bbox[0]), int(bbox[1]) + 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 5e-3 * 200, (255, 0, 0), 2)

        # ______________________________________________________________________________________________________________________________Call the tracker

        tracker.predict()
        tracker.update(detections_gen, gen_things[1])  # feed detections

        # __________________________________________________________________________________________________________________________DRAW TRACK RECTANGLE

        id_stay = [[], []]

        for track in tracker.tracks:
            #dev_1p = {track.track_id:None}

            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()  #(min x, miny, max x, max y)
            bcenter = track.to_xyah(
            )  #(center x, center y, aspect ratio,height)
            dict_prof[track.track_id] = [[str(track.gender)], []]
            # check device
            if (len(detections_dev) != 0) and (len(detections_gen) !=
                                               0):  # detected some thing
                euc_1p = device_obj.update_person(bcenter, track.track_id)
                for connect in euc_1p:  #each person
                    if connect is not None:
                        cv2.line(frame, (int(bcenter[0]), int(bcenter[1])),
                                 (int(connect[1]), int(connect[2])),
                                 (0, 255, 0), 3)
                        device_label = dev_things[1][int(connect[0])]
                        if device_label not in dict_prof[
                                track.track_id][1]:  # do not record the same device twice
                            dict_prof[track.track_id][1].append(device_label)

            if track.gender == 'male':  # Avoid None
                id_stay[0].append(track.track_id)
                dict_prof[track.track_id][0] = ['male']
            if track.gender == 'female':
                id_stay[1].append(track.track_id)
                dict_prof[track.track_id][0] = ['female']

            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])),
                          colors[str(track.gender)], 2)
            cv2.putText(frame, str(track.track_id),
                        (int(bbox[0]), int(bbox[1]) + 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 5e-3 * 200, (0, 255, 0), 3)
            cv2.putText(frame, str(track.gender),
                        (int(bbox[0]), int(bbox[1]) + 70),
                        cv2.FONT_HERSHEY_SIMPLEX, 5e-3 * 200, (0, 255, 0), 3)

        frame = cv2.cvtColor(
            frame, cv2.COLOR_RGB2BGR)  #change to BGR for showing with OpenCV
        # __________________________________________________________________________________________________________________________ FRAME RATE things
        t_fps.append(time.time())
        fps = 1 / (t_fps[1] - t_fps[0])
        t_fps.pop(0)
        cv2.putText(frame, 'FPS : {:.2f}'.format(fps), (5, 20),
                    cv2.FONT_HERSHEY_SIMPLEX, 5e-3 * 100, (0, 0, 255), 2)
        out.write(
            frame) if video_record else None  # write frame if record to file
        if send_to_GUI:
            frame = cv2.resize(frame, (416, 416))
            gst_out.write(frame)
            print('FPS : {:.2f}'.format(fps))
        else:
            cv2.imshow('', frame)

        if (id_stay != id_stay_old
            ) and FLAGScsv:  # save csv if people in frame have changed
            csv_obj.save_event(id_stay)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

        id_stay_old = id_stay

    out.release() if video_record else None
    gst_out.release() if send_to_GUI else None
    video_capture.release()
    cv2.destroyAllWindows()
    if FLAGScsv:
        csv_obj.save_profile(dict_prof)
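connect_RPi() is not defined in this snippet; it is only known to return something that behaves like a cv2.VideoCapture. A rough stand-in is sketched below under the assumption that the Raspberry Pi exposes a network stream that OpenCV can open directly; the URL is a placeholder, not a value from the original project:

import cv2

def connect_RPi(stream_url='tcp://raspberrypi.local:5000'):
    """Open the Raspberry Pi camera stream; the URL is an assumed placeholder."""
    cap = cv2.VideoCapture(stream_url)
    cap.set(cv2.CAP_PROP_BUFFERSIZE, 1)  # keep latency low, like the file branch
    if not cap.isOpened():
        print('[ INFO ] Could not open the RPi stream at', stream_url)
    return cap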
コード例 #30
0
ファイル: demo.py プロジェクト: shmilymm/deep_sort_yolov3
def main(yolo):

    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = True 
    
    video_capture = cv2.VideoCapture(0)

    if writeVideo_flag:
    # Define the codec and create VideoWriter object
        w = int(video_capture.get(3))
        h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out = cv2.VideoWriter('output.avi', fourcc, 15, (w, h))
        list_file = open('detection.txt', 'w')
        frame_index = -1 
        
    fps = 0.0
    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if not ret:
            break
        t1 = time.time()

        image = Image.fromarray(frame)
        boxs = yolo.detect_image(image)
       # print("box_num",len(boxs))
        features = encoder(frame,boxs)
        
        # score to 1.0 here).
        detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)]
        
        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]
        
        # Call the tracker
        tracker.predict()
        tracker.update(detections)
        
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200, (0, 255, 0), 2)

        for det in detections:
            bbox = det.to_tlbr()
            cv2.rectangle(frame,(int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(255,0,0), 2)
            
        cv2.imshow('', frame)
        
        if writeVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1
            list_file.write(str(frame_index)+' ')
            if len(boxs) != 0:
                for i in range(0,len(boxs)):
                    list_file.write(str(boxs[i][0]) + ' '+str(boxs[i][1]) + ' '+str(boxs[i][2]) + ' '+str(boxs[i][3]) + ' ')
            list_file.write('\n')
            
        fps  = ( fps + (1./(time.time()-t1)) ) / 2
        print("fps= %f"%(fps))
        
        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    video_capture.release()
    if writeVideo_flag:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
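detection.txt above only records the raw detection boxes per frame. If the confirmed tracks are wanted as well, a small sketch could log them in an MOT-style "frame,id,x,y,w,h" format; write_tracks and its file handle are assumptions, not part of the original demo:

def write_tracks(track_file, frame_index, tracks):
    """Append one comma-separated line per confirmed, recently updated track."""
    for track in tracks:
        if not track.is_confirmed() or track.time_since_update > 1:
            continue
        x, y, w, h = track.to_tlwh()
        track_file.write("%d,%d,%.2f,%.2f,%.2f,%.2f\n" %
                         (frame_index, track.track_id, x, y, w, h))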
コード例 #31
0
def main():
    # Definition of the parameters
    max_cosine_distance = 2.0
    nn_budget = None
    nms_max_overlap = 3.0

    # Deep SORT
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    show_detections = True  # draw blue detection boxes when a detection is made
    writeVideo_flag = True  # record the video output

    defaultSkipFrames = 5  # skipped frames between detections
    defaultConfidences = 0.5

    # set up the door line (two endpoints and a radius)
    H1 = 245
    W1 = 370
    H2 = 278
    W2 = 480
    H = None
    W = None

    R = 80  # min R is 56

    door_dict = setup_door(H1, W1, H2, W2, R)

    totalFrames = 0
    totalIn = 0

    # create an empty list of centroids to count traffic
    pts = [deque(maxlen=30) for _ in range(9999)]

    file_path = 'D:\\video/[Sala Outside][2020-05-28T16-01-39][2020-05-28T18-02-09].mp4'
    video_capture = cv2.VideoCapture(file_path)

    fps_imutils = imutils.video.FPS().start()

    if writeVideo_flag:
        fourcc = cv2.VideoWriter_fourcc(*'MP4V')
        out = cv2.VideoWriter('output_yolov4.mp4', fourcc, 3, (736, 480))

    while True:
        oke, frame = video_capture.read()  # frame shape 640*480*3
        if not oke:
            break

        frame = cv2.resize(frame, (736, 480))

        # if the frame dimensions are empty, set them
        if W is None or H is None:
            (H, W) = frame.shape[:2]

        # calculate video time
        videotime = video_capture.get(cv2.CAP_PROP_POS_MSEC) / 1000

        # Draw a door line
        for w in range(W1, W2):
            cv2.circle(frame, (w, door_dict[w]), 1, (0, 255, 255), -1)
        cv2.circle(frame, (W1, H1), 4, (0, 0, 255), -1)
        cv2.circle(frame, (W2, H2), 4, (0, 0, 255), -1)

        if totalFrames % defaultSkipFrames == 0:
            boxes, confidence, classes = detect_image(
                frame, H, W, defaultConfidences)  # average time: 1.2s

            features = encoder(frame, boxes)
            detections = [
                Detection(bbox, confidence, cls, feature)
                for bbox, confidence, cls, feature in zip(
                    boxes, confidence, classes, features)
            ]

            # Run non-maxima suppression.
            boxes = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            classes = np.array([d.cls for d in detections])
            indices = preprocessing.non_max_suppression(
                boxes, nms_max_overlap, scores)
            detections = [detections[i] for i in indices]

            # Call the tracker
            tracker.predict()
            tracker.update(detections)

            for det in detections:
                bbox = det.to_tlbr()
                if show_detections and len(classes) > 0:
                    det_cls = det.cls
                    score = "%.2f" % (det.confidence * 100) + "%"
                    cv2.putText(frame,
                                str(det_cls) + " " + score,
                                (int(bbox[0]), int(bbox[3]) - 10), 0,
                                1e-3 * frame.shape[0], (0, 255, 0), 1)
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])), (255, 0, 0), 1)

            for track in tracker.tracks:
                if not track.is_confirmed():
                    continue
                bbox = track.to_tlbr()

                if not_count_staff(frame, int(bbox[0]), int(bbox[1]),
                                   int(bbox[2]), int(bbox[3])):
                    # adc = "%.2f" % (track.adc * 100) + "%"  # Average detection confidence
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])), (0, 255, 255),
                                  2)
                    cv2.putText(frame, "STAFF",
                                (int(bbox[0]), int(bbox[1]) - 10), 0,
                                1e-3 * frame.shape[0], (0, 0, 255), 1)
                    continue
                else:
                    # adc = "%.2f" % (track.adc * 100) + "%"  # Average detection confidence
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])),
                                  (255, 255, 255), 2)
                    cv2.putText(frame, "ID: " + str(track.track_id),
                                (int(bbox[0]), int(bbox[1])), 0,
                                1e-3 * frame.shape[0], (0, 255, 0), 1)

                x = [c[0] for c in pts[track.track_id]]
                y = [c[1] for c in pts[track.track_id]]

                centroid_x = int(((bbox[0]) + (bbox[2])) / 2)
                centroid_y = int(((bbox[1]) + (bbox[3])) / 2)

                if not track.Counted and centroid_x in range(W1, W2):
                    if centroid_y < np.mean(y) and door_dict[
                            centroid_x] > centroid_y and np.max(x) - np.min(
                                x) > 20:
                        totalIn += 1
                        track.Counted = True
                        print(track.track_id, track.Counted)

                cv2.circle(frame, (centroid_x, centroid_y), 4, (0, 255, 0), -1)
                pts[track.track_id].append((centroid_x, centroid_y))

            info = [("Time", "{:.4f}".format(videotime)), ("In", totalIn)]

            # loop over the info tuples and draw them on our frame
            for (i, (k, v)) in enumerate(info):
                text = "{}: {}".format(k, v)
                cv2.putText(frame, text, (W - 150, ((i * 20) + 20)),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
            if writeVideo_flag:
                # save a frame
                out.write(frame)

            if show_detections:
                cv2.imshow('People counter', frame)
                # Press Q to stop!
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
        else:
            # On skipped frames, re-run the tracker with the detections kept
            # from the most recent detection frame
            tracker.predict()
            tracker.update(detections)

        fps_imutils.update()

        totalFrames += 1

    fps_imutils.stop()
    print('imutils FPS: {}'.format(fps_imutils.fps()))

    if writeVideo_flag:
        out.release()

    video_capture.release()

    cv2.destroyAllWindows()
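setup_door() and not_count_staff() are project-specific helpers that are not shown here. The main loop only requires setup_door() to map every x between W1 and W2 to a y coordinate on the door line; the sketch below does that with straight-line interpolation and simply ignores the radius R (which the real helper presumably uses to bend the line into an arc), so it is an illustration rather than the original implementation:

def setup_door(H1, W1, H2, W2, R):
    """Map each x in [W1, W2] to a y on the straight line between (W1, H1) and (W2, H2)."""
    door = {}
    for w in range(W1, W2 + 1):
        t = (w - W1) / float(W2 - W1)  # 0 at the left endpoint, 1 at the right
        door[w] = int(round(H1 + t * (H2 - H1)))  # linear interpolation, R unused
    return door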