Example #1
def is_empty(result_frame):
    input_size = size
    frame_size = result_frame.shape[:2]
    image_data = cv2.resize(result_frame, (input_size, input_size))
    image_data = image_data / 255.
    image_data = image_data[np.newaxis, ...].astype(np.float32)
    # detect on full image or part of image
    batch_data = tf.constant(image_data)

    pred_bbox = infer(batch_data)
    for key, value in pred_bbox.items():
        boxes = value[:, :, 0:4]
        pred_conf = value[:, :, 4:]

    boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
        boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
        scores=tf.reshape(
            pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
        max_output_size_per_class=50,
        max_total_size=50,
        iou_threshold=iou,
        score_threshold=score_human
    )

    # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, xmax, ymax
    original_h, original_w, _ = result_frame.shape
    bboxes = utils.format_boxes(boxes.numpy()[0], original_h, original_w)

    # when nothing passes the score threshold, every (padded) box stays zero, so this sum is 0
    summ_of_obj_probabilities = sum([sum(i) for i in bboxes])

    return summ_of_obj_probabilities == 0
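
Every example in this collection funnels its raw model output through the same tf.image.combined_non_max_suppression call. As a point of reference, here is a minimal, self-contained sketch of just that call on random tensors (not taken from any example above), which makes the expected input and output shapes explicit:

import tensorflow as tf

batch, num_boxes, num_classes = 1, 100, 80
# q = 1 in the boxes tensor means the same boxes are shared by all classes
boxes = tf.random.uniform((batch, num_boxes, 1, 4))
scores = tf.random.uniform((batch, num_boxes, num_classes))

nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections = tf.image.combined_non_max_suppression(
    boxes=boxes,
    scores=scores,
    max_output_size_per_class=50,
    max_total_size=50,
    iou_threshold=0.45,
    score_threshold=0.25)

print(nmsed_boxes.shape)                 # (1, 50, 4), padded up to max_total_size
print(int(valid_detections.numpy()[0]))  # how many of those 50 rows are real detections
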
def run_DL(_frame):

    # if pt_cfg.POLYTRACK.DL_DARK_SPOTS:
    #     dark_spots = pt_cfg.POLYTRACK.RECORDED_DARK_SPOTS
    #     if len(dark_spots):
    #         _frame = map_darkspots(_frame, dark_spots)
    #     else:
    #         pass
    # else:
    #     pass

    _frame = cv2.cvtColor(_frame, cv2.COLOR_BGR2RGB)
    image = Image.fromarray(_frame)

    frame_size = _frame.shape[:2]
    image_data = cv2.resize(_frame, (cfg.YOLO.INPUT_SIZE, cfg.YOLO.INPUT_SIZE))
    image_data = image_data / 255.
    image_data = image_data[np.newaxis, ...].astype(np.float32)

    batch_data = tf.constant(image_data)
    pred_bbox = infer(batch_data)
    for key, value in pred_bbox.items():
        boxes = value[:, :, 0:4]
        pred_conf = value[:, :, 4:]

    boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
        boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
        scores=tf.reshape(
            pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
        max_output_size_per_class=pt_cfg.POLYTRACK.MAX_OUTPUT_SIZE_PER_CLASS,
        max_total_size=pt_cfg.POLYTRACK.MAX_TOTAL_SIZE,
        iou_threshold=pt_cfg.POLYTRACK.DL_IOU_THRESHOLD,
        score_threshold=pt_cfg.POLYTRACK.DL_SCORE_THRESHOLD)

    # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, xmax, ymax
    original_h, original_w, _ = _frame.shape
    bboxes = utils.format_boxes(boxes.numpy()[0], original_h, original_w)

    pred_bbox = [
        bboxes,
        scores.numpy()[0],
        classes.numpy()[0],
        valid_detections.numpy()[0]
    ]

    # read in all class names from config
    class_names = utils.read_class_names(cfg.YOLO.CLASSES)

    _detections = dl_detections_process(pred_bbox)

    return _detections
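
run_DL and the other examples lean on utils.format_boxes from their YOLOv4 helper modules, which (per the comment above) rescales the normalized (ymin, xmin, ymax, xmax) output of the NMS step to pixel (xmin, ymin, xmax, ymax) for the original frame. A rough stand-in, written only to illustrate that conversion and not the library's actual implementation:

import numpy as np

def format_boxes_sketch(bboxes, image_height, image_width):
    # bboxes: array of normalized [ymin, xmin, ymax, xmax] rows
    formatted = []
    for ymin, xmin, ymax, xmax in bboxes:
        formatted.append([int(xmin * image_width), int(ymin * image_height),
                          int(xmax * image_width), int(ymax * image_height)])
    return np.array(formatted)
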
Example #3
def get_detected_zone(result_frame, bodyguard=['helmet'], forbidden=False):
    input_size = size
    frame_size = result_frame.shape[:2]
    image_data = cv2.resize(result_frame, (input_size, input_size))
    image_data = image_data / 255.
    image_data = image_data[np.newaxis, ...].astype(np.float32)
    # detect on full image or part of image
    batch_data = tf.constant(image_data)


    pred_bbox = infer(batch_data)
    for key, value in pred_bbox.items():
        boxes = value[:, :, 0:4]
        pred_conf = value[:, :, 4:]

    boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
        boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
        scores=tf.reshape(
            pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
        max_output_size_per_class=50,
        max_total_size=50,
        iou_threshold=iou,
        score_threshold=score_human
    )

    # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, xmax, ymax
    original_h, original_w, _ = result_frame.shape
    bboxes = utils.format_boxes(boxes.numpy()[0], original_h, original_w)

    # if we are only checking whether a forbidden zone is empty, return the raw boxes
    if forbidden:
        return bboxes

    obj_detections = []

    image = Image.fromarray(result_frame)
    for i in range(valid_detections.numpy()[0]):
        # save persons parts
        image_tmp = image.crop((bboxes[i][0] - 10, bboxes[i][1] - 10, bboxes[i][2] + 10, bboxes[i][3] + 10))
        image_tmp = cv2.cvtColor(np.array(image_tmp), cv2.COLOR_BGR2RGB)

        obj_detections.append(detect_on_person(image_tmp, bodyguard))

    pred_bbox = [bboxes, scores.numpy()[0], classes.numpy()[0], valid_detections.numpy()[0]]
    image, violation = utils.draw_bbox(result_frame, pred_bbox, obj_detections, obj_threshold=score_obj)
    return image, violation
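
get_detected_zone grows each person box by 10 px before cropping; PIL's Image.crop accepts coordinates outside the frame and pads the missing area with black, which may or may not be what a downstream classifier expects. A hypothetical helper (not part of the source) that instead clamps the margin to the frame:

def expand_and_clamp(box, margin, frame_width, frame_height):
    # box: (xmin, ymin, xmax, ymax) in pixels
    xmin, ymin, xmax, ymax = box
    return (max(0, xmin - margin), max(0, ymin - margin),
            min(frame_width, xmax + margin), min(frame_height, ymax + margin))
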
Example #4
def detector(images_coming, threshold, prop):
    FLAGS(sys.argv)
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    input_size = prop['size']

    # load model
    saved_model_loaded = tf.saved_model.load(prop['weights'],
                                             tags=[tag_constants.SERVING])

    # loop through images in list and run Yolov4 model on each
    for count, org_image in enumerate(images_coming, 1):
        original_image = cv2.cvtColor(org_image, cv2.COLOR_BGR2RGB)
        image_data = cv2.resize(original_image, (input_size, input_size))
        image_data = image_data / 255.

        images_data = []
        for i in range(1):
            images_data.append(image_data)
        images_data = np.asarray(images_data).astype(np.float32)
        infer = saved_model_loaded.signatures['serving_default']
        batch_data = tf.constant(images_data)
        pred_bbox = infer(batch_data)
        for key, value in pred_bbox.items():
            boxes = value[:, :, 0:4]
            pred_conf = value[:, :, 4:]

        # run non max suppression on detections
        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(
                pred_conf,
                (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=prop['iou'],
            score_threshold=threshold)

        # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, xmax, ymax
        original_h, original_w, _ = original_image.shape
        bboxes = utils.format_boxes(boxes.numpy()[0], original_h, original_w)

        # hold all detection data in one variable
        pred_bbox = [
            bboxes,
            scores.numpy()[0],
            classes.numpy()[0],
            valid_detections.numpy()[0]
        ]

        # read in all class names from config
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)

        # by default allow all classes in .names file
        allowed_classes = list(class_names.values())

        # if count flag is enabled, perform counting of objects
        counted_classes = None
        if prop['count']:
            # count objects found
            counted_classes = count_objects(pred_bbox,
                                            by_class=True,
                                            allowed_classes=allowed_classes)
            # loop through dict and print
            for key, value in counted_classes.items():
                print("Number of {}s: {}".format(key, value))
            image = utils.draw_bbox(original_image,
                                    pred_bbox,
                                    prop['info'],
                                    counted_classes,
                                    allowed_classes=allowed_classes)
        else:
            image = utils.draw_bbox(original_image,
                                    pred_bbox,
                                    prop['info'],
                                    allowed_classes=allowed_classes)

        image = Image.fromarray(image.astype(np.uint8))
        image = cv2.cvtColor(np.array(image), cv2.COLOR_BGR2RGB)
        return image, counted_classes, pred_bbox
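
detector reads its configuration from the prop dictionary; the keys it touches are 'size', 'weights', 'iou', 'count' and 'info'. A hypothetical call (paths and values are placeholders, not from the source) would be shaped like this:

import cv2

prop = {
    'size': 416,                            # model input resolution
    'weights': './checkpoints/yolov4-416',  # SavedModel directory (placeholder)
    'iou': 0.45,
    'count': True,
    'info': False,
}
# images = [cv2.imread('test.jpg')]
# image, counted_classes, pred_bbox = detector(images, 0.25, prop)
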
Example #5
def main(_argv):
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size
    video_path = FLAGS.video
    # get video name by using split method
    video_name = video_path.split('/')[-1]
    video_name = video_name.split('.')[0]
    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights,
                                                 tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']

    # begin video capture
    try:
        vid = cv2.VideoCapture(int(video_path))
    except:
        vid = cv2.VideoCapture(video_path)

    out = None

    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    firstFrame = True
    frame_num = 0
    while True:
        return_value, frame_1 = vid.read()
        pts = []
        aa = []
        bb = []
        cc = []
        dd = []
        while firstFrame:

            def click_event(event, x, y, flags, param):
                # pts from the enclosing scope is only appended to / popped, so no global declaration is needed
                if event == cv2.EVENT_LBUTTONDOWN:
                    pts.append((x, y))
                    cv2.circle(frame_1,
                               center=(x, y),
                               radius=5,
                               color=(0, 0, 255),
                               thickness=-1)
                    strXY = str(x) + " " + str(y)
                    font = cv2.FONT_HERSHEY_SIMPLEX
                    cv2.putText(frame_1, strXY, (x, y), font, 0.5,
                                (255, 255, 0), 2)
                elif event == cv2.EVENT_RBUTTONDOWN:
                    if pts:
                        pts.pop()
                cv2.imshow('bobur', frame_1)

            cv2.imshow('bobur', frame_1)
            cv2.setMouseCallback('bobur', click_event)
            if cv2.waitKey(1) & 0xFF == ord('c'):
                firstFrame = False
                break
            if len(pts) >= 4:
                aa.append(pts[0])
                bb.append(pts[1])
                cc.append(pts[2])
                dd.append(pts[3])
            print(aa, bb, cc, dd)

        a, b, c, d = [209, 1040], [331, 197], [1124, 197], [1907, 850]
        e, f, g, h = [0, 0], [1920, 0], [1920, 1080], [0, 1080]
        external_poly = [
            np.array([e, b, c, f]),
            np.array([f, c, d, g]),
            np.array([g, d, a, h]),
            np.array([h, a, b, e])
        ]
        frame = cv2.fillPoly(frame_1, external_poly, (0, 0, 0))
        # cv2.line(frame,(209, 1040),(331,197),(255,0,0),2)
        # cv2.line(frame,(331, 197), (1124,197),(255,0,0),2)
        # cv2.line(frame,(1124,197),(1907,850),(255,0,0),2)
        # cv2.line(frame,(209, 1040),(1907,850),(255,0,0),2)
        # cv2.line(frame,a,b,(255,0,0),2)
        # cv2.line(frame,b,c,(255,0,0),2)
        # cv2.line(frame,c,d,(255,0,0),2)
        # cv2.line(frame,a,d,(255,0,0),2)

        if return_value:
            # frame = cv2.rotate(frame, cv2.ROTATE_90_CLOCKWISE)  #rotate the video for mobile videos
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_num += 1
            image = Image.fromarray(frame)
        else:
            print('Video has ended or failed, try a different video format!')
            break
        if frame_num % 15 == 0:

            frame_size = frame.shape[:2]
            image_data = cv2.resize(frame, (input_size, input_size))
            image_data = image_data / 255.
            image_data = image_data[np.newaxis, ...].astype(np.float32)
            start_time = time.time()

            if FLAGS.framework == 'tflite':
                interpreter.set_tensor(input_details[0]['index'], image_data)
                interpreter.invoke()
                pred = [
                    interpreter.get_tensor(output_details[i]['index'])
                    for i in range(len(output_details))
                ]
                if FLAGS.model == 'yolov3' and FLAGS.tiny == True:
                    boxes, pred_conf = filter_boxes(
                        pred[1],
                        pred[0],
                        score_threshold=0.25,
                        input_shape=tf.constant([input_size, input_size]))
                else:
                    boxes, pred_conf = filter_boxes(
                        pred[0],
                        pred[1],
                        score_threshold=0.25,
                        input_shape=tf.constant([input_size, input_size]))
            else:
                batch_data = tf.constant(image_data)
                pred_bbox = infer(batch_data)
                for key, value in pred_bbox.items():
                    boxes = value[:, :, 0:4]
                    pred_conf = value[:, :, 4:]

            boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
                boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
                scores=tf.reshape(
                    pred_conf,
                    (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
                max_output_size_per_class=100,
                max_total_size=100,
                iou_threshold=FLAGS.iou,
                score_threshold=FLAGS.score)

            # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, xmax, ymax
            original_h, original_w, _ = frame.shape
            bboxes = utils.format_boxes(boxes.numpy()[0], original_h,
                                        original_w)

            pred_bbox = [
                bboxes,
                scores.numpy()[0],
                classes.numpy()[0],
                valid_detections.numpy()[0]
            ]
            # print(pred_bbox[2])
            out_boxes, out_scores, out_classes, num_boxes = pred_bbox

            # read in all class names from config
            class_names = utils.read_class_names(cfg.YOLO.CLASSES)

            # by default allow all classes in .names file
            # allowed_classes = list(class_names.values())

            # custom allowed classes (uncomment line below to allow detections for only SELECTED DETECTION CLASSES)
            allowed_classes = ['person', 'car', 'truck', 'bus', 'motorbike']
            # allowed_classes = ['car']

            # if crop flag is enabled, crop each detection and save it as new image
            if FLAGS.crop:
                crop_rate = 150  # capture images every so many frames (ex. crop photos every 150 frames)
                crop_path = os.path.join(os.getcwd(), 'detections', 'crop',
                                         video_name)
                try:
                    os.mkdir(crop_path)
                except FileExistsError:
                    pass
                if frame_num % crop_rate == 0:
                    final_path = os.path.join(crop_path,
                                              'frame_' + str(frame_num))
                    try:
                        os.mkdir(final_path)
                    except FileExistsError:
                        pass
                    crop_objects(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB),
                                 pred_bbox, final_path, allowed_classes)
                else:
                    pass

            if FLAGS.count:
                # count objects found
                counted_classes = count_objects(
                    pred_bbox, by_class=True, allowed_classes=allowed_classes)
                # loop through dict and print
                for key, value in counted_classes.items():
                    print("Number of {}s: {}".format(key, value))
                image = utils.draw_bbox(frame,
                                        pred_bbox,
                                        FLAGS.info,
                                        counted_classes,
                                        allowed_classes=allowed_classes,
                                        read_plate=FLAGS.plate)
            else:
                image = utils.draw_bbox(frame,
                                        pred_bbox,
                                        FLAGS.info,
                                        allowed_classes=allowed_classes,
                                        read_plate=FLAGS.plate)

            fps = 1.0 / (time.time() - start_time)
            print("FPS: %.2f" % fps)
            result = np.asarray(image)
            cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)
            result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

            if not FLAGS.dont_show:
                cv2.imshow("result", result)

            if FLAGS.output:
                out.write(result)
            if cv2.waitKey(1) & 0xFF == ord('q'): break
    vid.release()
    cv2.destroyAllWindows()
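
The region-of-interest trick above blacks out everything outside the quadrilateral a-b-c-d by filling the four polygons formed between the ROI corners and the frame corners. Isolated from the rest of the loop, the masking itself is just this (same coordinates as above, on a dummy frame):

import numpy as np
import cv2

frame = np.full((1080, 1920, 3), 255, dtype=np.uint8)           # dummy white frame
a, b, c, d = [209, 1040], [331, 197], [1124, 197], [1907, 850]   # ROI corners
e, f, g, h = [0, 0], [1920, 0], [1920, 1080], [0, 1080]          # frame corners
external_poly = [np.array(p, dtype=np.int32)                     # int32 point arrays for fillPoly
                 for p in ([e, b, c, f], [f, c, d, g], [g, d, a, h], [h, a, b, e])]
masked = cv2.fillPoly(frame, external_poly, (0, 0, 0))           # only the ROI stays visible
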
def main(_argv):
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size
    video_path = FLAGS.video
    # get video name by using split method
    video_name = video_path.split('/')[-1]
    video_name = video_name.split('.')[0]
    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']

    # begin video capture
    try:
        vid = cv2.VideoCapture(int(video_path))
    except:
        vid = cv2.VideoCapture(video_path)

    out = None

    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    frame_num = 0
    while True:
        return_value, frame = vid.read()
        if return_value:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_num += 1
            image = Image.fromarray(frame)
        else:
            print('Video has ended or failed, try a different video format!')
            break
    
        frame_size = frame.shape[:2]
        image_data = cv2.resize(frame, (input_size, input_size))
        image_data = image_data / 255.
        image_data = image_data[np.newaxis, ...].astype(np.float32)
        start_time = time.time()

        if FLAGS.framework == 'tflite':
            interpreter.set_tensor(input_details[0]['index'], image_data)
            interpreter.invoke()
            pred = [interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details))]
            if FLAGS.model == 'yolov3' and FLAGS.tiny == True:
                boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25,
                                                input_shape=tf.constant([input_size, input_size]))
            else:
                boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25,
                                                input_shape=tf.constant([input_size, input_size]))
        else:
            batch_data = tf.constant(image_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(
                pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=FLAGS.iou,
            score_threshold=FLAGS.score
        )

        # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, xmax, ymax
        original_h, original_w, _ = frame.shape
        bboxes = utils.format_boxes(boxes.numpy()[0], original_h, original_w)

        pred_bbox = [bboxes, scores.numpy()[0], classes.numpy()[0], valid_detections.numpy()[0]]

        # read in all class names from config
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)

        # by default allow all classes in .names file
        allowed_classes = list(class_names.values())
        
        # custom allowed classes (uncomment line below to allow detections for only people)
        #allowed_classes = ['person']

        # if crop flag is enabled, crop each detection and save it as new image
        if FLAGS.crop:
            crop_rate = 150 # capture images every so many frames (ex. crop photos every 150 frames)
            crop_path = os.path.join(os.getcwd(), 'detections', 'crop', video_name)
            try:
                os.mkdir(crop_path)
            except FileExistsError:
                pass
            if frame_num % crop_rate == 0:
                final_path = os.path.join(crop_path, 'frame_' + str(frame_num))
                try:
                    os.mkdir(final_path)
                except FileExistsError:
                    pass          
                crop_objects(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB), pred_bbox, final_path, allowed_classes)
            else:
                pass

        if FLAGS.count:
            # count objects found
            counted_classes = count_objects(pred_bbox, by_class = True, allowed_classes=allowed_classes)
            # loop through dict and print
            for key, value in counted_classes.items():
                print("Number of {}s: {}".format(key, value))
            image = utils.draw_bbox(frame, pred_bbox, FLAGS.info, counted_classes, allowed_classes=allowed_classes, read_plate=FLAGS.plate)
        else:
            image = utils.draw_bbox(frame, pred_bbox, FLAGS.info, allowed_classes=allowed_classes, read_plate=FLAGS.plate)
        
        fps = 1.0 / (time.time() - start_time)
        print("FPS: %.2f" % fps)
        result = np.asarray(image)
        # cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)
        result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        
        # if not FLAGS.dont_show:
            # cv2.imshow("result", result)
        
        if FLAGS.output:
            out.write(result)
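
Several of these examples branch on FLAGS.framework == 'tflite' and drive a TensorFlow Lite interpreter instead of a SavedModel. Stripped of the YOLO specifics, that branch boils down to the following sketch ('yolov4-416.tflite' is a placeholder path, not from the source):

import numpy as np
import tensorflow as tf

interpreter = tf.lite.Interpreter(model_path='yolov4-416.tflite')  # placeholder model file
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# feed one dummy batch shaped like the model's input tensor
dummy = np.zeros(input_details[0]['shape'], dtype=np.float32)
interpreter.set_tensor(input_details[0]['index'], dummy)
interpreter.invoke()
pred = [interpreter.get_tensor(d['index']) for d in output_details]
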
Example #8
def iterate(lines, model, vid, frame_num):
    tracks = []

    return_value, frame = vid.read()
    if return_value:
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        image = Image.fromarray(frame)
    else:
        print('Video has ended or failed, try a different video format!')
        cv2.destroyAllWindows()
        return False, tracks

    frame_size = frame.shape[:2]
    image_data = cv2.resize(frame, (FLAGS.size, FLAGS.size))
    image_data = image_data / 255.
    image_data = image_data[np.newaxis, ...].astype(np.float32)
    start_time = time.time()

    # run detections on tflite if flag is set
    if FLAGS.framework == 'tflite':
        interpreter.set_tensor(input_details[0]['index'], image_data)
        interpreter.invoke()
        pred = [
            interpreter.get_tensor(output_details[i]['index'])
            for i in range(len(output_details))
        ]
        # run detections using yolov3 if flag is set
        if FLAGS.model == 'yolov3' and FLAGS.tiny == True:
            boxes, pred_conf = filter_boxes(pred[1],
                                            pred[0],
                                            score_threshold=0.25,
                                            input_shape=tf.constant(
                                                [FLAGS.size, FLAGS.size]))
        else:
            boxes, pred_conf = filter_boxes(pred[0],
                                            pred[1],
                                            score_threshold=0.25,
                                            input_shape=tf.constant(
                                                [FLAGS.size, FLAGS.size]))
    else:
        batch_data = tf.constant(image_data)
        pred_bbox = model.signatures['serving_default'](batch_data)
        for key, value in pred_bbox.items():
            boxes = value[:, :, 0:4]
            pred_conf = value[:, :, 4:]

    boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
        boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
        scores=tf.reshape(
            pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
        max_output_size_per_class=50,
        max_total_size=50,
        iou_threshold=FLAGS.iou,
        score_threshold=FLAGS.score)

    # convert data to numpy arrays and slice out unused elements
    num_objects = valid_detections.numpy()[0]
    bboxes = boxes.numpy()[0]
    bboxes = bboxes[0:int(num_objects)]
    scores = scores.numpy()[0]
    scores = scores[0:int(num_objects)]
    classes = classes.numpy()[0]
    classes = classes[0:int(num_objects)]

    # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height
    original_h, original_w, _ = frame.shape
    bboxes = utils.format_boxes(bboxes, original_h, original_w)

    # store all predictions in one parameter for simplicity when calling functions
    pred_bbox = [bboxes, scores, classes, num_objects]

    # read in all class names from config
    class_names = utils.read_class_names(cfg.YOLO.CLASSES)

    # by default allow all classes in .names file
    #allowed_classes = list(class_names.values())

    # custom allowed classes (uncomment line below to customize tracker for only people)
    #allowed_classes = ['person']
    allowed_classes = ['car', 'bus', 'truck']

    # loop through objects and use class index to get class name, allow only classes in allowed_classes list
    names = []
    deleted_indx = []
    for i in range(num_objects):
        class_indx = int(classes[i])
        class_name = class_names[class_indx]
        if class_name not in allowed_classes:
            deleted_indx.append(i)
        else:
            names.append(class_name)

    names = np.array(names)
    count = len(names)
    if FLAGS.count:
        cv2.putText(frame, "Objects being tracked: {}".format(count), (5, 35),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 255, 0), 2)
        print("Objects being tracked: {}".format(count))

    # delete detections that are not in allowed_classes
    bboxes = np.delete(bboxes, deleted_indx, axis=0)
    scores = np.delete(scores, deleted_indx, axis=0)

    # encode yolo detections and feed to tracker
    features = encoder(frame, bboxes)
    detections = [
        Detection(bbox, score, class_name,
                  feature) for bbox, score, class_name, feature in zip(
                      bboxes, scores, names, features)
    ]

    #initialize color map
    cmap = plt.get_cmap('tab20b')
    colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

    # run non-maxima supression
    boxs = np.array([d.tlwh for d in detections])
    scores = np.array([d.confidence for d in detections])
    classes = np.array([d.class_name for d in detections])
    indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap,
                                                scores)
    detections = [detections[i] for i in indices]

    # Call the tracker
    tracker.predict()
    tracker.update(detections)

    # update tracks
    for track in tracker.tracks:
        if not track.is_confirmed() or track.time_since_update > 1:
            continue
        bbox = track.to_tlbr()
        class_name = track.get_class()

        tracks.append(
            Rect(track.track_id, (int(bbox[0]), int(bbox[1])),
                 (int(bbox[2]) - int(bbox[0]), int(bbox[3]) - int(bbox[1]))))

        # draw bbox on screen
        color = colors[int(track.track_id) % len(colors)]
        color = [i * 255 for i in color]
        cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                      (int(bbox[2]), int(bbox[3])), color, 2)
        cv2.rectangle(
            frame, (int(bbox[0]), int(bbox[1] - 30)),
            (int(bbox[0]) +
             (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])),
            color, -1)
        cv2.putText(frame, class_name + "-" + str(track.track_id),
                    (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                    (255, 255, 255), 2)

        # if enable info flag then print details about each track
        #if FLAGS.info:
        #    print("Tracker ID: {}, Class: {},  BBox Coords (xmin, ymin, xmax, ymax): {}".format(str(track.track_id), class_name, (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))))

    for line in lines:
        cv2.line(frame, line.pt1, line.pt2, line.color, 3)
        cv2.line(frame, line.vertor_pt1, line.vertor_pt2, (255, 255, 0), 2)
        cv2.putText(frame, str(line.count), line.center,
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    cv2.putText(frame, str(frame_num), (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1,
                (255, 255, 0), 2)

    # calculate frames per second of running detections
    fps = 1.0 / (time.time() - start_time)
    print("FPS: %.2f" % fps)
    result = np.asarray(frame)
    result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

    if not FLAGS.dont_show:
        cv2.imshow("Output Video", result)

    # if output flag is set, save video file
    if FLAGS.output:
        out.write(result)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        cv2.destroyAllWindows()
        return False, tracks

    return True, tracks
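
iterate() takes a list of line objects whose attributes are only visible through how they are drawn above (pt1, pt2, vertor_pt1, vertor_pt2, center, color, count). A hypothetical stand-in with exactly those attributes, useful only for calling the function in isolation:

from dataclasses import dataclass
from typing import Tuple

@dataclass
class Line:
    # attribute names (including the 'vertor_pt1' spelling) follow their usage in iterate()
    pt1: Tuple[int, int]
    pt2: Tuple[int, int]
    vertor_pt1: Tuple[int, int]
    vertor_pt2: Tuple[int, int]
    center: Tuple[int, int]
    color: Tuple[int, int, int] = (255, 0, 0)
    count: int = 0
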
def main(_argv):
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size
    video_path = FLAGS.video

    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights,
                                                 tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']

    # begin video capture
    try:
        vid = cv2.VideoCapture(int(video_path))
    except:
        vid = cv2.VideoCapture(video_path)

    out = None

    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    while True:
        return_value, frame = vid.read()
        if return_value:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(frame)
        else:
            print('Video has ended or failed, try a different video format!')
            break

        frame_size = frame.shape[:2]
        image_data = cv2.resize(frame, (input_size, input_size))
        image_data = image_data / 255.
        image_data = image_data[np.newaxis, ...].astype(np.float32)
        start_time = time.time()

        if FLAGS.framework == 'tflite':
            interpreter.set_tensor(input_details[0]['index'], image_data)
            interpreter.invoke()
            pred = [
                interpreter.get_tensor(output_details[i]['index'])
                for i in range(len(output_details))
            ]
            if FLAGS.model == 'yolov3' and FLAGS.tiny == True:
                boxes, pred_conf = filter_boxes(pred[1],
                                                pred[0],
                                                score_threshold=0.25,
                                                input_shape=tf.constant(
                                                    [input_size, input_size]))
            else:
                boxes, pred_conf = filter_boxes(pred[0],
                                                pred[1],
                                                score_threshold=0.25,
                                                input_shape=tf.constant(
                                                    [input_size, input_size]))
        else:
            batch_data = tf.constant(image_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(
                pred_conf,
                (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=FLAGS.iou,
            score_threshold=FLAGS.score)
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)
        allowed_classes = ['person']
        original_h, original_w, _ = frame.shape
        bboxes = utils.format_boxes(boxes.numpy()[0], original_h, original_w)
        pred_bbox = [
            bboxes,
            scores.numpy()[0],
            classes.numpy()[0],
            valid_detections.numpy()[0]
        ]
        image = utils.draw_bbox(frame,
                                pred_bbox,
                                allowed_classes=allowed_classes)

        if FLAGS.covid:
            distance = social_distance(pred_bbox, frame, allowed_classes)

        fps = 1.0 / (time.time() - start_time)
        print("FPS: %.2f" % fps)
        result = np.asarray(image)
        cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)
        result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

        if not FLAGS.dont_show:
            cv2.imshow("result", result)

        if FLAGS.output:
            out.write(result)
        if cv2.waitKey(1) & 0xFF == ord('q'): break
    cv2.destroyAllWindows()
def detect_video(url):
    """
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = 416

    saved_model_loaded = tf.saved_model.load('./checkpoints/yolov4Tiny-416', tags=[tag_constants.SERVING])
    infer = saved_model_loaded.signatures['serving_default']
    """
    try:
        name = random.random()
        vid = cv2.VideoCapture(url)

        # out = None

        currentFrame = 0
        while True:
            return_value, frame = vid.read()
            if return_value:
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                image = Image.fromarray(frame)
            else:
                print('Video has ended or failed, try a different video format!')
                break

            currentFrame += 1
            if ((currentFrame % 4) == 0) & ((currentFrame % 15) != 0):
                continue

            frame_size = frame.shape[:2]
            image_data = cv2.resize(frame, (input_size, input_size))
            image_data = image_data / 255.
            image_data = image_data[np.newaxis, ...].astype(np.float32)
            start_time = time.time()

            batch_data = tf.constant(image_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

            boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
                boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
                scores=tf.reshape(
                    pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
                max_output_size_per_class=50,
                max_total_size=50,
                iou_threshold=0.45,
                score_threshold=0.25
            )



            #bboxes = utils.format_boxes(boxes.numpy()[0], original_h, original_w)

            pred_bbox = [boxes.numpy(), scores.numpy(), classes.numpy(), valid_detections.numpy()]

            if (currentFrame % 15) == 0:
                original_h, original_w, _ = frame.shape
                bboxes = utils.format_boxes(boxes.numpy()[0], original_h, original_w)
                predictions = [bboxes, scores.numpy()[0], classes.numpy()[0], valid_detections.numpy()[0]]
                crop_detections(frame, predictions, 200, 50)

            image = utils.draw_bbox(frame, pred_bbox)
            fps = 1.0 / (time.time() - start_time)
            print("FPS: %.2f" % fps)
            result = np.asarray(image)
            cv2.namedWindow(str(name), cv2.WINDOW_AUTOSIZE)
            result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

            cv2.imshow(str(name), result)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
        cv2.destroyAllWindows()
        return None
    except Exception as e:
        print('oof', e)
        cv2.destroyAllWindows()
        return None
def inference(preprocess_queue, inference_queue):

    import tensorflow as tf
    import core.utils as utils

    from tensorflow.python.saved_model import tag_constants
    from tensorflow.compat.v1 import InteractiveSession
    from tensorflow.compat.v1 import ConfigProto
    from core.functions import count_objects, crop_objects
    from core.config import cfg
    from core.utils import read_class_names
    import os
    import random
    from core.yolov4 import filter_boxes

    tf.keras.backend.clear_session()

    input_size = Parameters.input_size

    model = OutsourceContract.model
    framework = Parameters.framework
    tiny = OutsourceContract.tiny
    weights = Parameters.weights
    iou = Parameters.iou
    score = Parameters.score

    physical_devices = tf.config.experimental.list_physical_devices('GPU')

    try:
        if len(physical_devices) > 0:
            tf.config.experimental.set_memory_growth(physical_devices[0], True)
    except:
        pass

    # configure gpu usage
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)

    # load model
    if framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=weights)
    else:
        saved_model_loaded = tf.saved_model.load(weights,
                                                 tags=[tag_constants.SERVING])

    # read in all class names from config
    class_names = utils.read_class_names(cfg.YOLO.CLASSES)

    count = Parameters.count
    info = Parameters.info
    crop = Parameters.crop

    while True:
        if not preprocess_queue.empty():
            queueData = preprocess_queue.get()
            while not preprocess_queue.empty():
                queueData = preprocess_queue.get()
            #preprocess_queue.task_done()
            images_data = queueData[0]
            name = queueData[1]
            original_image = queueData[2]

            #preprocess_queue.task_done()

            if framework == 'tflite':
                interpreter.allocate_tensors()
                input_details = interpreter.get_input_details()
                output_details = interpreter.get_output_details()
                interpreter.set_tensor(input_details[0]['index'], images_data)
                interpreter.invoke()
                pred = [
                    interpreter.get_tensor(output_details[i]['index'])
                    for i in range(len(output_details))
                ]
                if model == 'yolov3' and tiny == True:
                    boxes, pred_conf = filter_boxes(
                        pred[1],
                        pred[0],
                        score_threshold=0.25,
                        input_shape=tf.constant([input_size, input_size]))
                else:
                    boxes, pred_conf = filter_boxes(
                        pred[0],
                        pred[1],
                        score_threshold=0.25,
                        input_shape=tf.constant([input_size, input_size]))
            else:
                infer = saved_model_loaded.signatures['serving_default']
                batch_data = tf.constant(images_data)
                pred_bbox = infer(batch_data)
                for key, value in pred_bbox.items():
                    boxes = value[:, :, 0:4]
                    pred_conf = value[:, :, 4:]

            boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
                boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
                scores=tf.reshape(
                    pred_conf,
                    (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
                max_output_size_per_class=50,
                max_total_size=50,
                iou_threshold=iou,
                score_threshold=score)  # 1.2ms

            # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, xmax, ymax

            original_h, original_w, _ = original_image.shape

            bboxes = utils.format_boxes(boxes.numpy()[0], original_h,
                                        original_w)  # 1ms #-> no tf needed

            # hold all detection data in one variable
            pred_bbox = [
                bboxes,
                scores.numpy()[0],
                classes.numpy()[0],
                valid_detections.numpy()[0]
            ]

            # by default allow all classes in .names file
            allowed_classes = list(class_names.values())

            # custom allowed classes (uncomment line below to allow detections for only people)
            # allowed_classes = ['person']

            # if crop flag is enabled, crop each detection and save it as new image
            if crop:
                crop_path = os.path.join(os.getcwd(), 'detections', 'crop', name)
                try:
                    os.mkdir(crop_path)
                except FileExistsError:
                    pass
                crop_objects(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB),
                             pred_bbox, crop_path, allowed_classes)

            if count:
                # count objects found
                counted_classes = count_objects(
                    pred_bbox, by_class=False, allowed_classes=allowed_classes)
                # loop through dict and print
                for key, value in counted_classes.items():
                    print("Number of {}s: {}".format(key, value))
                boxtext, image = utils.draw_bbox(
                    original_image,
                    pred_bbox,
                    info,
                    counted_classes,
                    allowed_classes=allowed_classes)
            else:
                boxtext, image = utils.draw_bbox(
                    original_image,
                    pred_bbox,
                    info,
                    allowed_classes=allowed_classes)  # 0.5ms

            image = Image.fromarray(image.astype(np.uint8))  # 0.3ms

            inference_queue.put((boxtext, image, name))
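
inference() blocks on preprocess_queue and pushes its results to inference_queue, so it is meant to run alongside a producer. One hypothetical way to wire it up with multiprocessing (not shown in the source):

from multiprocessing import Process, Queue

if __name__ == '__main__':
    preprocess_queue, inference_queue = Queue(), Queue()
    worker = Process(target=inference, args=(preprocess_queue, inference_queue), daemon=True)
    worker.start()
    # the producer side would push tuples shaped like (images_data, name, original_image)
    # preprocess_queue.put((images_data, name, original_image))
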
Example #12
def main(_argv):
    avg=[]
    # Definition of the parameters
    max_cosine_distance = 0.4
    nn_budget = None
    nms_max_overlap = 1.0
    #regression model load
    weight_path='./2_input_model_2-3.5%/'
    loaded_model = tf.keras.models.load_model(weight_path)

    # initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    # calculate cosine distance metric
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    # initialize tracker
    tracker = Tracker(metric)

    # load configuration for object detector
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size
    video_path = FLAGS.video

    # load tflite model if flag is set
    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
    # otherwise load standard tensorflow saved model
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']

    # begin video capture
    try:
        vid = cv2.VideoCapture(int(video_path))
    except:
        vid = cv2.VideoCapture(video_path)

    out = None

    # get video ready to save locally if flag is set
    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    frame_num = 0
    # while video is running
    while True:
        return_value, frame = vid.read()
        if return_value:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(frame)
        else:
            print('Video has ended or failed, try a different video format!')
            break
        frame_num +=1
        print('Frame #: ', frame_num)
        frame_size = frame.shape[:2]
        image_data = cv2.resize(frame, (input_size, input_size))
        image_data = image_data / 255.
        image_data = image_data[np.newaxis, ...].astype(np.float32)
        start_time = time.time()

        # run detections on tflite if flag is set
        if FLAGS.framework == 'tflite':
            interpreter.set_tensor(input_details[0]['index'], image_data)
            interpreter.invoke()
            pred = [interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details))]
            # run detections using yolov3 if flag is set
            if FLAGS.model == 'yolov3' and FLAGS.tiny == True:
                boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25,
                                                input_shape=tf.constant([input_size, input_size]))
            else:
                boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25,
                                                input_shape=tf.constant([input_size, input_size]))
        else:
            batch_data = tf.constant(image_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(
                pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=FLAGS.iou,
            score_threshold=FLAGS.score
        )

        # convert data to numpy arrays and slice out unused elements
        num_objects = valid_detections.numpy()[0]
        bboxes = boxes.numpy()[0]
        bboxes = bboxes[0:int(num_objects)]
        scores = scores.numpy()[0]
        scores = scores[0:int(num_objects)]
        classes = classes.numpy()[0]
        classes = classes[0:int(num_objects)]

        # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height
        original_h, original_w, _ = frame.shape
        bboxes = utils.format_boxes(bboxes, original_h, original_w)

        # store all predictions in one parameter for simplicity when calling functions
        pred_bbox = [bboxes, scores, classes, num_objects]
        #print("pred_bbox: ",pred_bbox[0])
        #print("scores: ",pred_bbox[1])
        #print("classes :",pred_bbox[2])
        #print("num :",pred_bbox[3])
        #print("width :",width)
        #print("height :",height)
        # read in all class names from config
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)

        # by default allow all classes in .names file
        allowed_classes = list(class_names.values())

        # custom allowed classes (uncomment line below to customize tracker for only people)
        #allowed_classes = ['person']

        # loop through objects and use class index to get class name, allow only classes in allowed_classes list
        names = []
        deleted_indx = []
        for i in range(num_objects):
            class_indx = int(classes[i])
            class_name = class_names[class_indx]
            if class_name not in allowed_classes:
                deleted_indx.append(i)
            else:
                names.append(class_name)
        names = np.array(names)
        count = len(names)
        if FLAGS.count:
            cv2.putText(frame, "Objects being tracked: {}".format(count), (5, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 255, 0), 2)
            print("Objects being tracked: {}".format(count))
        # delete detections that are not in allowed_classes
        bboxes = np.delete(bboxes, deleted_indx, axis=0)
        scores = np.delete(scores, deleted_indx, axis=0)

        # encode yolo detections and feed to tracker
        features = encoder(frame, bboxes)
        detections = [Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip(bboxes, scores, names, features)]

        #initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima supression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        #print("boxs ",boxs)
        #print("scores ",scores)
        #print("classes ",classes)
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        #print("indices ",indices)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)
        cv2.putText(frame, "using regress", (5, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (255, 0, 255), 2)
        #cv2.putText(frame, "Objects being detected: {}".format(count), (5, 350), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 0, 255), 2)
        cv2.putText(frame, "frame# {}".format(frame_num), (750, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (255, 0, 255), 2)


        # update tracks
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()
            if 'entrance' not in classes:
                if len(classes)>1:
                    if(contains_duplicates(classes)==False):
                        #color = (50, 89, 170)
                        check_rect=0
                        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
                        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        ########## set sticker as low priority#############
                        if classes[0] in ('mat', 'sensor') and classes[1] in ('mat', 'sensor'):
                            print("*************NO STK**********************************")
                            color = (50, 89, 170)
                            x1,y1,x2,y2=convert2(width,height,int(boxs[0][0]),int(boxs[0][1]),int(boxs[0][0]+boxs[0][2]),int(boxs[0][1]+boxs[0][3]))#xywh to xmin ymin xmax ymax
                            x3,y3,x4,y4=convert2(width,height,int(bboxes[1][0]),int(bboxes[1][1]),int(bboxes[1][0]+bboxes[1][2]),int(bboxes[1][1]+bboxes[1][3]))#xywh to xmin ymin xmax ymax
                            reg_input=np.array([[class_index(classes[0]),x1,y1,x2,y2,class_index(classes[1]),x3,y3,x4,y4]])
                            predictions = loaded_model.predict(reg_input)
                            a1_pred = predictions[0]
                            b1_pred = predictions[1]
                            c1_pred = predictions[2]
                            d1_pred = predictions[3]
                            xmin,xmax,ymin,ymax=convert(width,height,a1_pred,b1_pred,c1_pred,d1_pred)
                            start_point = (xmin, ymin)
                            end_point = (xmax, ymax)
                            rect1=xmax-xmin
                            rect2=ymax-ymin
                            check_rect=rect2/rect1
                        ################ else condition for sticker ######
                        else:
                            print("*************USE STK**********************************")
                            if classes[0] in ('famSticker', 'okmartSticker', 'sevenSticker'):
                                color = (60, 120, 40)
                                x1,y1,x2,y2=convert2(width,height,int(boxs[0][0]),int(boxs[0][1]),int(boxs[0][0]+boxs[0][2]),int(boxs[0][1]+boxs[0][3]))#xywh to xmin ymin xmax ymax
                                x3,y3,x4,y4=convert2(width,height,int(bboxes[1][0]),int(bboxes[1][1]),int(bboxes[1][0]+bboxes[1][2]),int(bboxes[1][1]+bboxes[1][3]))#xywh to xmin ymin xmax ymax
                                reg_input=np.array([[class_index(classes[0]),x1,y1,x2,y2,class_index(classes[1]),x3,y3,x4,y4]])


                                #### ratio ####
                                C1_x = boxs[0][0] + (boxs[0][2] / 2)
                                C1_y = boxs[0][1] + (boxs[0][3] / 2)
                                C2_x = bboxes[1][0] + (bboxes[1][2] / 2)
                                C2_y = bboxes[1][1] + (bboxes[1][3] / 2)
                                Dx = C2_x - C1_x
                                Dy = C2_y - C1_y
                                # The rectangles do not intersect and partially overlap along the X axis:
                                # the minimum distance is between the bottom edge of the upper rectangle
                                # and the top edge of the lower rectangle.
                                if (Dx < ((int(boxs[0][0] + boxs[0][2]) + int(bboxes[1][0] + bboxes[1][2])) / 2)) and (Dy >= ((int(boxs[0][1] + boxs[0][3]) + int(bboxes[1][1] + bboxes[1][3])) / 2)):
                                    min_dist = Dy - ((int(boxs[0][1] + boxs[0][3]) + int(bboxes[1][1] + bboxes[1][3])) / 2)
                                # The rectangles do not intersect and partially overlap along the Y axis:
                                # the minimum distance is between the right edge of the left rectangle
                                # and the left edge of the right rectangle.
                                elif (Dx >= ((int(boxs[0][0] + boxs[0][2]) + int(bboxes[1][0] + bboxes[1][2])) / 2)) and (Dy < ((int(boxs[0][1] + boxs[0][3]) + int(bboxes[1][1] + bboxes[1][3])) / 2)):
                                    min_dist = Dx - ((int(boxs[0][0] + boxs[0][2]) + int(bboxes[1][0] + bboxes[1][2])) / 2)
                                # The rectangles do not intersect and do not overlap on either axis:
                                # the minimum distance is between the two closest vertices (Pythagorean theorem).
                                elif (Dx >= ((int(boxs[0][0] + boxs[0][2]) + int(bboxes[1][0] + bboxes[1][2])) / 2)) and (Dy >= ((int(boxs[0][1] + boxs[0][3]) + int(bboxes[1][1] + bboxes[1][3])) / 2)):
                                    delta_x = Dx - ((int(boxs[0][0] + boxs[0][2]) + int(bboxes[1][0] + bboxes[1][2])) / 2)
                                    delta_y = Dy - ((int(boxs[0][1] + boxs[0][3]) + int(bboxes[1][1] + bboxes[1][3])) / 2)
                                    min_dist = np.sqrt(delta_x * delta_x + delta_y * delta_y)
                                # The rectangles intersect: flag the minimum distance as -1.
                                else:
                                    min_dist = -1
                                if(classes[1]=='mat'):
                                    if((min_dist/Dy)<3):
                                        predictions = loaded_model.predict(reg_input)
                                        a1_pred = predictions[0]
                                        b1_pred = predictions[1]
                                        c1_pred = predictions[2]
                                        d1_pred = predictions[3]
                                        xmin,xmax,ymin,ymax=convert(width,height,a1_pred,b1_pred,c1_pred,d1_pred)
                                        start_point = (xmin, ymin)
                                        end_point = (xmax, ymax)
                                        rect1=xmax-xmin
                                        rect2=ymax-ymin
                                        check_rect=rect2/rect1
                                    else:
                                        print("not predict")
                                elif(classes[1]=='sensor'):
                                    if((min_dist/Dx)<3):
                                        predictions = loaded_model.predict(reg_input)
                                        a1_pred = predictions[0]
                                        b1_pred = predictions[1]
                                        c1_pred = predictions[2]
                                        d1_pred = predictions[3]
                                        xmin,xmax,ymin,ymax=convert(width,height,a1_pred,b1_pred,c1_pred,d1_pred)
                                        start_point = (xmin, ymin)
                                        end_point = (xmax, ymax)
                                        rect1=xmax-xmin
                                        rect2=ymax-ymin
                                        check_rect=rect2/rect1
                                    else:
                                        print("not predict")
                            elif classes[1] in ('famSticker', 'okmartSticker', 'sevenSticker'):
                                color = (60, 120, 40)
                                x1,y1,x2,y2=convert2(width,height,int(boxs[0][0]),int(boxs[0][1]),int(boxs[0][0]+boxs[0][2]),int(boxs[0][1]+boxs[0][3]))#xywh to xmin ymin xmax ymax
                                x3,y3,x4,y4=convert2(width,height,int(bboxes[1][0]),int(bboxes[1][1]),int(bboxes[1][0]+bboxes[1][2]),int(bboxes[1][1]+bboxes[1][3]))#xywh to xmin ymin xmax ymax
                                reg_input=np.array([[class_index(classes[0]),x1,y1,x2,y2,class_index(classes[1]),x3,y3,x4,y4]])


                                #### ratio ####
                                C1_x=boxs[0][0]+(boxs[0][2]/2)
                                C1_y=boxs[0][1]+(boxs[0][3]/2)
                                C2_x=bboxes[1][0]+(bboxes[1][2]/2)
                                C2_y=bboxes[1][1]+(bboxes[1][3]/2)
                                Dx = (C2_x - C1_x)
                                Dy = (C2_y - C1_y)
                                # Same minimum-distance cases as above: overlap on X, overlap on Y,
                                # fully disjoint (Pythagorean theorem), or intersecting (-1).
                                if((Dx < ((int(boxs[0][0]+boxs[0][2]) + int(bboxes[1][0]+bboxes[1][2]))/ 2)) and (Dy >= ((int(boxs[0][1]+boxs[0][3]) + int(bboxes[1][1]+bboxes[1][3])) / 2))):
                                    min_dist = Dy - ((int(boxs[0][1]+boxs[0][3]) + int(bboxes[1][1]+bboxes[1][3])) / 2)
                                elif((Dx >= ((int(boxs[0][0]+boxs[0][2]) + int(bboxes[1][0]+bboxes[1][2]))/ 2)) and (Dy < ((int(boxs[0][1]+boxs[0][3]) + int(bboxes[1][1]+bboxes[1][3])) / 2))):
                                    min_dist = Dx - ((int(boxs[0][0]+boxs[0][2]) + int(bboxes[1][0]+bboxes[1][2]))/ 2)
                                elif((Dx >= ((int(boxs[0][0]+boxs[0][2]) + int(bboxes[1][0]+bboxes[1][2]))/ 2)) and (Dy >= ((int(boxs[0][1]+boxs[0][3]) + int(bboxes[1][1]+bboxes[1][3])) / 2))):
                                    delta_x = Dx - ((int(boxs[0][0]+boxs[0][2]) + int(bboxes[1][0]+bboxes[1][2]))/ 2)
                                    delta_y = Dy - ((int(boxs[0][1]+boxs[0][3]) + int(bboxes[1][1]+bboxes[1][3]))/ 2)
                                    min_dist = np.sqrt(delta_x * delta_x + delta_y * delta_y)
                                else:
                                    min_dist = -1
                                if(classes[0]=='mat'):
                                    if((min_dist/Dy)<3):
                                        predictions = loaded_model.predict(reg_input)
                                        a1_pred = predictions[0]
                                        b1_pred = predictions[1]
                                        c1_pred = predictions[2]
                                        d1_pred = predictions[3]
                                        xmin,xmax,ymin,ymax=convert(width,height,a1_pred,b1_pred,c1_pred,d1_pred)
                                        start_point = (xmin, ymin)
                                        end_point = (xmax, ymax)
                                        rect1=xmax-xmin
                                        rect2=ymax-ymin
                                        check_rect=rect2/rect1
                                    else:
                                        print("not predict")
                                elif(classes[0]=='sensor'):
                                    if((min_dist/Dx)<3):
                                        predictions = loaded_model.predict(reg_input)
                                        a1_pred = predictions[0]
                                        b1_pred = predictions[1]
                                        c1_pred = predictions[2]
                                        d1_pred = predictions[3]
                                        xmin,xmax,ymin,ymax=convert(width,height,a1_pred,b1_pred,c1_pred,d1_pred)
                                        start_point = (xmin, ymin)
                                        end_point = (xmax, ymax)
                                        rect1=xmax-xmin
                                        rect2=ymax-ymin
                                        check_rect=rect2/rect1
                                    else:
                                        print("not predict")


                      ##############



                        ##########################################
                        ######## check door size and display #########

                        print("check_rect:{}".format(check_rect))
                        if check_rect>1 :
                            blk = np.zeros(frame.shape, np.uint8)
                            cv2.rectangle(blk, start_point, end_point, color, cv2.FILLED)
                            frame =cv2.addWeighted(frame, 1.0, blk, 0.5, 1)
                            print("predict_BBox Coords (xmin, ymin, xmax, ymax): {}".format((xmin,ymin,xmax,ymax)))
                        else:
                            print("not show predicted bbox")
                        ###############################
            ########
            #      select one entrance
            ########
            #if classes.count('entrance')>1:
            #    entrance_num=[]
            #    iou_list=[]
            #    iou_check=[]
            #    for i in range(len(classes)):
            #        if classes[i]=='entrance'
            #        entrance_num.append(i)
            #        if len(classes)>1:
            #            if(contains_duplicates(classes)==False):
            #                color = (50, 89, 170)
            #                width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
            #                height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
            #                x1,y1,x2,y2=convert2(width,height,int(boxs[0][0]),int(boxs[0][1]),int(boxs[0][0]+boxs[0][2]),int(boxs[0][1]+boxs[0][3]))#xywh to xmin ymin xmax ymax
            #                x3,y3,x4,y4=convert2(width,height,int(bboxes[1][0]),int(bboxes[1][1]),int(bboxes[1][0]+bboxes[1][2]),int(bboxes[1][1]+bboxes[1][3]))#xywh to xmin ymin xmax ymax
            #                reg_input=np.array([[class_index(classes[0]),x1,y1,x2,y2,class_index(classes[1]),x3,y3,x4,y4]])
            #                predictions = loaded_model.predict(reg_input)
            #                a1_pred = predictions[0]
            #                b1_pred = predictions[1]
            #                c1_pred = predictions[2]
            #                d1_pred = predictions[3]
            #                xmin,xmax,ymin,ymax=convert(width,height,a1_pred,b1_pred,c1_pred,d1_pred)
            #                ###IOU###
            #                GT_bbox_area = (xmax -  xmin + 1) * (  ymax -ymin + 1)
            #                ###########
            #                ## check entrance ##
            #                Pred_bbox_area =(x_bottomright_p - x_topleft_p + 1 ) * ( y_bottomright_p -y_topleft_p + 1)
            #                x_top_left =np.max([x_topleft_gt, x_topleft_p])
            #                y_top_left = np.max([y_topleft_gt, y_topleft_p])
            #                x_bottom_right = np.min([x_bottomright_gt, x_bottomright_p])
            #                y_bottom_right = np.min([y_bottomright_gt, y_bottomright_p])
            #
            #                intersection_area = (x_bottom_right- x_top_left + 1) * (y_bottom_right-y_top_left  + 1)
            #
            #                union_area = (GT_bbox_area + Pred_bbox_area - intersection_area)
            #
            #                iou_check.append(intersection_area/union_area)
            #
            #        for j in len(iou_check):
            #           if entrance_num[j]<iou_check.max:
            #               track.delete
            #if(int(track.track_id)>=3 or (int(track.track_id)>10 and int(track.track_id)<20 ) ):
            #frame_num
            ###################### draw bbox on screen
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]

            if(class_name=='entrance'):
                if( int(track.track_id)==1 and frame_num>121):
                    print("skip Tracker ID: {}, Class: {}".format(str(track.track_id), class_name))
                else:
                    print("RED Tracker ID: {}, Class: {}".format(str(track.track_id), class_name))
                    blk = np.zeros(frame.shape, np.uint8)
                    cv2.rectangle(blk,(int(bbox[0]*1.05), int(bbox[1]*1.05)), (int(bbox[2]*0.95), int(bbox[3]*0.95)), (255, 0, 0), cv2.FILLED)
                    frame =cv2.addWeighted(frame, 1.0, blk, 0.5, 1)
                    cv2.rectangle(frame, (int(bbox[0]*1.05), int(bbox[1]*1.05)), (int(bbox[2]*0.95), int(bbox[3]*0.95)), color, 2)
                    cv2.rectangle(frame, (int(bbox[0]*1.05), int(bbox[1]*1.05-30)), (int(bbox[0]*1.05)+(len(class_name)+len(str(track.track_id)))*17, int(bbox[1]*1.05)), color, -1)
                    cv2.putText(frame, class_name + "-" + str(track.track_id),(int(bbox[0]*1.05), int(bbox[1]*1.05-10)),0, 0.75, (255,255,255),2)


        # if enable info flag then print details about each track
            if FLAGS.info:
                print("Tracker ID: {}, Class: {},  BBox Coords (xmin, ymin, xmax, ymax): {}".format(str(track.track_id), class_name, (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))))

        # calculate frames per second of running detections
        fps = 1.0 / (time.time() - start_time)
        print("FPS: %.2f" % fps)
        avg.append(fps)
        print("avg fps {}".format(statistics.mean(avg)))
        cv2.putText(frame, "FPS: %.2f" % fps, (50, 500), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (66, 245, 141), 2)
        result = np.asarray(frame)
        result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

        if not FLAGS.dont_show:
            cv2.imshow("Output Video", result)

        # if output flag is set, save video file
        if FLAGS.output:
            out.write(result)
        if cv2.waitKey(1) & 0xFF == ord('q'): break
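The door/sticker example above inlines the same "minimum distance between two rectangles" case analysis twice, working directly from box corners. As a point of comparison, here is a standalone sketch written against (x, y, w, h) boxes using edge gaps; the function name, the signature, and the convention of returning 0.0 for intersecting boxes (the script instead uses -1) are choices made for this sketch, not part of the original code.

import numpy as np

def min_rect_distance(box_a, box_b):
    """Minimum gap between two axis-aligned rectangles given as (x, y, w, h).

    Illustrative helper only: returns 0.0 when the rectangles overlap.
    """
    ax, ay, aw, ah = box_a
    bx, by, bw, bh = box_b
    # Horizontal / vertical gaps between the closest edges (negative => overlap on that axis)
    dx = max(bx - (ax + aw), ax - (bx + bw))
    dy = max(by - (ay + ah), ay - (by + bh))
    if dx < 0 and dy < 0:
        return 0.0                      # rectangles intersect
    if dx < 0:
        return float(dy)                # overlap along X: vertical gap only
    if dy < 0:
        return float(dx)                # overlap along Y: horizontal gap only
    return float(np.hypot(dx, dy))      # disjoint on both axes: closest-corner distance

# e.g. min_rect_distance((0, 0, 10, 10), (20, 0, 5, 5)) -> 10.0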
Example #13
0
def detect(saved_model_loaded, infer, input_size, image_path):
    # config = ConfigProto()
    # session = InteractiveSession(config=config)
    # STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    # start = time.perf_counter()

    original_image = cv2.imread(image_path)
    original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
    image_data = cv2.resize(original_image, (input_size, input_size))
    image_data = image_data / 255.
    count = 0
    # get image name by using split method
    image_name = image_path.split('/')[-1]
    image_name = image_name.split('.')[0]
    images_data = []

    for i in range(1):
        images_data.append(image_data)

    images_data = np.asarray(images_data).astype(np.float32)
    #infer = saved_model_loaded.signatures['serving_default']
    batch_data = tf.constant(images_data)
    pred_bbox = infer(batch_data)
    for key, value in pred_bbox.items():
        boxes = value[:, :, 0:4]
        pred_conf = value[:, :, 4:]

    # run non max suppression on detections
    boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
        boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
        scores=tf.reshape(
            pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
        max_output_size_per_class=50,
        max_total_size=50,
        iou_threshold=0.20,
        score_threshold=0.20)

    # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, xmax, ymax
    original_h, original_w, _ = original_image.shape
    bboxes = utils.format_boxes(boxes.numpy()[0], original_h, original_w)

    # hold all detection data in one variable
    pred_bbox = [
        bboxes,
        scores.numpy()[0],
        classes.numpy()[0],
        valid_detections.numpy()[0]
    ]

    # read in all class names from config
    class_names = utils.read_class_names(cfg.YOLO.CLASSES)

    ocr(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB), pred_bbox)

    # count objects found
    counted_classes = count_objects(pred_bbox,
                                    by_class=True,
                                    allowed_classes=['person'])
    # print(counted_classes.items())

    if len(counted_classes.items()) != 0:
        count += counted_classes['person']
    else:
        count += 0

    # image = utils.draw_bbox(original_image, pred_bbox, False, counted_classes, allowed_classes=['person'])
    # image = Image.fromarray(image.astype(np.uint8))
    # image.show()
    # image = cv2.cvtColor(np.array(image), cv2.COLOR_BGR2RGB)
    # cv2.imwrite(FLAGS.output + 'detection' + str(count) + '.png', image)

    update_db(count)
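The examples repeatedly call utils.format_boxes to turn the normalized (ymin, xmin, ymax, xmax) output of combined_non_max_suppression into pixel coordinates. The repo's own helper is not shown here, and some of the later comments describe an (xmin, ymin, width, height) output instead; purely as an illustration of the scaling step the comment describes, a hypothetical stand-in could look like this.

import numpy as np

def denormalize_boxes(norm_boxes, image_h, image_w):
    """Convert normalized (ymin, xmin, ymax, xmax) boxes to pixel (xmin, ymin, xmax, ymax).

    Illustrative sketch only; not the repo's utils.format_boxes.
    """
    out = []
    for ymin, xmin, ymax, xmax in np.asarray(norm_boxes, dtype=float):
        out.append([int(xmin * image_w), int(ymin * image_h),
                    int(xmax * image_w), int(ymax * image_h)])
    return np.array(out)

# e.g. denormalize_boxes([[0.1, 0.2, 0.5, 0.6]], 720, 1280) -> [[256, 72, 768, 360]]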
Example #14
0
def main(_argv):
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size
    image_path = FLAGS.image

    original_image = cv2.imread(image_path)
    original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)

    # image_data = utils.image_preprocess(np.copy(original_image), [input_size, input_size])
    image_data = cv2.resize(original_image, (input_size, input_size))
    image_data = image_data / 255.
    # image_data = image_data[np.newaxis, ...].astype(np.float32)

    images_data = []
    for i in range(1):
        images_data.append(image_data)
    images_data = np.asarray(images_data).astype(np.float32)

    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
        interpreter.set_tensor(input_details[0]['index'], images_data)
        interpreter.invoke()
        pred = [
            interpreter.get_tensor(output_details[i]['index'])
            for i in range(len(output_details))
        ]
        if FLAGS.model == 'yolov3' and FLAGS.tiny == True:
            boxes, pred_conf = filter_boxes(pred[1],
                                            pred[0],
                                            score_threshold=0.25,
                                            input_shape=tf.constant(
                                                [input_size, input_size]))
        else:
            boxes, pred_conf = filter_boxes(pred[0],
                                            pred[1],
                                            score_threshold=0.25,
                                            input_shape=tf.constant(
                                                [input_size, input_size]))
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights,
                                                 tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']
        batch_data = tf.constant(images_data)
        pred_bbox = infer(batch_data)
        for key, value in pred_bbox.items():
            boxes = value[:, :, 0:4]
            pred_conf = value[:, :, 4:]

    boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
        boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
        scores=tf.reshape(
            pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
        max_output_size_per_class=50,
        max_total_size=50,
        iou_threshold=FLAGS.iou,
        score_threshold=FLAGS.score)

    # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, xmax, ymax
    original_h, original_w, _ = original_image.shape
    bboxes = utils.format_boxes(boxes.numpy()[0], original_h, original_w)

    # hold all detection data in one variable
    pred_bbox = [
        bboxes,
        scores.numpy()[0],
        classes.numpy()[0],
        valid_detections.numpy()[0]
    ]

    # read in all class names from config
    class_names = utils.read_class_names(cfg.YOLO.CLASSES)

    # by default allow all classes in .names file
    allowed_classes = list(class_names.values())

    # pred_bbox = [boxes.numpy(), scores.numpy(), classes.numpy(), valid_detections.numpy()]
    # image = utils.draw_bbox(original_image, pred_bbox)
    # image = utils.draw_bbox(image_data*255, pred_bbox)

    if FLAGS.count:
        # count objects found
        counted_classes = count_objects(pred_bbox,
                                        by_class=True,
                                        allowed_classes=allowed_classes)
        # loop through dict and print
        for key, value in counted_classes.items():
            print("Number of {}s: {}".format(key, value))
        image = utils.draw_bbox(original_image,
                                pred_bbox,
                                FLAGS.info,
                                counted_classes,
                                allowed_classes=allowed_classes)
    else:
        image = utils.draw_bbox(original_image,
                                pred_bbox,
                                FLAGS.info,
                                allowed_classes=allowed_classes)

    image = Image.fromarray(image.astype(np.uint8))
    image.show()
    image = cv2.cvtColor(np.array(image), cv2.COLOR_BGR2RGB)
    cv2.imwrite(FLAGS.output, image)
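Examples #13 and #14 tally detections per class with count_objects from the repo's helper module. The helper itself is not included here; judging only from how it is called (it returns a dict mapping class name to count, filtered by allowed_classes), a hypothetical equivalent might look like the sketch below. The signature, including the extra class_names argument, is assumed for the sake of a self-contained example.

def count_objects_sketch(pred_bbox, by_class=True, allowed_classes=None, class_names=None):
    """Hypothetical stand-in for count_objects(): tally detections per class.

    pred_bbox is [bboxes, scores, classes, num_objects] as assembled above;
    class_names maps class index -> name (e.g. utils.read_class_names(...)).
    """
    _, _, classes, num_objects = pred_bbox
    counts = {}
    for i in range(int(num_objects)):
        name = class_names[int(classes[i])] if class_names else str(int(classes[i]))
        if allowed_classes and name not in allowed_classes:
            continue
        key = name if by_class else 'total object'
        counts[key] = counts.get(key, 0) + 1
    return counts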
Example #15
0
def main(_argv):
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size
    video_path = FLAGS.video
    # get video name by using split method
    video_name = video_path.split('/')[-1]
    video_name = video_name.split('.')[0]
    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']

    # begin video capture
    try:
        vid = cv2.VideoCapture(int(video_path))
    except:
        vid = cv2.VideoCapture(video_path)

    out = None

    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    frame_num = 0
    while True:
        return_value, frame = vid.read()
        if return_value:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_num += 1
            image = Image.fromarray(frame)
        else:
            print('Video has ended or failed, try a different video format!')
            break
    
        frame_size = frame.shape[:2]
        image_data = cv2.resize(frame, (input_size, input_size))
        image_data = image_data / 255.
        image_data = image_data[np.newaxis, ...].astype(np.float32)
        start_time = time.time()

        if FLAGS.framework == 'tflite':
            interpreter.set_tensor(input_details[0]['index'], image_data)
            interpreter.invoke()
            pred = [interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details))]
            if FLAGS.model == 'yolov3' and FLAGS.tiny == True:
                boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25,
                                                input_shape=tf.constant([input_size, input_size]))
            else:
                boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25,
                                                input_shape=tf.constant([input_size, input_size]))
        else:
            batch_data = tf.constant(image_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(
                pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=FLAGS.iou,
            score_threshold=FLAGS.score
       )

        # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, xmax, ymax
        original_h, original_w, _ = frame.shape
        bboxes = utils.format_boxes(boxes.numpy()[0], original_h, original_w)
        
        pred_bbox = [bboxes, scores.numpy()[0], classes.numpy()[0], valid_detections.numpy()[0]]
        # print(pred_bbox[2])
        out_boxes, out_scores, out_classes, num_boxes = pred_bbox

        # read in all class names from config
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)

        # by default allow all classes in .names file
        # allowed_classes = list(class_names.values())
        
        # custom allowed classes (uncomment line below to allow detections for only SELECTED DETECTION CLASSES)
        allowed_classes = ['car','truck','motorbike','bus']
        # allowed_classes = ['car']

        #################################################################################################################################
        # Calculating the distance   xmin, ymin, xmax, ymax
        cv2.line(img=frame, pt1=(595,940),pt2=(1567,940),  color=(0, 0, 180), thickness=3, lineType=8, shift=0)
        for i, b in zip(out_boxes,out_classes):
            f = 1460 ## Focal length of the camera
            if i[0]>1 and i[1]>105 and i[2]<1920 and i[3]<910 and int(b) == 2: 
                wpix = i[2] - i[0]
                w = 1.7  #car width
                # D = round((f*w)/wpix, 2)
                # h = 1.2        #ORIGINAL === 1.6            # Most vehicles have a size that ranges from 1.5 – 1.8 meters high and widths of 1.6-1.7 meters.
                
                d_original =round((f*w)/wpix,2)
                d = d_original - 3    #2.3 from the camera to the front of the car 2.2 is for yolo car 
                d = round(d,2)
                print("{} meters".format(d_original),end = ",")
                print("{} meters".format(d))
                cv2.putText(frame, "{}m".format(d), (int(i[0]+ ((int(i[2]-int(i[0]))/2))), int(i[1])-20), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (36,255,12), 2)


            elif i[0]>1 and i[1]>105 and i[2]<1920 and i[3]<940 and int(b) == 3:
                wpix = i[3] - i[1]  # pixel height of the detection (needed for the estimate below)
                # w = 1.6  #car width
                # D = round((f*w)/wpix, 2)
                h = 0.75           # Most motorbikes are about 75 cm tall
                d_original =round((f*h)/wpix,2)
                d = d_original - 2.3 -1.1  # 2.3 from the camera to the front of the car 1.1 is for yolo motorbike 
                d = round(d,2)
                print("{} meters".format(d_original),end = ",")
                print("{} meters".format(d))
                cv2.putText(frame, "{}m".format(d), (int(i[0]+ ((int(i[2]-int(i[0]))/2))), int(i[1])-20), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (36,255,12), 2)

            # ##Because bus is too long I will measure with width 
            elif i[0]>1 and i[1]>105 and i[2]<1920 and i[3]<940 and int(b) == 5:
                wpix = i[2] - i[0]
                # w = 1.6  #
                # D = round((f*w)/wpix, 2)
                # h = 3.5           # Most buses height 4.3 meters
                w = 2.3
                d_original =round((f*w)/wpix,2)
                d = d_original - 2.3 # 2.3 from the camera to the front of the car  BUSSS
                d = round(d,2) 
                print("{} meters".format(d_original),end = ",")
                print("{} meters".format(d))
                cv2.putText(frame, "{}m".format(d), (int(i[0]+ ((int(i[2]-int(i[0]))/2))), int(i[1])-20), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (36,255,12), 2)
              
            elif i[0]>1 and i[1]>105 and i[2]<1920 and i[3]<940 and int(b) == 6:
                wpix = i[3] - i[1]
                # w = 1.6  #car width
                # D = round((f*w)/wpix, 2)
                h = 4.3          # Most train height 4.3 meters 
                d_original =round((f*h)/wpix,2)
                d = d_original - 2.3  # 2.3 from the camera to the front of the car, NO TRAINNNNNNNNN
                d = round(d,2)
                print("{} meters".format(d_original),end = ",")
                print("{} meters".format(d))
                cv2.putText(frame, "{}m".format(d), (int(i[0]+ ((int(i[2]-int(i[0]))/2))), int(i[1])-20), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (36,255,12), 2)

            # #Because truck is too long I will measure with width     
            elif i[0]>1 and i[1]>105 and i[2]<1920 and i[3]<910 and int(b) == 7:
                wpix = i[3] - i[1]
                # w = 2.7  #Most truck height 2.7
                # D = round((f*w)/wpix, 2)
                h = 3       # assumed truck height (metres) used for the estimate
                # w = 1.8
                d_original =round((f*h)/wpix,2)
                d = d_original - 3  # 2.3 from the camera to the front of the car 5 is for yolo TRUCKCKKKKKKK
                d = round(d,2)
                print("{} meters".format(d_original),end = ",")
                print("{} meters".format(d))
                cv2.putText(frame, "{}m".format(d), (int(i[0]+ ((int(i[2]-int(i[0]))/2))), int(i[1])-20), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (36,255,12), 2)
        
            else:
                pass
        ###################################################################################################################################



        # if crop flag is enabled, crop each detection and save it as new image
        if FLAGS.crop:
            crop_rate = 150 # capture images every so many frames (ex. crop photos every 150 frames)
            crop_path = os.path.join(os.getcwd(), 'detections', 'crop', video_name)
            try:
                os.mkdir(crop_path)
            except FileExistsError:
                pass
            if frame_num % crop_rate == 0:
                final_path = os.path.join(crop_path, 'frame_' + str(frame_num))
                try:
                    os.mkdir(final_path)
                except FileExistsError:
                    pass          
                crop_objects(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB), pred_bbox, final_path, allowed_classes)
            else:
                pass

        if FLAGS.count:
            # count objects found
            counted_classes = count_objects(pred_bbox, by_class = False, allowed_classes=allowed_classes)
            # loop through dict and print
            for key, value in counted_classes.items():
                print("Number of {}s: {}".format(key, value))
            image = utils.draw_bbox(frame, pred_bbox, FLAGS.info, counted_classes, allowed_classes=allowed_classes, read_plate=FLAGS.plate)
        else:
            image = utils.draw_bbox(frame, pred_bbox, FLAGS.info, allowed_classes=allowed_classes, read_plate=FLAGS.plate)


        fps = 1.0 / (time.time() - start_time)
        print("FPS: %.2f" % fps)
        result = np.asarray(image)
        cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)
        result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        
        if not FLAGS.dont_show:
            cv2.imshow("result", result)
        
        if FLAGS.output:
            out.write(result)
        if cv2.waitKey(1) & 0xFF == ord('q'): break
    cv2.destroyAllWindows()
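Example #15 overlays a rough distance for each vehicle using the pinhole-camera relation distance ≈ focal_length_px × real_size_m ÷ size_px, then subtracts a fixed camera-to-bumper offset. A small helper with an assumed name makes the arithmetic explicit; the 1460 px focal length and the per-class real-world sizes are simply the values hard-coded above.

def estimate_distance_m(size_px, real_size_m, focal_px=1460.0, offset_m=0.0):
    """Pinhole-camera distance estimate: distance = focal_px * real_size_m / size_px.

    size_px     -- object width or height in pixels (e.g. xmax - xmin)
    real_size_m -- assumed real-world size of the object class in metres
    focal_px    -- camera focal length expressed in pixels (1460 in the script above)
    offset_m    -- correction subtracted afterwards (camera-to-bumper distance, etc.)
    Illustrative helper, not part of the original script.
    """
    if size_px <= 0:
        return None
    return round(focal_px * real_size_m / size_px - offset_m, 2)

# e.g. a car (assumed 1.7 m wide) spanning 400 px:
# estimate_distance_m(400, 1.7, offset_m=3) -> about 3.2 m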
Example #16
0
def main(_argv):
    # Definition of the parameters
    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 0.8

    # initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    # calculate cosine distance metric
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    # initialize tracker
    tracker = Tracker(metric)

    # load configuration for object detector
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)

    # Loading the pretrained model weights
    saved_model_loaded = tf.saved_model.load(FLAGS.weights_path,
                                             tags=[tag_constants.SERVING])
    infer = saved_model_loaded.signatures['serving_default']

    # Loading the stores configuration JSON file
    stores_config_filename = 'stores_sections.json'
    stores_config_filepath = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), stores_config_filename)
    stores_sections = load_json_file(stores_config_filepath)

    #Creating engine to query from the database
    engine = get_db()

    # Getting the list of video filenames that had been processed
    processed_videos = pd.read_sql('SELECT DISTINCT name_video FROM counts',
                                   engine)
    processed_videos = processed_videos.name_video.tolist()

    # Get the current directory where this file person_tracker.py is located
    file_directory = os.getcwd()

    # Changing to the root directory (Google Colab root directory in this case)
    # to be able to extract the video filenames in another location
    os.chdir('/content')

    # Getting the video filenames available on the repository
    mypath = FLAGS.videos_repository_path
    onlyfiles = [
        f for f in os.listdir(mypath)
        if os.path.isfile(os.path.join(mypath, f))
    ]

    # Changing back to the person_tracker.py directory to continue with the process of the videos
    os.chdir(file_directory)

    # Computing the video filenames that need to be processed
    videos_to_process = list(set(onlyfiles) - set(processed_videos))
    print("Videos to process: ", len(videos_to_process))

    # Loop to process all the videos that have not been processed yet
    for i in range(0, len(videos_to_process)):
        print(f"Processing video: {i+1}/{len(videos_to_process)}")

        # Initializing variables from the Flags values
        input_size = FLAGS.size
        video_path = os.path.join(mypath, videos_to_process[i])
        print(video_path)
        output_csv_path = FLAGS.output_csv_path
        count_csv_path = FLAGS.count_csv_path
        file_name = video_path.split("/")[-1]

        #Extract the date and time information and camera number from the video_path string
        if len(re.findall('[0-9]{14}', video_path)) == 2:
            time_start_vid, time_end_vid = re.findall('[0-9]{14}', video_path)
            time_start_vid_dt = datetime.strptime(str(time_start_vid),
                                                  '%Y%m%d%H%M%S')
            time_end_vid_dt = datetime.strptime(str(time_end_vid),
                                                '%Y%m%d%H%M%S')
            camera = int(re.findall(r'_([0-9]{1})_', video_path.lower())[0])

        # Limit line points for the people counter
        # the only cameras of interest for counting people in and out are cameras 1 and 2
        if camera == 1:
            startline = (614, 95)
            endline = (807, 95)
        elif camera == 2:
            startline = (305, 175)
            endline = (476, 175)
        else:
            startline = (0, 0)
            endline = (0, 0)

        # Extract the name of the store from the video_path string
        store_name = re.findall(r'/([a-z0-9\s]*)_*', video_path.lower())[-1]

        # Change the default name that video filenames have of san diego store
        if store_name == 'hermeco oficinas':
            store_name = 'san diego'

        # Begin video capture
        try:
            vid = cv2.VideoCapture(int(video_path))
        except:
            vid = cv2.VideoCapture(video_path)

        # Get video features
        out = None
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        frame_count = int(vid.get(
            cv2.CAP_PROP_FRAME_COUNT))  # Total number of frames in the video
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        delta_time = (time_end_vid_dt - time_start_vid_dt) / frame_count

        # get video ready to save locally if flag is set
        if FLAGS.output_vid:
            out = cv2.VideoWriter(FLAGS.output_vid, codec, fps,
                                  (width, height))

        frame_num = 1

        # Initialize the fields of the dataframe that will store the detections
        detections_df = pd.DataFrame({
            'Store_name': [],
            'Start_date': [],
            'End_date': [],
            'current_datetime': [],
            'Camera': [],
            'Object': [],
            'Id': [],
            'X_center_original': [],
            'Y_center_original': [],
            'X_center_perspective': [],
            'Y_center_perspective': [],
            'X_min': [],
            'Y_min': [],
            'X_max': [],
            'Y_max': [],
            'Frame': []
        })
        temp = pd.DataFrame()

        # vector that will store the last 15 locations of each track
        pts = [deque(maxlen=15) for _ in range(10000)]

        counter_out = []
        counter_in = []

        start_process = time.time()

        # while video is running
        while True:
            # Get the frame image from the video
            return_value, frame = vid.read()

            # transform the default color of the OpenCV frame from BGR to RGB
            if return_value:
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                image = Image.fromarray(frame)
            else:
                print(
                    'Video has ended or failed, try a different video format!')
                break

            #print('Frame #: ', frame_num)
            # Preprocessing the frame image
            frame_size = frame.shape[:2]
            image_data = cv2.resize(frame, (input_size, input_size))
            image_data = image_data / 255.
            image_data = image_data[np.newaxis, ...].astype(np.float32)
            start_time = time.time()

            # Getting all the bounding boxes of the detections and their respective confidence
            batch_data = tf.constant(image_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

            # Applying Non-maximum Suppression to get the best bounding box for each detection
            boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
                boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
                scores=tf.reshape(
                    pred_conf,
                    (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
                max_output_size_per_class=50,
                max_total_size=50,
                iou_threshold=FLAGS.iou,
                score_threshold=FLAGS.score_th)

            # Convert data to numpy arrays and slice out unused elements
            num_objects = valid_detections.numpy()[0]
            bboxes = boxes.numpy()[0]
            bboxes = bboxes[0:int(num_objects)]
            scores = scores.numpy()[0]
            scores = scores[0:int(num_objects)]
            classes = classes.numpy()[0]
            classes = classes[0:int(num_objects)]

            # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height
            original_h, original_w, _ = frame.shape
            bboxes = utils.format_boxes(bboxes, original_h, original_w)

            # store all predictions in one parameter for simplicity when calling functions
            pred_bbox = [bboxes, scores, classes, num_objects]

            # read in all class names from config
            class_names = utils.read_class_names(cfg.YOLO.CLASSES)

            # by default allow all classes in .names file
            #allowed_classes = list(class_names.values())

            # custom allowed classes (uncomment line below to customize tracker for only people)
            allowed_classes = ['person']

            # loop through objects and use class index to get class name, allow only classes in allowed_classes list
            names = []
            deleted_indx = []
            for i in range(num_objects):
                class_indx = int(classes[i])
                class_name = class_names[class_indx]
                if class_name not in allowed_classes:
                    deleted_indx.append(i)
                else:
                    names.append(class_name)
            names = np.array(names)
            count = len(names)

            if FLAGS.display_count:
                cv2.putText(frame, "Objects being tracked: {}".format(count),
                            (5, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2,
                            (0, 255, 0), 2)
                print("Objects being tracked: {}".format(count))
            # delete detections that are not in allowed_classes
            bboxes = np.delete(bboxes, deleted_indx, axis=0)
            scores = np.delete(scores, deleted_indx, axis=0)

            # encode yolo detections and feed to tracker
            features = encoder(frame, bboxes)
            detections = [
                Detection(bbox, score, class_name, feature)
                for bbox, score, class_name, feature in zip(
                    bboxes, scores, names, features)
            ]

            #initialize color map
            cmap = plt.get_cmap('tab20b')
            colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

            # run non-maxima supression
            boxs = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            classes = np.array([d.class_name for d in detections])
            indices = preprocessing.non_max_suppression(
                boxs, classes, nms_max_overlap, scores)
            detections = [detections[i] for i in indices]

            # Call the tracker
            tracker.predict()
            tracker.update(detections)

            # Computing the time that has passed from the beginning of the video to the current frame
            delta_time_frame = delta_time * (frame_num - 1)

            # update tracks
            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                bbox = track.to_tlbr()
                class_name = track.get_class()

                # draw bbox on screen
                color = colors[int(track.track_id) % len(colors)]
                color = [i * 255 for i in color]
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), color, 2)
                cv2.rectangle(
                    frame, (int(bbox[0]), int(bbox[1] - 30)),
                    (int(bbox[0]) +
                     (len(class_name) + len(str(track.track_id))) * 17,
                     int(bbox[1])), color, -1)
                cv2.putText(frame, class_name + "-" + str(track.track_id),
                            (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                            (255, 255, 255), 2)

                # if enable info flag then print details about each track
                if FLAGS.info:
                    print(
                        f"Tracker ID: {str(track.track_id)}, Class: {class_name},  BBox Coords (xmin, ymin, xmax, ymax): {(int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))}"
                    )

                # computing the bottom center of the bounding box
                center = (int(((bbox[0]) + (bbox[2])) / 2), int(bbox[3]))

                # Loop for the different configured sections of the store for the current camera
                for sec in stores_sections[store_name][
                        f'camera_{camera}'].keys():
                    # Get the 4 points of the section
                    bound_section_points = stores_sections[store_name][
                        f'camera_{camera}'][sec]['camera_view_points']
                    # Verify if the center point of the detection is in the section region
                    mpltPath_path = mpltPath.Path(bound_section_points)
                    inside = mpltPath_path.contains_point(list(center))
                    if inside:
                        # Get the perspective transformation matrix
                        transform_matrix = np.array(
                            stores_sections[store_name][f'camera_{camera}']
                            [sec]['transformation_matrix'])
                        # Apply the transformation matrix to transform the point to the blueprint perspective
                        transformed_center = point_perspective_transform(
                            center, transform_matrix)[0]
                        break
                    else:
                        transformed_center = [0, 0]

                # Appending the center to the current track
                pts[track.track_id].append(center)
                for j in range(1, len(pts[track.track_id])):
                    if pts[track.track_id][j - 1] is None or pts[
                            track.track_id][j] is None:
                        continue
                    thickness = int(np.sqrt(64 / float(j + 1)) * 2)
                    cv2.line(frame, (pts[track.track_id][j - 1]),
                             (pts[track.track_id][j]), color, thickness)

                height, width, _ = frame.shape
                ##cv2.line(frame,(0,int(3*height/6)),(width,int(3*height/6)),(0,0,255), thickness = 2)
                #cv2.line(frame,(193,183),(650,183),(0,0,255),2)
                cv2.line(frame, startline, endline, (0, 0, 255), 2)

                # split the bounding box bottom center coordinates
                center_x = center[0]
                if (camera == 2):
                    center_y = int(bbox[3])
                else:
                    center_y = int(((bbox[1]) + (bbox[3])) / 2)

                # Counting if the track is leaving or entering the camera section
                # based on the direction in which the person crosses a fixed line
                if (center_y <= int(startline[1] + 20)) and (
                        center_y >= int(startline[1] - 20)) and (
                            center_x >= int(startline[0] - 30)) and (
                                center_x <= int(endline[0] + 30)):
                    if class_name == 'person':
                        list_y = [i[1] for i in pts[track.track_id]]
                        in_var = all(x < y for x, y in zip(list_y, list_y[1:]))
                        out_var = all(x > y
                                      for x, y in zip(list_y, list_y[1:]))
                        if in_var and len(list_y) > 1:
                            counter_in.append(int(track.track_id))
                        elif out_var and len(list_y) > 1:
                            counter_out.append(int(track.track_id))

                # Adding the current track detection data to the dataframe
                temp = pd.DataFrame({
                    'Store_name': [store_name],
                    'Start_date': [time_start_vid_dt],
                    'End_date': [time_end_vid_dt],
                    'current_datetime': [time_start_vid_dt + delta_time_frame],
                    'Camera': [int(camera)],
                    'Object': [class_name],
                    'Id': [int(track.track_id)],
                    'X_center_original': [int(center[0])],
                    'Y_center_original': [int(center[1])],
                    'X_center_perspective': [int(transformed_center[0])],
                    'Y_center_perspective': [int(transformed_center[1])],
                    'X_min': [int(bbox[0])],
                    'Y_min': [int(bbox[1])],
                    'X_max': [int(bbox[2])],
                    'Y_max': [int(bbox[3])],
                    'Frame': [int(frame_num)]
                })
                detections_df = pd.concat([detections_df, temp],
                                          ignore_index=True)

            # Getting the total in and out counts
            total_count_in = len(set(counter_in))
            total_count_out = len(set(counter_out))

            cv2.putText(frame, 'Total Count In:' + str(len(set(counter_in))),
                        (0, 130), 0, 1, (0, 0, 255), 2)
            cv2.putText(frame, 'Total Count Out:' + str(len(set(counter_out))),
                        (0, 200), 0, 1, (0, 0, 255), 2)

            frame_num += 1
            # calculate frames per second of running detections
            fps = 1.0 / (time.time() - start_time)
            #print("FPS: %.2f" % fps)
            result = np.asarray(frame)
            result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

            # if output flag is set, save video file
            if FLAGS.output_vid:
                out.write(result)
            #if cv2.waitKey(1) & 0xFF == ord('q'): break

        print("Total Processing time: ", time.time() - start_process)
        cv2.destroyAllWindows()

        # saving the detections data into a csv
        detections_df.to_csv(output_csv_path, index=False)
        print("The detections file was successfully saved!")

        # Adding the video counts data to the dataframe
        if (camera == 1) or (camera == 2):
            count_df_in = pd.DataFrame({
                'Store_name': [store_name],
                'Start_date': [time_start_vid_dt],
                'End_date': [time_end_vid_dt],
                'Camera': [camera],
                'Count': [total_count_in],
                'inout': "In",
                'name_video': [file_name]
            })
            count_df_out = pd.DataFrame({
                'Store_name': [store_name],
                'Start_date': [time_start_vid_dt],
                'End_date': [time_end_vid_dt],
                'Camera': [camera],
                'Count': [total_count_out],
                'inout': "Out",
                'name_video': [file_name]
            })
        else:
            count_df_in = pd.DataFrame({
                'Store_name': [store_name],
                'Start_date': [time_start_vid_dt],
                'End_date': [time_end_vid_dt],
                'Camera': [camera],
                'Count': [0],
                'inout': "In",
                'name_video': [file_name]
            })
            count_df_out = pd.DataFrame({
                'Store_name': [store_name],
                'Start_date': [time_start_vid_dt],
                'End_date': [time_end_vid_dt],
                'Camera': [camera],
                'Count': [0],
                'inout': "Out",
                'name_video': [file_name]
            })

        count_df = pd.concat([count_df_in, count_df_out], ignore_index=True)

        # saving the count data into a csv
        count_df.to_csv(count_csv_path, index=False)
        print("The counts files were successfully saved!")

        #upload the detections data to the database
        upload_to_db(detections_df, 'tracker',
                     'append')  # passing the dataframe

        #upload the count data to the database
        upload_to_db(count_df, 'counts', 'append')
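The person tracker above decides whether a track is entering or leaving by checking that the y-coordinates stored in pts[track.track_id] are strictly increasing (in) or strictly decreasing (out) while the current bottom-center point sits inside a tolerance band around the counting line. A compact sketch of that rule as a free function; the name, signature, and default tolerances are assumptions made for the sketch, not part of the original script.

def crossing_direction(history, startline, endline, y_tol=20, x_tol=30):
    """Classify a track crossing a horizontal counting line as 'in', 'out' or None.

    history   -- recent (x, y) points for one track, oldest first (like pts[track_id] above)
    startline -- (x, y) left end of the counting line
    endline   -- (x, y) right end of the counting line
    """
    if len(history) < 2:
        return None
    x, y = history[-1]
    on_line = (abs(y - startline[1]) <= y_tol
               and startline[0] - x_tol <= x <= endline[0] + x_tol)
    if not on_line:
        return None
    ys = [p[1] for p in history]
    if all(a < b for a, b in zip(ys, ys[1:])):
        return 'in'    # y strictly increasing: moving down the frame
    if all(a > b for a, b in zip(ys, ys[1:])):
        return 'out'   # y strictly decreasing: moving up the frame
    return None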
Example #17
0
def main(_argv):

    with open("./config_birdview.yml", "r") as ymlfile:
        bird_view_cfg = yaml.load(ymlfile, Loader=yaml.SafeLoader)

    width_og, height_og = 0, 0
    corner_points = []
    for section in bird_view_cfg:
        corner_points.append(bird_view_cfg["image_parameters"]["p1"])
        corner_points.append(bird_view_cfg["image_parameters"]["p2"])
        corner_points.append(bird_view_cfg["image_parameters"]["p3"])
        corner_points.append(bird_view_cfg["image_parameters"]["p4"])
        width_og = int(bird_view_cfg["image_parameters"]["width_og"])
        height_og = int(bird_view_cfg["image_parameters"]["height_og"])
        img_path = bird_view_cfg["image_parameters"]["img_path"]
        size_height = bird_view_cfg["image_parameters"]["size_height"]
        size_width = bird_view_cfg["image_parameters"]["size_width"]

    tr = np.array([
        bird_view_cfg["image_parameters"]["p4"][0],
        bird_view_cfg["image_parameters"]["p4"][1],
    ])
    tl = np.array([
        bird_view_cfg["image_parameters"]["p2"][0],
        bird_view_cfg["image_parameters"]["p2"][1],
    ])
    br = np.array([
        bird_view_cfg["image_parameters"]["p3"][0],
        bird_view_cfg["image_parameters"]["p3"][1],
    ])
    bl = np.array([
        bird_view_cfg["image_parameters"]["p1"][0],
        bird_view_cfg["image_parameters"]["p1"][1],
    ])

    widthA = np.sqrt(((br[0] - bl[0])**2) + ((br[1] - bl[1])**2))
    widthB = np.sqrt(((tr[0] - tl[0])**2) + ((tr[1] - tl[1])**2))
    maxWidth = max(int(widthA), int(widthB))

    heightA = np.sqrt(((tr[0] - br[0])**2) + ((tr[1] - br[1])**2))
    heightB = np.sqrt(((tl[0] - bl[0])**2) + ((tl[1] - bl[1])**2))
    maxHeight = max(int(heightA), int(heightB))

    matrix, imgOutput = compute_perspective_transform(corner_points, maxWidth,
                                                      maxHeight,
                                                      cv2.imread(img_path))
    height, width, _ = imgOutput.shape
    dim = (width, height)

    # Definition of the parameters
    max_cosine_distance = 0.4
    nn_budget = None
    nms_max_overlap = 1.0

    # initialize deep sort
    model_filename = "model_data/mars-small128.pb"
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    # calculate cosine distance metric
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    # initialize tracker
    tracker = Tracker(metric)

    # load configuration for object detector
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size
    video_path = FLAGS.video

    # load tflite model if flag is set
    if FLAGS.framework == "tflite":
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
    # otherwise load standard tensorflow saved model
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights,
                                                 tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures["serving_default"]

    # begin video capture
    try:
        vid = cv2.VideoCapture(int(video_path))
    except:
        vid = cv2.VideoCapture(video_path)

    output_video_1, output_video_2 = None, None

    # get video ready to save locally if flag is set
    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        """
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        """
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    frame_num = 0
    # while video is running
    while True:

        black_img = cv2.imread("./black_bg.png")
        black_img = cv2.resize(black_img, dim, interpolation=cv2.INTER_AREA)

        return_value, frame = vid.read()

        if return_value:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(frame)
        else:
            print("Video has ended or failed, try a different video format!")
            break
        frame_num += 1
        print("Frame #: ", frame_num)
        frame_size = frame.shape[:2]
        image_data = cv2.resize(frame, (input_size, input_size))
        image_data = image_data / 255.0
        image_data = image_data[np.newaxis, ...].astype(np.float32)
        start_time = time.time()

        # run detections on tflite if flag is set
        if FLAGS.framework == "tflite":
            interpreter.set_tensor(input_details[0]["index"], image_data)
            interpreter.invoke()
            pred = [
                interpreter.get_tensor(output_details[i]["index"])
                for i in range(len(output_details))
            ]
            # run detections using yolov3 if flag is set
            if FLAGS.model == "yolov3" and FLAGS.tiny == True:
                boxes, pred_conf = filter_boxes(
                    pred[1],
                    pred[0],
                    score_threshold=0.25,
                    input_shape=tf.constant([input_size, input_size]),
                )
            else:
                boxes, pred_conf = filter_boxes(
                    pred[0],
                    pred[1],
                    score_threshold=0.25,
                    input_shape=tf.constant([input_size, input_size]),
                )
        else:
            batch_data = tf.constant(image_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

        (
            boxes,
            scores,
            classes,
            valid_detections,
        ) = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(
                pred_conf,
                (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=FLAGS.iou,
            score_threshold=FLAGS.score,
        )

        # convert data to numpy arrays and slice out unused elements
        num_objects = valid_detections.numpy()[0]
        bboxes = boxes.numpy()[0]
        bboxes = bboxes[0:int(num_objects)]
        scores = scores.numpy()[0]
        scores = scores[0:int(num_objects)]
        classes = classes.numpy()[0]
        classes = classes[0:int(num_objects)]

        # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height
        original_h, original_w, _ = frame.shape
        bboxes = utils.format_boxes(bboxes, original_h, original_w)

        # store all predictions in one parameter for simplicity when calling functions
        pred_bbox = [bboxes, scores, classes, num_objects]

        # read in all class names from config
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)

        # by default allow all classes in .names file
        #         allowed_classes = list(class_names.values())

        # custom allowed classes: restrict the tracker to people only
        allowed_classes = ["person"]

        # loop through objects and use class index to get class name, allow only classes in allowed_classes list
        names = []
        deleted_indx = []
        for i in range(num_objects):
            class_indx = int(classes[i])
            class_name = class_names[class_indx]
            if class_name not in allowed_classes:
                deleted_indx.append(i)
            else:
                names.append(class_name)
        names = np.array(names)
        count = len(names)
        if FLAGS.count:
            cv2.putText(
                frame,
                "Objects being tracked: {}".format(count),
                (5, 35),
                cv2.FONT_HERSHEY_COMPLEX_SMALL,
                2,
                (0, 255, 0),
                2,
            )
            print("Objects being tracked: {}".format(count))
        # delete detections that are not in allowed_classes
        bboxes = np.delete(bboxes, deleted_indx, axis=0)
        scores = np.delete(scores, deleted_indx, axis=0)

        # encode yolo detections and feed to tracker
        features = encoder(frame, bboxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                bboxes, scores, names, features)
        ]

        # initialize color map
        cmap = plt.get_cmap("tab20b")
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maximum suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes,
                                                    nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        bbox_array = []
        # update tracks
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            bbox_array.append(
                (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])))
            class_name = track.get_class()

            # draw bbox on screen
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            cv2.rectangle(
                frame,
                (int(bbox[0]), int(bbox[1])),
                (int(bbox[2]), int(bbox[3])),
                color,
                2,
            )
            cv2.rectangle(
                frame,
                (int(bbox[0]), int(bbox[1] - 30)),
                (
                    int(bbox[0]) +
                    (len(class_name) + len(str(track.track_id))) * 17,
                    int(bbox[1]),
                ),
                color,
                -1,
            )
            cv2.putText(
                frame,
                class_name + "-" + str(track.track_id),
                (int(bbox[0]), int(bbox[1] - 10)),
                0,
                0.75,
                (255, 255, 255),
                2,
            )

            # if enable info flag then print details about each track
            if FLAGS.info:
                print(
                    "Tracker ID: {}, Class: {},  BBox Coords (xmin, ymin, xmax, ymax): {}"
                    .format(
                        str(track.track_id),
                        class_name,
                        (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(
                            bbox[3])),
                    ))

        if len(bbox_array) >= 1:
            array_centroids, array_groundpoints = get_centroids_and_groundpoints(
                bbox_array)
            transformed_downoids = compute_point_perspective_transformation(
                matrix, array_centroids)

            # Show every point on the top view image
            for point in transformed_downoids:
                x, y = point
                cv2.circle(black_img, (x, y), 60, (0, 255, 0), 2)
                cv2.circle(black_img, (x, y), 3, (0, 255, 0), -1)

        # calculate frames per second of running detections
        fps = 1.0 / (time.time() - start_time)
        print("FPS: %.2f" % fps)
        result = np.asarray(frame)
        #         result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

        if not FLAGS.dont_show:
            cv2.imshow("Output Video", result)

        # if output flag is set, save video file
        if FLAGS.output:
            if output_video_1 is None and output_video_2 is None:
                fourcc1 = cv2.VideoWriter_fourcc(*"MJPG")
                output_video_1 = cv2.VideoWriter(
                    "./video.avi", fourcc1, 25,
                    (frame.shape[1], frame.shape[0]), True)
                fourcc2 = cv2.VideoWriter_fourcc(*"MJPG")
                output_video_2 = cv2.VideoWriter(
                    "./bird_view.avi",
                    fourcc2,
                    25,
                    (black_img.shape[1], black_img.shape[0]),
                    True,
                )

            elif output_video_1 is not None and output_video_2 is not None:
                output_video_1.write(frame)
                output_video_2.write(black_img)
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
    cv2.destroyAllWindows()
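# Note: the bird's-eye view example above calls two helpers that are not shown here
# (compute_perspective_transform and compute_point_perspective_transformation). A
# minimal sketch of what they likely do, assuming they wrap OpenCV's
# getPerspectiveTransform / perspectiveTransform; the destination corner ordering
# below is an assumption:

import cv2
import numpy as np

def compute_perspective_transform(corner_points, width, height, image):
    # map the four selected corner points onto a width x height rectangle
    src = np.float32(np.array(corner_points))
    dst = np.float32([[0, 0], [width, 0], [0, height], [width, height]])
    matrix = cv2.getPerspectiveTransform(src, dst)
    img_transformed = cv2.warpPerspective(image, matrix, (width, height))
    return matrix, img_transformed

def compute_point_perspective_transformation(matrix, list_downoids):
    # project each detection's ground point into the bird's-eye view
    pts = np.float32(np.array(list_downoids)).reshape(-1, 1, 2)
    transformed = cv2.perspectiveTransform(pts, matrix).reshape(-1, 2)
    return [[int(x), int(y)] for x, y in transformed]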
Example #18
0
def main(_argv):
    # Definition of the parameters
    max_cosine_distance = 0.4
    nn_budget = None
    nms_max_overlap = 1.0

    # initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    # calculate cosine distance metric
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    # initialize tracker
    tracker = Tracker(metric)

    # load configuration for object detector
    config = ConfigProto()
    config.gpu_options.allow_growth = False
    config.gpu_options.per_process_gpu_memory_fraction = 0.1

    _ = InteractiveSession(config=config)
    utils.load_config(FLAGS)
    input_size = FLAGS.size
    video_path = FLAGS.video

    # load tflite model if flag is set
    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(
            model_path=f'{FLAGS.weights}_{FLAGS.size}')
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
    # otherwise load standard tensorflow saved model
    else:
        saved_model_loaded = tf.saved_model.load(
            f'{FLAGS.weights}_{FLAGS.size}', tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']

    # begin video capture
    try:
        vid = cv2.VideoCapture(int(video_path))
    except:
        vid = cv2.VideoCapture(video_path)

    out = None

    # get video ready to save locally if flag is set
    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    all_start_time = None
    frame_num = 0
    # while video is running
    while True:
        return_value, frame = vid.read()
        if return_value:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            Image.fromarray(frame)
        else:
            elapsed = time.time() - all_start_time if all_start_time else 0.0
            fps = float(frame_num) / elapsed if elapsed > 0 else 0.0
            print("fps=%.2f size=%d frames=%d deep=%s output=%s" %
                  (fps, FLAGS.size, frame_num,
                   "true" if FLAGS.deep else "false", FLAGS.output))
            break
        frame_num += 1
        if FLAGS.info:
            print("frame_num=%d" % frame_num)
        start_time = time.time()
        if all_start_time is None:
            all_start_time = time.time()
        image_data = cv2.resize(frame, (input_size, input_size))
        image_data = image_data / 255.
        image_data = image_data[np.newaxis, ...].astype(np.float32)

        # run detections on tflite if flag is set
        if FLAGS.framework == 'tflite':
            interpreter.set_tensor(input_details[0]['index'], image_data)
            interpreter.invoke()
            pred = [
                interpreter.get_tensor(output_details[i]['index'])
                for i in range(len(output_details))
            ]
            # run detections using yolov3 if flag is set
            if FLAGS.model == 'yolov3' and FLAGS.tiny == True:
                boxes, pred_conf = filter_boxes(pred[1],
                                                pred[0],
                                                score_threshold=0.25,
                                                input_shape=tf.constant(
                                                    [input_size, input_size]))
            else:
                boxes, pred_conf = filter_boxes(pred[0],
                                                pred[1],
                                                score_threshold=0.25,
                                                input_shape=tf.constant(
                                                    [input_size, input_size]))
        else:
            batch_data = tf.constant(image_data)
            pred_bbox = infer(batch_data)
            for _, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(
                pred_conf,
                (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=FLAGS.iou,
            score_threshold=FLAGS.score)

        # convert data to numpy arrays and slice out unused elements
        num_objects = valid_detections.numpy()[0]
        bboxes = boxes.numpy()[0]
        bboxes = bboxes[0:int(num_objects)]
        scores = scores.numpy()[0]
        scores = scores[0:int(num_objects)]
        classes = classes.numpy()[0]
        classes = classes[0:int(num_objects)]

        # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height
        original_h, original_w, _ = frame.shape
        bboxes = utils.format_boxes(bboxes, original_h, original_w)

        # store all predictions in one parameter for simplicity when calling functions
        pred_bbox = [bboxes, scores, classes, num_objects]

        # read in all class names from config
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)

        # by default allow all classes in .names file
        allowed_classes = list(class_names.values())

        # custom allowed classes: restrict the tracker to people only
        allowed_classes = ['person']

        # loop through objects and use class index to get class name, allow only classes in allowed_classes list
        names = []
        deleted_indx = []
        for i in range(num_objects):
            class_indx = int(classes[i])
            class_name = class_names[class_indx]
            if class_name not in allowed_classes:
                deleted_indx.append(i)
            else:
                names.append(class_name)
        names = np.array(names)
        count = len(names)
        if FLAGS.count:
            cv2.putText(frame, "Objects being tracked: {}".format(count),
                        (5, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2,
                        (0, 255, 0), 2)
            print("Objects being tracked: {}".format(count))
        # delete detections that are not in allowed_classes
        bboxes = np.delete(bboxes, deleted_indx, axis=0)
        scores = np.delete(scores, deleted_indx, axis=0)

        # encode yolo detections and feed to tracker
        if FLAGS.deep:
            features = encoder(frame, bboxes)
        else:
            features = np.empty((len(bboxes), 0), np.float32)

        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                bboxes, scores, names, features)
        ]

        #initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maximum suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes,
                                                    nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        # update tracks
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()

            # draw bbox on screen
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), color, 2)
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1] - 30)),
                          (int(bbox[0]) +
                           (len(class_name) + len(str(track.track_id))) * 17,
                           int(bbox[1])), color, -1)
            cv2.putText(frame, class_name + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                        (255, 255, 255), 2)

            # if enable info flag then print details about each track
            if FLAGS.info:
                print(
                    "Tracker ID: {}, Class: {}, BBox Coords (xmin, ymin, xmax, ymax): {}"
                    .format(str(track.track_id), class_name, (int(
                        bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))))

        result = np.asarray(frame)
        result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

        if not FLAGS.dont_show:
            cv2.imshow("Output Video", result)

        # if output flag is set, save video file
        if FLAGS.output:
            out.write(result)

        # calculate frames per second of running detections
        if FLAGS.info:
            fps = 1.0 / (time.time() - start_time)
            print("fps=%.2f" % fps)

        if not FLAGS.dont_show:
            if cv2.waitKey(1) & 0xFF == ord('q'): break
    if not FLAGS.dont_show:
        cv2.destroyAllWindows()
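# Note: startRecording_YOLO below relies on module-level setup that is not included in
# this snippet (interpreter, input_details, output_details, encoder, tracker,
# input_size, nms_max_overlap). A minimal sketch mirroring the setup used by the other
# examples in this document, using the same imports as above; the tflite model path is
# an illustrative assumption:

input_size = 416
nms_max_overlap = 1.0
interpreter = tf.lite.Interpreter(model_path='./checkpoints/yolov4-416.tflite')
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
encoder = gdet.create_box_encoder('model_data/mars-small128.pb', batch_size=1)
metric = nn_matching.NearestNeighborDistanceMetric("cosine", 0.4, None)
tracker = Tracker(metric)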
def startRecording_YOLO():
    date_and_time = time.strftime("%Y%m%d-%H-%M-%S") #Stores the current date and time as YYYYMMDD-HH-MM-SS
    vid_out_path = os.path.join(PROJECT_DIR, 'YoloV4', 'outputs', date_and_time + '.avi')
    
    
    #vid = cv2.VideoCapture(test_drive) #0 for webcam/Raspberry Pi Cam
    videothread = VideoThread(resolution=(640,480), framerate=30).start()

    width = int(videothread.stream.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(videothread.stream.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(videothread.stream.get(cv2.CAP_PROP_FPS))
    codec = cv2.VideoWriter_fourcc(*'XVID')
    output_video = cv2.VideoWriter(vid_out_path, codec, fps, (width,height))
    
    #width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    #height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    #fps = int(vid.get(cv2.CAP_PROP_FPS))
    #codec = cv2.VideoWriter_fourcc(*'XVID')
    #output_video = cv2.VideoWriter(vid_out_path, codec, fps, (width,height))
    frame_number = 0
    freq = cv2.getTickFrequency()
    avg_fps = 0

    #while video is running/recording
    while True:
        return_val, frame = videothread.read()
        #return_val, frame = vid.read()
        
        if return_val:
            #frame = cv2.flip(frame, -1)
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(frame)
        else:
            print('Video error, try another format')
            break
        
        frame_number += 1
        #print('Frame #: ', frame_number)
        frame_size = frame.shape[:2]
        image_data = cv2.resize(frame, (input_size, input_size))
        image_data = image_data/ 255.
        #mage_data = np.expand_dims(frame_resized, axis = 0)

        #if floating_model:
         #   image_data = (np.float32(image_data) - 127.5)/127.5
        image_data = image_data[np.newaxis, ...].astype(np.float32) #Converts image data to a float32 type
        start_time = time.time()

        #TFLite Detections
        interpreter.set_tensor(input_details[0]['index'], image_data)
        interpreter.invoke()
        prediction = [interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details))]
        #box = interpreter.get_tensor(output_details[0]['index'])[0]
        #scores = interpreter.get_tensor(output_details[2]['index'])[0]
        boxes, prediction_conf = filter_boxes(prediction[0], prediction[1], score_threshold=0.4, input_shape=tf.constant([input_size, input_size]))

        #tf.reshape returns a tensor with the same values in the same order but a new shape
        #tf.shape returns a 1-D integer tensor representing the shape of its input
        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes = tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores = tf.reshape(prediction_conf, (tf.shape(prediction_conf)[0], -1, tf.shape(prediction_conf)[-1])),
            max_output_size_per_class = 50,
            max_total_size = 50,
            iou_threshold = 0.45,
            score_threshold = 0.5
        )

        #convert the received data into numpy arrays, then slice out unused elements
        number_of_objects = valid_detections.numpy()[0]
        bboxes = boxes.numpy()[0]
        bboxes = bboxes[0 : int(number_of_objects)]
        scores = scores.numpy()[0]
        scores = scores[0 : int(number_of_objects)]
        classes = classes.numpy()[0]
        classes = classes[0 : int(number_of_objects)]

        #format bounding boxes with normalized minimums and maximums of x and y
        original_h, original_w, _ = frame.shape
        bboxes = utils.format_boxes(bboxes, original_h, original_w)

        prediction_bbox = [bboxes, scores, classes, number_of_objects]

        #Read in all the class names from config and only allow certain ones to be detected (reduces computation)
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)
        allowed_classes = ['traffic light', 'person', 'car', 'stop sign']

        #loop through objects and get the classification name, keeping only the ones in allowed_classes
        names = []
        deleted_indx = []
        for i in range(number_of_objects):
            classification_index = int(classes[i])
            class_name = class_names[classification_index]
            if class_name not in allowed_classes: deleted_indx.append(i)
            else: names.append(class_name)
        names = np.array(names)
        count = len(names)

        #delete irrelevant detections (not in allowed_classes)
        bboxes = np.delete(bboxes, deleted_indx, axis = 0)
        scores = np.delete(scores, deleted_indx, axis = 0)

        #Feed tracker with encoded yolo detections
        detections_features = encoder(frame, bboxes)
        detections = [Detection(bbox, score, class_name, detection_feature) for bbox, score, class_name, detection_feature in zip(bboxes, scores, names, detections_features)]

        #initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        #run non-maximum suppression (removes redundant overlapping detections)
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        #Call tracker
        tracker.predict()
        tracker.update(detections)

        #update tracks
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1: continue
            bbox = track.to_tlbr()
            class_name = track.get_class()

            #if class_name == 'person': print('person found')

            #change frame to the lane-detection view
            #frame = lane_detect.detect_edges(frame) #COMMENT OUT IF/WHEN ERROR OCCURS

            #distance approximation (barebones, needs more adjusting):
            #ad-hoc estimate from the box coordinates plus a per-camera offset
            cam_parameter = 18    #per-camera offset; tune to bring the estimate closer to the actual distance
            distance = (np.pi)/(bbox[2].item() + bbox[3].item()) * 1000 + cam_parameter
            det_dest = str(int(distance))

            #draw bounding box on screen
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2)
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(det_dest)) * 18, int(bbox[1])), color, -1)
            #cv2.putText(frame, class_name + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2)
            cv2.putText(frame, class_name + ": " + str(int(distance)), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2)
        
        #calculate fps of running detections
        fps = 1.0/ (time.time() - start_time)
        avg_fps = avg_fps + fps
        #print("FPS: %.2f" % fps)
        cv2.putText(frame, "FPS: " + str(int(fps)), (width - 100, height - 20),0, 0.75, (255,255,255),2)
        result = np.asarray(frame)
        result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        cv2.imshow("Output Video", result)

        output_video.write(result)
        if cv2.waitKey(1) & 0xFF == ord('q'): break
    cv2.destroyAllWindows()
    print('Average FPS: ', (avg_fps/frame_number))
    print('Number of Frames: ', frame_number)
    videothread.stop()
Example #20
0
def main(_argv):
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size
    images = FLAGS.images

    # load model
    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING])

    # loop through images in list and run Yolov4 model on each
    for count, image_path in enumerate(images, 1):
        original_image = cv2.imread(image_path)
        original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)

        image_data = cv2.resize(original_image, (input_size, input_size))
        image_data = image_data / 255.

        images_data = []
        for i in range(1):
            images_data.append(image_data)
        images_data = np.asarray(images_data).astype(np.float32)

        if FLAGS.framework == 'tflite':
            interpreter.allocate_tensors()
            input_details = interpreter.get_input_details()
            output_details = interpreter.get_output_details()
            print(input_details)
            print(output_details)
            interpreter.set_tensor(input_details[0]['index'], images_data)
            interpreter.invoke()
            pred = [interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details))]
            if FLAGS.model == 'yolov3' and FLAGS.tiny == True:
                boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25, input_shape=tf.constant([input_size, input_size]))
            else:
                boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25, input_shape=tf.constant([input_size, input_size]))
        else:
            infer = saved_model_loaded.signatures['serving_default']
            batch_data = tf.constant(images_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(
                pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=FLAGS.iou,
            score_threshold=FLAGS.score
        )

        original_h, original_w, _ = original_image.shape
        bboxes = utils.format_boxes(boxes.numpy()[0], original_h, original_w)

        pred_bbox = [bboxes, scores.numpy()[0], classes.numpy()[0], valid_detections.numpy()[0]]

        # read in all class names from config
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)

        # by default allow all classes in .names file
        # allowed_classes = list(class_names.values())
        
        # custom allowed classes (uncomment line below to allow detections for only people)
        # allowed_classes = ['person']
        allowed_classes = ['Mask Person']

        image = utils.draw_bbox(original_image, pred_bbox, allowed_classes = allowed_classes)

        if FLAGS.covid:
            distance = social_distance(pred_bbox,original_image, allowed_classes)

        image = Image.fromarray(image.astype(np.uint8))
        if not FLAGS.dont_show:
            image.show()
        image = cv2.cvtColor(np.array(image), cv2.COLOR_BGR2RGB)
        cv2.imwrite(FLAGS.output + 'detection' + str(count) + '.png', image)
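# Note: social_distance is not defined in this snippet. A minimal sketch of the idea,
# assuming it flags pairs of allowed-class detections whose box centres are closer than
# a pixel threshold; the threshold, filtering and return value below are illustrative:

def social_distance(pred_bbox, image, allowed_classes, min_pixels=120):
    boxes, scores, classes, num_objects = pred_bbox
    class_names = utils.read_class_names(cfg.YOLO.CLASSES)
    centroids = []
    for i in range(int(num_objects)):
        if class_names[int(classes[i])] not in allowed_classes:
            continue
        xmin, ymin, xmax, ymax = boxes[i]
        centroids.append(((xmin + xmax) / 2.0, (ymin + ymax) / 2.0))
    # compare every pair of centres; image is unused in this sketch
    violations = []
    for i in range(len(centroids)):
        for j in range(i + 1, len(centroids)):
            dx = centroids[i][0] - centroids[j][0]
            dy = centroids[i][1] - centroids[j][1]
            if (dx * dx + dy * dy) ** 0.5 < min_pixels:
                violations.append((i, j))
    return violations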
    def main(self, frame_data):
        # Definition of the parameters
        nms_max_overlap = 1.0

        # set HyperParams
        size = 416
        iou = 0.45
        score = 0.50
        info = False

        input_size = size

        self.person1.is_used = 0
        self.person2.is_used = 0
        self.person3.is_used = 0
        self.person4.is_used = 0

        out = None

        frame_data = cv2.cvtColor(frame_data, cv2.COLOR_BGR2RGB)
        image_data = cv2.resize(frame_data, (input_size, input_size))
        image_data = image_data / 255.
        image_data = image_data[np.newaxis, ...].astype(np.float32)
        # start_time = time.time()

        batch_data = tf.constant(image_data)
        pred_bbox = self.infer(batch_data)  # run the YOLO model to get bounding box predictions
        for key, value in pred_bbox.items():
            boxes = value[:, :, 0:4]  # box coordinates
            pred_conf = value[:, :, 4:]  # class confidence vector

        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(
                pred_conf,
                (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=iou,
            score_threshold=score)

        # convert data to numpy arrays and slice out unused elements
        num_objects = valid_detections.numpy()[0]
        bboxes = boxes.numpy()[0]
        bboxes = bboxes[0:int(num_objects)]
        scores = scores.numpy()[0]
        scores = scores[0:int(num_objects)]
        classes = classes.numpy()[0]
        classes = classes[0:int(num_objects)]

        # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height
        original_h, original_w, _ = frame_data.shape
        bboxes = utils.format_boxes(bboxes, original_h, original_w)

        # store all predictions in one parameter for simplicity when calling functions
        pred_bbox = [bboxes, scores, classes, num_objects]

        # read in all class names from config
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)

        # by default allow all classes in .names file
        # allowed_classes = list(class_names.values())

        # custom allowed classes: restrict the tracker to people only
        allowed_classes = ['person']

        # loop through objects and use class index to get class name, allow only classes in allowed_classes list
        names = []
        deleted_indx = []
        for i in range(num_objects):
            class_indx = int(classes[i])
            class_name = class_names[class_indx]
            if class_name not in allowed_classes:
                deleted_indx.append(i)
            else:
                names.append(class_name)
        names = np.array(names)
        count = len(names)
        if count:
            cv2.putText(frame_data, "Objects being tracked: {}".format(count),
                        (5, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2,
                        (0, 255, 0), 2)
            # print("Objects being tracked: {}".format(count))
        # delete detections that are not in allowed_classes
        bboxes = np.delete(bboxes, deleted_indx, axis=0)
        scores = np.delete(scores, deleted_indx, axis=0)

        # encode yolo detections and feed to tracker
        features = self.encoder(frame_data, bboxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                bboxes, scores, names, features)
        ]

        # initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maximum suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes,
                                                    nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # DeepSort Tracking Start

        # Call the tracker
        self.tracker.predict()  # propagate existing track states forward
        self.tracker.update(detections)

        match_person = 0
        # reset unmatched for center compare
        unmatched = []

        # update tracks
        for track in self.tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue

            # draw bbox on screen  # looks like this processing should be finished before sending the frame
            bbox = track.to_tlbr()
            class_name = track.get_class()

            # Matching index with index_stack
            if self.person1.is_exist(track.track_id):
                self.person1.centerX, self.person1.centerY = self.getCenter(
                    bbox)
                self.draw_box(frame_data, self.person1.index_stack[0], colors,
                              bbox)
                self.person1.is_used = 1
                match_person += 1
            elif self.person2.is_exist(track.track_id):
                self.person2.centerX, self.person2.centerY = self.getCenter(
                    bbox)
                self.draw_box(frame_data, self.person2.index_stack[0], colors,
                              bbox)
                self.person2.is_used = 1
                match_person += 1
            elif self.person3.is_exist(track.track_id):
                self.person3.centerX, self.person3.centerY = self.getCenter(
                    bbox)
                self.draw_box(frame_data, self.person3.index_stack[0], colors,
                              bbox)
                self.person3.is_used = 1
                match_person += 1
            elif self.person4.is_exist(track.track_id):
                self.person4.centerX, self.person4.centerY = self.getCenter(
                    bbox)
                self.draw_box(frame_data, self.person4.index_stack[0], colors,
                              bbox)
                self.person4.is_used = 1
                match_person += 1
            else:
                unmatched.append([track.track_id, bbox])
                print('found new object!')

        unmatched = np.array(unmatched, dtype=object)

        # Missed Person Only 1
        if match_person == 3 and len(unmatched) == 1:
            if self.person1.is_used == 0:
                self.person1.centerX, self.person1.centerY = self.getCenter(
                    unmatched[0][1])
                self.person1.index_stack.append(unmatched[0][0])
                self.draw_box(frame_data, self.person1.index_stack[0], colors,
                              unmatched[0][1])
                self.person1.is_used = 1
                match_person += 1
            elif self.person2.is_used == 0:
                self.person2.centerX, self.person2.centerY = self.getCenter(
                    unmatched[0][1])
                self.person2.index_stack.append(unmatched[0][0])
                self.draw_box(frame_data, self.person2.index_stack[0], colors,
                              unmatched[0][1])
                self.person2.is_used = 1
                match_person += 1
            elif self.person3.is_used == 0:
                self.person3.centerX, self.person3.centerY = self.getCenter(
                    unmatched[0][1])
                self.person3.index_stack.append(unmatched[0][0])
                self.draw_box(frame_data, self.person3.index_stack[0], colors,
                              unmatched[0][1])
                self.person3.is_used = 1
                match_person += 1
            elif self.person4.is_used == 0:
                self.person4.centerX, self.person4.centerY = self.getCenter(
                    unmatched[0][1])
                self.person4.index_stack.append(unmatched[0][0])
                self.draw_box(frame_data, self.person4.index_stack[0], colors,
                              unmatched[0][1])
                self.person4.is_used = 1
                match_person += 1
            else:
                print("ERROR : Something problem on object.is_used")

        # Missed Person Over 2
        if match_person <= 3 and len(unmatched) >= 1:
            for unmatch in unmatched:
                if match_person >= 4:
                    break
                else:
                    # Apply center location Euclidean Distance
                    EUD_min = self.get_EuclideanDistance(unmatch)
                    print(EUD_min)
                    if not len(str(EUD_min)) == 0:
                        self.draw_box(frame_data, EUD_min, colors, unmatch[1])
                        match_person += 1

        # if enable info flag then print details about each track
        if info:
            print(
                "Tracker ID: {}, Class: {},  BBox Coords (xmin, ymin, xmax, ymax): {}"
                .format(
                    str(track.track_id), class_name,
                    (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))))

        result = cv2.cvtColor(frame_data, cv2.COLOR_RGB2BGR)

        return result
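# Note: get_EuclideanDistance is not shown in this snippet. A minimal sketch of the
# idea, assuming it returns the stored index of the closest unused person by box-centre
# distance, or an empty string when no candidate is available; the standalone helper
# name below is illustrative:

def closest_person_index(persons, bbox):
    # persons: objects with centerX, centerY, is_used and index_stack, as used above
    cx = (bbox[0] + bbox[2]) / 2.0
    cy = (bbox[1] + bbox[3]) / 2.0
    best_index, best_dist = '', float('inf')
    for person in persons:
        if person.is_used:
            continue
        d = ((person.centerX - cx) ** 2 + (person.centerY - cy) ** 2) ** 0.5
        if d < best_dist:
            best_dist, best_index = d, person.index_stack[0]
    return best_index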
def main(_argv):
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size
    images = FLAGS.images

    saved_model_loaded = tf.saved_model.load(FLAGS.weights,
                                             tags=[tag_constants.SERVING])

    # loop through images in list and run Yolov4 model on each
    for count, image_path in enumerate(images, 1):
        original_image = cv2.imread(image_path)
        original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)

        image_data = cv2.resize(original_image, (input_size, input_size))
        image_data = image_data / 255.

        # get image name by using split method
        image_name = image_path.split('/')[-1]
        image_name = image_name.split('.')[0]

        images_data = []
        for i in range(1):
            images_data.append(image_data)
        images_data = np.asarray(images_data).astype(np.float32)

        infer = saved_model_loaded.signatures['serving_default']
        batch_data = tf.constant(images_data)
        pred_bbox = infer(batch_data)
        for key, value in pred_bbox.items():
            boxes = value[:, :, 0:4]
            pred_conf = value[:, :, 4:]

        # run non max suppression on detections
        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(
                pred_conf,
                (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=0.5,
            score_threshold=0.5)

        # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, xmax, ymax
        original_h, original_w, _ = original_image.shape
        bboxes = utils.format_boxes(boxes.numpy()[0], original_h, original_w)

        # hold all detection data in one variable
        pred_bbox = [
            bboxes,
            scores.numpy()[0],
            classes.numpy()[0],
            valid_detections.numpy()[0]
        ]

        # read in all class names from config
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)

        # by default allow all classes in .names file
        allowed_classes = list(class_names.values())

        crop_path = os.path.join(os.getcwd(), 'detections', 'crop')
        try:
            os.mkdir(crop_path)
        except FileExistsError:
            pass
        crop_objects(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB),
                     pred_bbox, crop_path, allowed_classes)
def main(_argv):
    # Definition of the parameters
    max_cosine_distance = 0.4
    nn_budget = None
    nms_max_overlap = 1.0

    # initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    # calculate cosine distance metric
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    # initialize tracker
    tracker = Tracker(metric)

    # load configuration for object detector
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size
    video_path = FLAGS.video

    # load tflite model if flag is set
    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
    # otherwise load standard tensorflow saved model
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights,
                                                 tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']

    # begin video capture
    try:
        vid = cv2.VideoCapture(int(video_path))
    except:
        vid = cv2.VideoCapture(video_path)

    out = None

    # get video ready to save locally if flag is set
    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    frame_num = 0
    # while video is running
    while True:
        return_value, frame = vid.read()
        if return_value:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(frame)
        else:
            print('Video has ended or failed, try a different video format!')
            break
        frame_num += 1
        print('Frame #: ', frame_num)
        frame_size = frame.shape[:2]
        image_data = cv2.resize(frame, (input_size, input_size))
        image_data = image_data / 255.
        image_data = image_data[np.newaxis, ...].astype(np.float32)
        start_time = time.time()

        # run detections on tflite if flag is set
        if FLAGS.framework == 'tflite':
            interpreter.set_tensor(input_details[0]['index'], image_data)
            interpreter.invoke()
            pred = [
                interpreter.get_tensor(output_details[i]['index'])
                for i in range(len(output_details))
            ]
            # run detections using yolov3 if flag is set
            if FLAGS.model == 'yolov3' and FLAGS.tiny == True:
                boxes, pred_conf = filter_boxes(pred[1],
                                                pred[0],
                                                score_threshold=0.25,
                                                input_shape=tf.constant(
                                                    [input_size, input_size]))
            else:
                boxes, pred_conf = filter_boxes(pred[0],
                                                pred[1],
                                                score_threshold=0.25,
                                                input_shape=tf.constant(
                                                    [input_size, input_size]))
        else:
            batch_data = tf.constant(image_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(
                pred_conf,
                (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=FLAGS.iou,
            score_threshold=FLAGS.score)

        # convert data to numpy arrays and slice out unused elements
        num_objects = valid_detections.numpy()[0]
        bboxes = boxes.numpy()[0]
        bboxes = bboxes[0:int(num_objects)]
        scores = scores.numpy()[0]
        scores = scores[0:int(num_objects)]
        classes = classes.numpy()[0]
        classes = classes[0:int(num_objects)]

        # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height
        original_h, original_w, _ = frame.shape
        bboxes = utils.format_boxes(bboxes, original_h, original_w)

        # store all predictions in one parameter for simplicity when calling functions
        pred_bbox = [bboxes, scores, classes, num_objects]

        # read in all class names from config
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)

        # by default allow all classes in .names file
        allowed_classes = list(class_names.values())

        # custom allowed classes (uncomment line below to customize tracker for only people)
        #allowed_classes = ['person']

        # loop through objects and use class index to get class name, allow only classes in allowed_classes list
        names = []
        deleted_indx = []
        for i in range(num_objects):
            class_indx = int(classes[i])
            class_name = class_names[class_indx]
            if class_name not in allowed_classes:
                deleted_indx.append(i)
            else:
                names.append(class_name)
        names = np.array(names)
        count = len(names)
        if FLAGS.count:
            cv2.putText(frame, "Objects being tracked: {}".format(count),
                        (5, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2,
                        (0, 255, 0), 2)
            print("Objects being tracked: {}".format(count))
        # delete detections that are not in allowed_classes
        bboxes = np.delete(bboxes, deleted_indx, axis=0)
        scores = np.delete(scores, deleted_indx, axis=0)

        # encode yolo detections and feed to tracker
        features = encoder(frame, bboxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                bboxes, scores, names, features)
        ]

        # initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maximum suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes,
                                                    nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        # update tracks
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()

            # draw bbox on screen
            # names = {'6_d': 'Thomas Delaney',
            #          '10_b': 'Leroy Sane',
            #          '18_b': 'Leon Goretzka',
            #          '25_b': 'Thomas Muller',
            #          '5_d': 'Dan-Axel Zagadou',
            #          '12_d': 'Zaragoza',
            #          '4_b': 'Niklas Sule',
            #          '14_d': 'Nico Schulz',
            #          '11_d': 'Marco Reus',
            #          'Referee': 'Referee',
            #          'ball': 'ball',
            #          '10_d': 'Thorgan Hazard',
            #          '6_b': 'Joshua Kimmich ',
            #          'gk_b': 'Ron-Thorben Hoffmann(GK)',
            #          '17_b': 'Jérôme Boateng',
            #          '27_b': 'David Alaba',
            #          '9_d': 'Erling Haaland',
            #          '8_d': 'Mahmoud Dahoud',
            #          'gk_d': 'Luca Unbehaun(GK)',
            #          '19_b': 'Alphonso Davies',
            #          '29_b': 'Kingsley Coman',
            #          '24_d': 'Marcel Schmelzer',
            #          '9_b': 'Robert Lewandowski',
            #          "23_d": 'Emre Can',
            #          }
            # if class_name == 'Referee':
            #     color = (0, 0, 0)
            if class_name == 'ball':
                # color = (255, 255, 255)
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), (255, 255, 255), 1)
            # else:

            # try:
            #     colors = {'b': (252, 3, 78), 'd': (250, 247, 80)}
            #     color = colors[str(class_name.split('_')[-1])]
            # except KeyError:
            #     pass

            # class_name = names[str(class_name)]
            # color = (250, 247, 80)

            # color = colors[int(track.track_id) % len(colors)]
            # color = [i * 255 for i in color]
            # cv2.rectangle(frame, (int(bbox[0]), int(
            #     bbox[1])), (int(bbox[2]), int(bbox[3])), color, 1)
            # cv2.rectangle(frame, (int(bbox[0]), int(
            #     bbox[1]-30)), (int(bbox[0])+(len(str(class_name)))*17, int(bbox[1])), color, -1)
            cv2.putText(frame, class_name, (int(bbox[0]), int(bbox[1] - 10)),
                        0, 0.75, (255, 251, 46), 2)

            # if enable info flag then print details about each track
            if FLAGS.info:
                print(
                    "Tracker ID: {}, Class: {},  BBox Coords (xmin, ymin, xmax, ymax): {}"
                    .format(str(track.track_id), class_name, (int(
                        bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))))

        # calculate frames per second of running detections
        fps = 1.0 / (time.time() - start_time)
        print("FPS: %.2f" % fps)
        result = np.asarray(frame)
        result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

        if not FLAGS.dont_show:
            cv2.imshow("Output Video", result)

        # if output flag is set, save video file
        if FLAGS.output:
            out.write(result)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cv2.destroyAllWindows()
Example #24
0
def main(_argv):
    # Definition of the parameters
    max_cosine_distance = 0.4
    nn_budget = None
    nms_max_overlap = 1.0

    # initialize deep sort
    model_filename = cfg.PATH + '/model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    # calculate cosine distance metric
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    # initialize tracker
    tracker = Tracker(metric)

    # load configuration for object detector
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size

    # tf
    saved_model_loaded = tf.saved_model.load(FLAGS.weights,
                                             tags=[tag_constants.SERVING])
    infer = saved_model_loaded.signatures['serving_default']

    while True:
        data = sys.stdin.readline()
        if data:
            data = json.loads(data)
            if data['end']:
                break
            frame = np.array(data['frame_image'], dtype=np.uint8)

            image_data = frame / 255.
            image_data = image_data[np.newaxis, ...].astype(np.float32)

            # tf
            batch_data = tf.constant(image_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

            boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
                boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
                scores=tf.reshape(
                    pred_conf,
                    (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
                max_output_size_per_class=50,
                max_total_size=50,
                iou_threshold=FLAGS.iou,
                score_threshold=FLAGS.score)

            # convert data to numpy arrays and slice out unused elements
            num_objects = valid_detections.numpy()[0]
            bboxes = boxes.numpy()[0]
            bboxes = bboxes[0:int(num_objects)]
            scores = scores.numpy()[0]
            scores = scores[0:int(num_objects)]
            classes = classes.numpy()[0]
            classes = classes[0:int(num_objects)]

            # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height
            original_h, original_w, _ = frame.shape
            bboxes = utils.format_boxes(bboxes, original_h, original_w)

            # store all predictions in one parameter for simplicity when calling functions
            pred_bbox = [bboxes, scores, classes, num_objects]

            # read in all class names from config
            class_names = utils.read_class_names(cfg.YOLO.CLASSES)

            # by default allow all classes in .names file
            allowed_classes = list(class_names.values())

            # custom allowed classes (uncomment line below to customize tracker for only people)
            #allowed_classes = ['person']

            # loop through objects and use class index to get class name, allow only classes in allowed_classes list
            names = []
            deleted_indx = []
            for i in range(num_objects):
                class_indx = int(classes[i])
                class_name = class_names[class_indx]
                if class_name not in allowed_classes:
                    deleted_indx.append(i)
                else:
                    names.append(class_name)
            names = np.array(names)

            # delete detections that are not in allowed_classes
            bboxes = np.delete(bboxes, deleted_indx, axis=0)
            scores = np.delete(scores, deleted_indx, axis=0)

            # encode yolo detections and feed to tracker
            features = encoder(frame, bboxes)
            detections = [
                Detection(bbox, score, class_name, feature)
                for bbox, score, class_name, feature in zip(
                    bboxes, scores, names, features)
            ]

            # run non-maxima suppression
            boxs = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            classes = np.array([d.class_name for d in detections])
            indices = preprocessing.non_max_suppression(
                boxs, classes, nms_max_overlap, scores)
            detections = [detections[i] for i in indices]

            # ds = []
            # for detection in detections:
            #     d = dict()
            #     d["bbox"] = detection.tlwh.tolist()
            #     d["confidence"] = detection.confidence
            #     d["class"] = detection.class_name
            #     ds.append(d)
            #
            # # send data to Node (without tracking...)
            # print(json.dumps(ds))

            #Call the tracker
            tracker.predict()
            tracker.update(detections)

            # Store tracks for json...
            tracks = []

            # update tracks
            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                class_name = track.get_class()
                t = dict()
                bbs = track.to_tlbr().tolist()
                t["class"] = class_name
                bbox = dict()
                bbox["left"] = bbs[0]
                bbox["top"] = bbs[1]
                bbox["right"] = bbs[2]
                bbox["bottom"] = bbs[3]
                t["bbox"] = bbox
                t["id"] = track.track_id
                t["score"] = track.detection_actual_score
                tracks.append(t)

            #send data to Node!
            print(json.dumps(tracks))
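Example #24 reads one JSON object per line from stdin ({"end": ..., "frame_image": ...}) and prints the confirmed tracks back to stdout as JSON for the parent process (Node, per the comments). A minimal Python stand-in for that parent, assuming the field names above and a hypothetical script name, could look like:

import json
import subprocess

import cv2

# hypothetical script name; the frame is sent as a nested uint8 list, which mirrors
# np.array(data['frame_image'], dtype=np.uint8) on the receiving side
proc = subprocess.Popen(['python', 'object_tracker_stdin.py'],
                        stdin=subprocess.PIPE, stdout=subprocess.PIPE, text=True)

cap = cv2.VideoCapture(0)
ok, frame = cap.read()
if ok:
    proc.stdin.write(json.dumps({'end': False, 'frame_image': frame.tolist()}) + '\n')
    proc.stdin.flush()
    print(proc.stdout.readline())  # JSON list of tracks for this frame

proc.stdin.write(json.dumps({'end': True, 'frame_image': []}) + '\n')
proc.stdin.flush()

Note that the tracker above feeds the frame to the model without resizing, so the sender would have to supply frames that already match the model input size.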
Example #25
0
def main(_argv):
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size
    images = FLAGS.images

    # load model
    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights,
                                                 tags=[tag_constants.SERVING])

    # loop through images in list and run Yolov4 model on each
    for count, image_path in enumerate(images, 1):
        original_image = cv2.imread(image_path)
        original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)

        image_data = cv2.resize(original_image, (input_size, input_size))
        image_data = image_data / 255.

        # get image name by using split method
        image_name = image_path.split('/')[-1]
        image_name = image_name.split('.')[0]

        images_data = []
        for i in range(1):
            images_data.append(image_data)
        images_data = np.asarray(images_data).astype(np.float32)

        if FLAGS.framework == 'tflite':
            interpreter.allocate_tensors()
            input_details = interpreter.get_input_details()
            output_details = interpreter.get_output_details()
            interpreter.set_tensor(input_details[0]['index'], images_data)
            interpreter.invoke()
            pred = [
                interpreter.get_tensor(output_details[i]['index'])
                for i in range(len(output_details))
            ]
            if FLAGS.model == 'yolov3' and FLAGS.tiny == True:
                boxes, pred_conf = filter_boxes(pred[1],
                                                pred[0],
                                                score_threshold=0.25,
                                                input_shape=tf.constant(
                                                    [input_size, input_size]))
            else:
                boxes, pred_conf = filter_boxes(pred[0],
                                                pred[1],
                                                score_threshold=0.25,
                                                input_shape=tf.constant(
                                                    [input_size, input_size]))
        else:
            infer = saved_model_loaded.signatures['serving_default']
            batch_data = tf.constant(images_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

        # run non max suppression on detections
        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(
                pred_conf,
                (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=FLAGS.iou,
            score_threshold=FLAGS.score)

        # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, xmax, ymax
        original_h, original_w, _ = original_image.shape
        bboxes = utils.format_boxes(boxes.numpy()[0], original_h, original_w)

        # hold all detection data in one variable
        pred_bbox = [
            bboxes,
            scores.numpy()[0],
            classes.numpy()[0],
            valid_detections.numpy()[0]
        ]

        # read in all class names from config
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)

        # by default allow all classes in .names file
        allowed_classes = list(class_names.values())

        # custom allowed classes (uncomment line below to allow detections for only people)
        #allowed_classes = ['person']

        # if crop flag is enabled, crop each detection and save it as new image
        if FLAGS.crop:
            crop_path = os.path.join(os.getcwd(), 'detections', 'crop',
                                     image_name)
            try:
                os.mkdir(crop_path)
            except FileExistsError:
                pass
            crop_objects(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB),
                         pred_bbox, crop_path, allowed_classes)

        # if ocr flag is enabled, perform general text extraction using Tesseract OCR on object detection bounding box
        if FLAGS.ocr:
            ocr(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB), pred_bbox)

        # if count flag is enabled, perform counting of objects
        if FLAGS.count:
            # count objects found
            counted_classes = count_objects(pred_bbox,
                                            by_class=False,
                                            allowed_classes=allowed_classes)
            # loop through dict and print
            for key, value in counted_classes.items():
                print("Number of {}s: {}".format(key, value))
            image = utils.draw_bbox(original_image,
                                    pred_bbox,
                                    FLAGS.info,
                                    counted_classes,
                                    allowed_classes=allowed_classes,
                                    read_plate=FLAGS.plate)
        else:
            image = utils.draw_bbox(original_image,
                                    pred_bbox,
                                    FLAGS.info,
                                    allowed_classes=allowed_classes,
                                    read_plate=FLAGS.plate)

        image = Image.fromarray(image.astype(np.uint8))
        if not FLAGS.dont_show:
            image.show()
        image = cv2.cvtColor(np.array(image), cv2.COLOR_BGR2RGB)
        cv2.imwrite(FLAGS.output + 'detection' + str(count) + '.png', image)
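Nearly every example on this page calls utils.format_boxes on the NMS output. A minimal sketch of the assumed conversion (not the library's actual code), based on the comments in these examples: combined_non_max_suppression returns normalized (ymin, xmin, ymax, xmax), the detection examples expect pixel corners, and the tracking examples expect pixel (xmin, ymin, width, height) for DeepSORT.

import numpy as np

def format_boxes_sketch(bboxes, image_height, image_width, tlwh=False):
    # normalized (ymin, xmin, ymax, xmax) -> pixel corners,
    # or pixel (xmin, ymin, width, height) when tlwh=True (DeepSORT's format)
    out = []
    for ymin, xmin, ymax, xmax in bboxes:
        x1, y1 = int(xmin * image_width), int(ymin * image_height)
        x2, y2 = int(xmax * image_width), int(ymax * image_height)
        out.append([x1, y1, x2 - x1, y2 - y1] if tlwh else [x1, y1, x2, y2])
    return np.array(out)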
Example #26
0
    def main(self, frame_data):
        # Definition of the parameters
        nms_max_overlap = 1.0

        # set HyperParams
        size = 416
        iou = 0.45
        score = 0.50
        info = False
        people_num = 4

        input_size = size

        self.indexing.queue.clear()

        for k in range(people_num):
            self.indexing.put(k+1)

        out = None

        frame_data = cv2.cvtColor(frame_data, cv2.COLOR_BGR2RGB)
        image_data = cv2.resize(frame_data, (input_size, input_size))
        image_data = image_data / 255.
        image_data = image_data[np.newaxis, ...].astype(np.float32)
        start_time = time.time()

        batch_data = tf.constant(image_data)
        pred_bbox = self.infer(batch_data)  # pass through the YOLO model to get bounding box predictions
        for key, value in pred_bbox.items():
            boxes = value[:, :, 0:4]  # box coordinates
            pred_conf = value[:, :, 4:]  # confidence vector

        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(
                pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=iou,
            score_threshold=score
        )

        # convert data to numpy arrays and slice out unused elements
        num_objects = valid_detections.numpy()[0]
        bboxes = boxes.numpy()[0]
        bboxes = bboxes[0:int(num_objects)]
        scores = scores.numpy()[0]
        scores = scores[0:int(num_objects)]
        classes = classes.numpy()[0]
        classes = classes[0:int(num_objects)]

        # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height
        original_h, original_w, _ = frame_data.shape
        bboxes = utils.format_boxes(bboxes, original_h, original_w)

        # store all predictions in one parameter for simplicity when calling functions
        pred_bbox = [bboxes, scores, classes, num_objects]

        # read in all class names from config
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)

        # by default allow all classes in .names file
        # allowed_classes = list(class_names.values())

        # custom allowed classes (the tracker is restricted to people only)
        allowed_classes = ['person']

        # loop through objects and use class index to get class name, allow only classes in allowed_classes list
        names = []
        deleted_indx = []
        for i in range(num_objects):
            class_indx = int(classes[i])
            class_name = class_names[class_indx]
            if class_name not in allowed_classes:
                deleted_indx.append(i)
            else:
                names.append(class_name)
        names = np.array(names)
        count = len(names)
        if count:
            cv2.putText(frame_data, "Objects being tracked: {}".format(count), (5, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL,
                        2,(0, 255, 0), 2)
            # print("Objects being tracked: {}".format(count))
        # delete detections that are not in allowed_classes
        bboxes = np.delete(bboxes, deleted_indx, axis=0)
        scores = np.delete(scores, deleted_indx, axis=0)

        # encode yolo detections and feed to tracker
        features = self.encoder(frame_data, bboxes)
        detections = [Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in
                      zip(bboxes, scores, names, features)]

        # initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # DeepSort Tracking Start

        # Call the tracker
        self.tracker.predict()  # load tracker
        self.tracker.update(detections)

        #check is_confirmed
        is_not_confirmed = 0

        tracks_count = 0
        for w, track in enumerate(self.tracker.tracks):
            tracks_count += 1

        # print('count', tracks_count)

        # update tracks
        for index, track in enumerate(self.tracker.tracks):
            if not track.is_confirmed() or track.time_since_update > 1:
                is_not_confirmed += 1
                continue
            if index-is_not_confirmed+1 > people_num:
                break

            bbox = track.to_tlbr()
            class_name = track.get_class()

            # draw bbox on screen  # it seems this drawing should be finished before the frame is sent back

            for i in range(self.indexing.qsize()):
                check_index = self.indexing.get()
                if track.track_id == check_index:
                    color = colors[int(track.track_id)*8 % len(colors)]
                    color = [j * 255 for j in color]
                    cv2.rectangle(frame_data, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2)
                    cv2.rectangle(frame_data, (int(bbox[0]), int(bbox[1] - 30)),
                                  (int(bbox[0]) + (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])),
                                  color, -1)
                    cv2.putText(frame_data, class_name + "-" + str(track.track_id),
                                (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                                (255, 255, 255), 2)
                    break
                else:
                    self.indexing.put(check_index)

                if i == self.indexing.qsize() - 1:
                    cng_index = self.indexing.get()
                    print('index changed', track.track_id, '->', cng_index)

                    # track.track_id = cng_index

                    color = colors[int(cng_index)*8 % len(colors)]
                    color = [j * 255 for j in color]
                    cv2.rectangle(frame_data, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2)
                    cv2.rectangle(frame_data, (int(bbox[0]), int(bbox[1] - 30)),
                                  (int(bbox[0]) + (len(class_name) + len(str(cng_index))) * 17, int(bbox[1])),
                                  color, -1)
                    cv2.putText(frame_data, class_name + "-" + str(cng_index),
                                (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                                (255, 255, 255), 2)

            # if enable info flag then print details about each track
            if info:
                print("Tracker ID: {}, Class: {},  BBox Coords (xmin, ymin, xmax, ymax): {}".format(str(track.track_id),
                                                                                                    class_name, (
                                                                                                        int(bbox[0]),
                                                                                                        int(bbox[1]),
                                                                                                        int(bbox[2]),
                                                                                                        int(bbox[3]))))

        # calculate frames per second of running detections
        fps = 1.0 / (time.time() - start_time)
        # print("FPS: %.2f" % fps)
        result = cv2.cvtColor(frame_data, cv2.COLOR_RGB2BGR)

        return result
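Example #26 is a method, so it relies on state built elsewhere (self.infer, self.encoder, self.tracker, self.indexing). A minimal sketch of an owning class, with names inferred from the method body and import paths following the usual deep_sort tools layout (adjust to your project):

import queue

import tensorflow as tf
from tensorflow.python.saved_model import tag_constants
from deep_sort import nn_matching
from deep_sort.tracker import Tracker
from tools import generate_detections as gdet

class PersonTracker:  # hypothetical owner of the main() method above
    def __init__(self, weights_path, model_filename='model_data/mars-small128.pb'):
        # appearance encoder and cosine-metric tracker, as in the other examples
        self.encoder = gdet.create_box_encoder(model_filename, batch_size=1)
        metric = nn_matching.NearestNeighborDistanceMetric("cosine", 0.4, None)
        self.tracker = Tracker(metric)
        # YOLO SavedModel serving signature
        saved_model_loaded = tf.saved_model.load(weights_path, tags=[tag_constants.SERVING])
        self.infer = saved_model_loaded.signatures['serving_default']
        # queue of player index slots (1..people_num), cleared and refilled each frame
        self.indexing = queue.Queue()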
Example #27
0
    def _capture_loop(self):
        dt = 1 / self.fps
        self.FLAGS = self.FLAGS()
        config = ConfigProto()
        config.gpu_options.allow_growth = True
        session = InteractiveSession(config=config)
        STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(self.FLAGS)
        input_size = self.FLAGS.size

        saved_model_loaded = tf.saved_model.load(self.FLAGS.weights,
                                                 tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']
        logger.debug("Observation started")

        out = None

        while self.isrunning:
            return_value, frame = self.vid.read()
            if return_value:
                if len(self.frames) == self.max_frames:
                    self.frames = self.frames[1:]
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                image = Image.fromarray(frame)

                frame_size = frame.shape[:2]
                image_data = cv2.resize(frame, (input_size, input_size))
                image_data = image_data / 255.
                image_data = image_data[np.newaxis, ...].astype(np.float32)
                start_time = time.time()

                batch_data = tf.constant(image_data)
                pred_bbox = infer(batch_data)
                for key, value in pred_bbox.items():
                    boxes = value[:, :, 0:4]
                    pred_conf = value[:, :, 4:]

                boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
                    boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
                    scores=tf.reshape(
                        pred_conf,
                        (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
                    max_output_size_per_class=50,
                    max_total_size=50,
                    iou_threshold=self.FLAGS.iou,
                    score_threshold=self.FLAGS.score)

                # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, xmax, ymax
                original_h, original_w, _ = frame.shape
                bboxes = utils.format_boxes(boxes.numpy()[0], original_h,
                                            original_w)

                pred_bbox = [
                    bboxes,
                    scores.numpy()[0],
                    classes.numpy()[0],
                    valid_detections.numpy()[0]
                ]

                image = utils.draw_bbox(frame, pred_bbox, self.FLAGS.info)

                self.frames.append(frame)
            time.sleep(dt)
        logger.info("Thread stopped successfully")
Example #28
0
def main(_argv):
    # Definition of the parameters
    nms_max_overlap = 1.0

    # initialize deep sort parameters
    encoder, tracker = init_deepsort_params()

    # load configuration for object detector
    input_size, video_path = load_obj_detector_cfg()

    # load tflite model if flag is set
    if FLAGS.framework == 'tflite':
        tfl = tf_lite_ngine()
    # otherwise load standard tensorflow saved model
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights,
                                                 tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']

    # begin video capture
    vid = get_video_stream(video_path)

    # get video ready to save locally if flag is set
    out = None
    if FLAGS.output:
        out = init_video_out(vid)

    frame_num = 0
    # while video is running
    while True:
        return_value, frame = vid.read()
        if return_value:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(frame)
        else:
            print('Video has ended or failed, try a different video format!')
            break

        frame_num += 1
        print('Frame #: ', frame_num)

        frame_size = frame.shape[:2]
        image_data = init_image_data(frame, input_size)
        start_time = time.time()

        # run detections on tflite if flag is set
        if FLAGS.framework == 'tflite':
            boxes, pred_conf = tfl.detect(image_data)
        else:
            batch_data = tf.constant(image_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

        boxes, scores, classes, valid_detections = apply_tf_nms(
            boxes, pred_conf)

        # convert data to numpy arrays and slice out unused elements
        num_objects, bboxes, scores, classes = detections_to_np_array(
            valid_detections, boxes, scores, classes)

        # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height
        original_h, original_w, _ = frame.shape
        bboxes = utils.format_boxes(bboxes, original_h, original_w)

        # fetch allowed object classes and ignore the rest
        names, deleted_indx = get_allowed_obj_classes(classes, num_objects)
        if FLAGS.count:
            show_tracked_object_count(names, frame)

        # delete detections that are not in allowed_classes
        bboxes = np.delete(bboxes, deleted_indx, axis=0)
        scores = np.delete(scores, deleted_indx, axis=0)

        # encode yolo detections and feed to tracker
        features = encoder(frame, bboxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                bboxes, scores, names, features)
        ]

        # process detections with YOLO tracker
        frame = process_detections(tracker, detections, nms_max_overlap, frame)

        # calculate and print frames per second of running detections
        print_fps(start_time, time.time(), frame)
        result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

        if not FLAGS.dont_show:
            cv2.imshow("Output Video", result)

        # if output flag is set, save video file
        if FLAGS.output:
            out.write(result)
        if cv2.waitKey(1) & 0xFF == ord('q'): break
    cv2.destroyAllWindows()
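Example #28 factors the inline logic of the earlier examples into helpers (apply_tf_nms, detections_to_np_array, and friends) that are not shown here. Sketches of two of them, with bodies assumed to match the inline code used elsewhere on this page (compare Example #29); FLAGS is the same module-level flags object the example already uses:

import tensorflow as tf

def apply_tf_nms(boxes, pred_conf):
    # class-wise non-max suppression over the raw YOLO head output,
    # mirroring the inline tf.image.combined_non_max_suppression calls above
    return tf.image.combined_non_max_suppression(
        boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
        scores=tf.reshape(
            pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
        max_output_size_per_class=50,
        max_total_size=50,
        iou_threshold=FLAGS.iou,        # absl flags already defined by the example
        score_threshold=FLAGS.score)

def detections_to_np_array(valid_detections, boxes, scores, classes):
    # convert the batch-0 tensors to numpy and drop the padded, unused rows
    num_objects = int(valid_detections.numpy()[0])
    return (num_objects,
            boxes.numpy()[0][:num_objects],
            scores.numpy()[0][:num_objects],
            classes.numpy()[0][:num_objects])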
Example #29
0
def main(_argv):

    # Definition of the parameters
    max_cosine_distance = 0.4
    nn_budget = None
    nms_max_overlap = 1.0

    # initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    # calculate cosine distance metric
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    # initialize tracker
    tracker = Tracker(metric)

    # load configuration for object detector
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size
    #images = FLAGS.images
    video_path = FLAGS.video

    # load tflite model if flag is set
    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
    # otherwise load standard tensorflow saved model
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights,
                                                 tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']

    # begin video capture
    try:
        vid = cv2.VideoCapture(int(video_path))
    except:
        vid = cv2.VideoCapture(video_path)

    out = None

    # get video ready to save locally if flag is set
    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    frame_num = 0

    if FLAGS.shirt:
        allowed_classes = ['Shirt']
        #ROI =
    if FLAGS.trouser:
        allowed_classes = ['Trousers']
    if FLAGS.jeans:
        allowed_classes = ['Jeans']
    if FLAGS.dress:
        allowed_classes = ['Dress']
    if FLAGS.footwear:
        allowed_classes = ['Footwear']
    if FLAGS.jacket:
        allowed_classes = ['Jacket']
    if FLAGS.skirt:
        allowed_classes = ['Skirt']
    if FLAGS.suit:
        allowed_classes = ['Suit']

    # while video is running
    while True:
        return_value, frame = vid.read()
        if return_value:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(frame)
        else:
            print('Video has ended or failed, try a different video format!')
            break
        frame_num += 1
        print('Frame #: ', frame_num)
        frame_size = frame.shape[:2]
        image_data = cv2.resize(frame, (input_size, input_size))
        image_data = image_data / 255.
        image_data = image_data[np.newaxis, ...].astype(np.float32)
        start_time = time.time()

        # run detections on tflite if flag is set
        if FLAGS.framework == 'tflite':
            interpreter.set_tensor(input_details[0]['index'], image_data)
            interpreter.invoke()
            pred = [
                interpreter.get_tensor(output_details[i]['index'])
                for i in range(len(output_details))
            ]
            # yolov3-tiny orders its output tensors differently, so swap them
            if FLAGS.model == 'yolov3' and FLAGS.tiny == True:
                boxes, pred_conf = filter_boxes(pred[1],
                                                pred[0],
                                                score_threshold=0.25,
                                                input_shape=tf.constant(
                                                    [input_size, input_size]))
            else:
                boxes, pred_conf = filter_boxes(pred[0],
                                                pred[1],
                                                score_threshold=0.25,
                                                input_shape=tf.constant(
                                                    [input_size, input_size]))
        else:
            batch_data = tf.constant(image_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(
                pred_conf,
                (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=FLAGS.iou,
            score_threshold=FLAGS.score)

        # convert data to numpy arrays and slice out unused elements
        num_objects = valid_detections.numpy()[0]
        bboxes = boxes.numpy()[0]
        bboxes = bboxes[0:int(num_objects)]
        scores = scores.numpy()[0]
        scores = scores[0:int(num_objects)]
        classes = classes.numpy()[0]
        classes = classes[0:int(num_objects)]

        # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height
        original_h, original_w, _ = frame.shape
        bboxes = utils.format_boxes(bboxes, original_h, original_w)

        # store all predictions in one parameter for simplicity when calling functions
        pred_bbox = [bboxes, scores, classes, num_objects]

        # read in all class names from config
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)

        # by default allow all classes in .names file, unless one of the
        # clothing-class flags above already restricted the tracker
        if not any([FLAGS.shirt, FLAGS.trouser, FLAGS.jeans, FLAGS.dress,
                    FLAGS.footwear, FLAGS.jacket, FLAGS.skirt, FLAGS.suit]):
            allowed_classes = list(class_names.values())

        # custom allowed classes (uncomment line below to customize tracker for only people)
        #allowed_classes = ['person']

        # loop through objects and use class index to get class name, allow only classes in allowed_classes list
        names = []
        deleted_indx = []
        for i in range(num_objects):
            class_indx = int(classes[i])
            class_name = class_names[class_indx]
            if class_name not in allowed_classes:
                deleted_indx.append(i)
            else:
                names.append(class_name)
        names = np.array(names)
        count = len(names)
        if FLAGS.count:
            cv2.putText(frame, "Objects being tracked: {}".format(count),
                        (5, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2,
                        (0, 255, 0), 2)
            print("Objects being tracked: {}".format(count))
        # delete detections that are not in allowed_classes
        bboxes = np.delete(bboxes, deleted_indx, axis=0)
        scores = np.delete(scores, deleted_indx, axis=0)

        # encode yolo detections and feed to tracker
        features = encoder(frame, bboxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                bboxes, scores, names, features)
        ]

        #initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes,
                                                    nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        # update tracks
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()

            # draw bbox on screen
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            #cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2)
            #cv2.rectangle(frame, (int(bbox[0]), int(bbox[1]-30)), (int(bbox[0])+(len(class_name)+len(str(track.track_id)))*17, int(bbox[1])), color, -1)
            #cv2.putText(frame, class_name + "-" + str(track.track_id),(int(bbox[0]), int(bbox[1]-10)),0, 0.75, (255,255,255),2)

            # if enable info flag then print details about each track
            if FLAGS.info:
                print(
                    "Tracker ID: {}, Class: {},  BBox Coords (xmin, ymin, xmax, ymax): {}"
                    .format(str(track.track_id), class_name, (int(
                        bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))))

            if FLAGS.color:
                PATH = './training.data'
                #(int(bbox[0])):(int(bbox[2])), (int(bbox[1])):(int(bbox[3]))
                #ROI = frame[(int(bbox[0]) +50) :(int(bbox[2]) - 50), (int(bbox[1])+ 50):(int(bbox[3])-50)]
                #ROI = frame[(int(bbox[1])) +15 :(int(bbox[3])-15),(int(bbox[0])+15):(int(bbox[2])-15)]
                ROI = frame[int((int(bbox[1]) + int(bbox[3])) /
                                2):int((int(bbox[1]) + int(bbox[3])) / 2) + 1,
                            int((int(bbox[0]) + int(bbox[2])) /
                                2):int((int(bbox[0]) + int(bbox[2])) / 2) + 1]
                #ROI = frame[(int(bbox[1])):(int(bbox[3])),(int(bbox[0])):(int(bbox[2]))]
                #ROI = frame[int(0.5* (int(bbox[1] - 50)+ int(bbox[3] + 50))),int(0.5*(int(bbox[0] - 50) +int(bbox[2] + 50 )))]
                #print(ROI)

                color_histogram_feature_extraction.color_histogram_of_test_image(
                    ROI)
                prediction = knn_classifier.main('training.data', 'test.data')
                #prediction = 'red'
                red = load_red('test.data')
                Red = str(red)
                #Red = str(Red_1)

                print('this is the variable of the red:- ' + str(Red))
                green = load_green('test.data')
                Green = str(green)
                #Green = str(Green_1)
                print('this is the variable of the green:- ' + str(Green))
                blue = load_blue('test.data')
                #Blue_1 = int(blue)
                Blue = str(blue)
                print('this is the variable of the blue:- ' + str(Blue))

                #hsv = rgb_to_hsv(red,green,blue)
                #print("HSV: " + str(hsv))

                if red is not None and green is not None and blue is not None:
                    HLS = colorsys.rgb_to_hls(red, green, blue)
                    HUE = int(HLS[0])
                    Light = int(HLS[1])
                    Saturation = int(HLS[2])

                    print("HLS is equal to", HLS)
                    print('HUE: ', HUE)
                    print('LIGHT: ', Light)
                    print('Saturation', Saturation)

                if red is not None and green is not None and blue is not None:
                    HSV = rgb_to_hsv(red, green, blue)
                    HUE_1 = int(HSV[0])
                    Saturation_1 = int(HSV[1])
                    Value = int(HSV[2])

                    print("HSV is equal to", HSV)
                    print('Hue: ', HUE_1)
                    print('saturation: ', Saturation_1)
                    print('value', Value)

                print(str(prediction) + " " + str(class_name))

            if FLAGS.Fuzzy_black:
                #if str(59.7) <= Red < str(200.9)  and  str(74) <= Blue < str(207) and str(70) <= Green < str(203):
                if 0 <= HUE_1 < 210 and 0 <= Saturation_1 < 41 and 0 <= Value < 86:
                    print("THIS IS THE black COLOR yaaaaaaaaaaaaaaaaaaaa")
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])), color, 2)
                    cv2.rectangle(
                        frame, (int(bbox[0]), int(bbox[1] - 30)),
                        (int(bbox[0]) +
                         (len(class_name) + len(str(track.track_id))) * 17,
                         int(bbox[1])), color, -1)
                    cv2.putText(
                        frame,
                        class_name + " " + "BLACK" + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                        (255, 255, 255), 2)
            if FLAGS.Fuzzy_red:
                #if  str(139) <= Red < str(255)  and  str(0) <= Green < str(160) and str(0) <= Blue < str(128):
                if 0 <= HUE_1 < 348 and 47 <= Saturation_1 < 100 and 55 <= Value < 100:
                    print("THIS IS THE RED COLOR")
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])), color, 2)
                    cv2.rectangle(
                        frame, (int(bbox[0]), int(bbox[1] - 30)),
                        (int(bbox[0]) +
                         (len(class_name) + len(str(track.track_id))) * 17,
                         int(bbox[1])), color, -1)
                    cv2.putText(
                        frame,
                        class_name + " " + "RED" + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                        (255, 255, 255), 2)
            if FLAGS.Fuzzy_orange:
                #if  str(255) <= Red < str(255)  and  str(69) <= Green < str(165) and str(0) <= Blue < str(80):
                if 9 <= HUE_1 < 39 and 69 <= Saturation_1 < 100 and Value == 100:
                    print("THIS IS THE ORANGE COLOR")
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])), color, 2)
                    cv2.rectangle(
                        frame, (int(bbox[0]), int(bbox[1] - 30)),
                        (int(bbox[0]) +
                         (len(class_name) + len(str(track.track_id))) * 17,
                         int(bbox[1])), color, -1)
                    cv2.putText(
                        frame, class_name + " " + "ORANGE" + "-" +
                        str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)),
                        0, 0.75, (255, 255, 255), 2)
            if FLAGS.Fuzzy_yellow:
                #if  str(189) <= Red < str(255)  and  str(183) <= Green < str(255) and str(0) <= Blue < str(224):
                if 0 <= HUE_1 < 56 and 12 <= Saturation_1 < 100 and 74 <= Value < 100:
                    print("THIS IS THE YELLOW COLOR")
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])), color, 2)
                    cv2.rectangle(
                        frame, (int(bbox[0]), int(bbox[1] - 30)),
                        (int(bbox[0]) +
                         (len(class_name) + len(str(track.track_id))) * 17,
                         int(bbox[1])), color, -1)
                    cv2.putText(
                        frame, class_name + " " + "YELLOW" + "-" +
                        str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)),
                        0, 0.75, (255, 255, 255), 2)
            if FLAGS.Fuzzy_blue:
                #if  str(0) <= Red < str(176)  and  str(0) <= Green < str(244) and str(112) <= Blue < str(255):
                if 187 <= HUE_1 < 240 and 21 <= Saturation_1 < 100 and 44 <= Value < 100:
                    print("THIS IS THE BLUE COLOR")
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])), color, 2)
                    cv2.rectangle(
                        frame, (int(bbox[0]), int(bbox[1] - 30)),
                        (int(bbox[0]) +
                         (len(class_name) + len(str(track.track_id))) * 17,
                         int(bbox[1])), color, -1)
                    cv2.putText(
                        frame,
                        class_name + " " + "BLUE" + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                        (255, 255, 255), 2)
            if FLAGS.Fuzzy_white:
                #if  str(240) <= Red < str(255)  and  str(228) <= Green < str(255) and str(215) <= Blue < str(255):
                if 0 <= HUE_1 < 340 and 0 <= Saturation_1 < 14 and 96 <= Value < 100:
                    print("THIS IS THE WHITE COLOR")
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])), color, 2)
                    cv2.rectangle(
                        frame, (int(bbox[0]), int(bbox[1] - 30)),
                        (int(bbox[0]) +
                         (len(class_name) + len(str(track.track_id))) * 17,
                         int(bbox[1])), color, -1)
                    cv2.putText(
                        frame,
                        class_name + " " + "WHITE" + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                        (255, 255, 255), 2)
            if FLAGS.Fuzzy_purple:
                #if  str(72) <= Red < str(255)  and  str(0) <= Green < str(230) and str(128) <= Blue < str(255):
                if 0 <= HUE_1 < 302 and 8 <= Saturation_1 < 100 and 50 <= Value < 100:
                    print("THIS IS THE PURPLE COLOR")
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])), color, 2)
                    cv2.rectangle(
                        frame, (int(bbox[0]), int(bbox[1] - 30)),
                        (int(bbox[0]) +
                         (len(class_name) + len(str(track.track_id))) * 17,
                         int(bbox[1])), color, -1)
                    cv2.putText(
                        frame, class_name + " " + "PURPLE" + "-" +
                        str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)),
                        0, 0.75, (255, 255, 255), 2)
            if FLAGS.Fuzzy_green:
                #if  str(0) <= Red < str(173)  and  str(100) <= Green < str(255) and str(0) <= Blue < str(170):
                if 0 <= HUE_1 < 160 and 24 <= Saturation_1 < 100 and 39 <= Value < 100:
                    print("THIS IS THE green COLOR")
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])), color, 2)
                    cv2.rectangle(
                        frame, (int(bbox[0]), int(bbox[1] - 30)),
                        (int(bbox[0]) +
                         (len(class_name) + len(str(track.track_id))) * 17,
                         int(bbox[1])), color, -1)
                    cv2.putText(
                        frame,
                        class_name + " " + "GREEN" + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                        (255, 255, 255), 2)
            if FLAGS.Fuzzy_brown:
                #if  str(128) <= Red < str(255)  and  str(0) <= Green < str(248) and str(0) <= Blue < str(288):
                if 0 <= HUE_1 < 48 and 14 <= Saturation_1 < 100 and 50 <= Value < 100:
                    print("THIS IS THE BROWN COLOR")
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])), color, 2)
                    cv2.rectangle(
                        frame, (int(bbox[0]), int(bbox[1] - 30)),
                        (int(bbox[0]) +
                         (len(class_name) + len(str(track.track_id))) * 17,
                         int(bbox[1])), color, -1)
                    cv2.putText(
                        frame,
                        class_name + " " + "BROWN" + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                        (255, 255, 255), 2)
            if FLAGS.Fuzzy_cyan:
                #if  str(0) <= Red < str(244)  and  str(128) <= Green < str(255) and str(128) <= Blue < str(255):
                if 0 <= HUE_1 < 182 and 12 <= Saturation_1 < 100 and 50 <= Value < 100:
                    print("THIS IS THE CYAN COLOR")
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])), color, 2)
                    cv2.rectangle(
                        frame, (int(bbox[0]), int(bbox[1] - 30)),
                        (int(bbox[0]) +
                         (len(class_name) + len(str(track.track_id))) * 17,
                         int(bbox[1])), color, -1)
                    cv2.putText(
                        frame,
                        class_name + " " + "CYAN" + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                        (255, 255, 255), 2)
            if FLAGS.Fuzzy_pink:
                #if  str(199) <= Red < str(255)  and  str(20) <= Green < str(192) and str(133) <= Blue < str(203):
                if 322 <= HUE_1 < 351 and 25 <= Saturation_1 < 92 and 78 <= Value < 100:
                    print("THIS IS THE PINK COLOR")
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])), color, 2)
                    cv2.rectangle(
                        frame, (int(bbox[0]), int(bbox[1] - 30)),
                        (int(bbox[0]) +
                         (len(class_name) + len(str(track.track_id))) * 17,
                         int(bbox[1])), color, -1)
                    cv2.putText(
                        frame,
                        class_name + " " + "PINK" + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                        (255, 255, 255), 2)

            if FLAGS.black:
                if prediction == 'black':
                    #ROI = frame[int((int(bbox[1]) + int(bbox[3]))/2):int((int(bbox[1]) + int(bbox[3]))/2)+1,int((int(bbox[0]) + int(bbox[2]))/2):int((int(bbox[0]) + int(bbox[2]))/2)+1]
                    #color_histogram_feature_extraction.color_histogram_of_test_image(ROI)
                    #prediction = knn_classifier.main('training.data','test.data')
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])), color, 2)
                    cv2.rectangle(
                        frame, (int(bbox[0]), int(bbox[1] - 30)),
                        (int(bbox[0]) +
                         (len(class_name) + len(str(track.track_id))) * 17,
                         int(bbox[1])), color, -1)
                    cv2.putText(
                        frame, class_name + " " + str(prediction) + "-" +
                        str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)),
                        0, 0.75, (255, 255, 255), 2)
            if FLAGS.blue:
                if prediction == 'blue':
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])), color, 2)
                    cv2.rectangle(
                        frame, (int(bbox[0]), int(bbox[1] - 30)),
                        (int(bbox[0]) +
                         (len(class_name) + len(str(track.track_id))) * 17,
                         int(bbox[1])), color, -1)
                    cv2.putText(
                        frame, class_name + " " + str(prediction) + "-" +
                        str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)),
                        0, 0.75, (255, 255, 255), 2)
            if FLAGS.red:
                if prediction == 'red':
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])), color, 2)
                    cv2.rectangle(
                        frame, (int(bbox[0]), int(bbox[1] - 30)),
                        (int(bbox[0]) +
                         (len(class_name) + len(str(track.track_id))) * 17,
                         int(bbox[1])), color, -1)
                    cv2.putText(
                        frame, class_name + " " + str(prediction) + "-" +
                        str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)),
                        0, 0.75, (255, 255, 255), 2)
            if FLAGS.yellow:
                if prediction == 'yellow':
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])), color, 2)
                    cv2.rectangle(
                        frame, (int(bbox[0]), int(bbox[1] - 30)),
                        (int(bbox[0]) +
                         (len(class_name) + len(str(track.track_id))) * 17,
                         int(bbox[1])), color, -1)
                    cv2.putText(
                        frame, class_name + " " + str(prediction) + "-" +
                        str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)),
                        0, 0.75, (255, 255, 255), 2)
            if FLAGS.orange:
                if prediction == 'orange':
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])), color, 2)
                    cv2.rectangle(
                        frame, (int(bbox[0]), int(bbox[1] - 30)),
                        (int(bbox[0]) +
                         (len(class_name) + len(str(track.track_id))) * 17,
                         int(bbox[1])), color, -1)
                    cv2.putText(
                        frame, class_name + " " + str(prediction) + "-" +
                        str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)),
                        0, 0.75, (255, 255, 255), 2)
            if FLAGS.violet:
                if prediction == 'violet':
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])), color, 2)
                    cv2.rectangle(
                        frame, (int(bbox[0]), int(bbox[1] - 30)),
                        (int(bbox[0]) +
                         (len(class_name) + len(str(track.track_id))) * 17,
                         int(bbox[1])), color, -1)
                    cv2.putText(
                        frame, class_name + " " + str(prediction) + "-" +
                        str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)),
                        0, 0.75, (255, 255, 255), 2)
            if FLAGS.white:
                if prediction == 'white':
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])), color, 2)
                    cv2.rectangle(
                        frame, (int(bbox[0]), int(bbox[1] - 30)),
                        (int(bbox[0]) +
                         (len(class_name) + len(str(track.track_id))) * 17,
                         int(bbox[1])), color, -1)
                    cv2.putText(
                        frame, class_name + " " + str(prediction) + "-" +
                        str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)),
                        0, 0.75, (255, 255, 255), 2)
            if FLAGS.green:
                if prediction == 'green':
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])), color, 2)
                    cv2.rectangle(
                        frame, (int(bbox[0]), int(bbox[1] - 30)),
                        (int(bbox[0]) +
                         (len(class_name) + len(str(track.track_id))) * 17,
                         int(bbox[1])), color, -1)
                    cv2.putText(
                        frame, class_name + " " + str(prediction) + "-" +
                        str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)),
                        0, 0.75, (255, 255, 255), 2)
                #cv2.putText(frame, class_name + " " + str(prediction) + "-" + str(track.track_id),(int(bbox[0]), int(bbox[1]-10)),0, 0.75, (255,255,255),2)
                #print('ferture data:' +" " +  feature_data)
                #result_1 = np.asarray(frame)
                #result_1 = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
                #cv2.imshow('color classifier', result_1)
                #print(color_histogram_feature_extraction.feature_data)

        # calculate frames per second of running detections
        fps = 1.0 / (time.time() - start_time)
        print("FPS: %.2f" % fps)
        result = np.asarray(frame)
        result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

        if not FLAGS.dont_show:
            cv2.imshow("Output Video", result)

        # if output flag is set, save video file
        if FLAGS.output:
            out.write(result)
        if cv2.waitKey(1) & 0xFF == ord('q'): break
    cv2.destroyAllWindows()
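The Fuzzy_* branches in Example #29 compare HUE_1 against degree ranges (0-360) and Saturation_1 / Value against percentages (0-100), so the rgb_to_hsv helper they call is presumably scaled that way. A minimal sketch under that assumption, for 0-255 RGB inputs:

import colorsys

def rgb_to_hsv(r, g, b):
    # hue in degrees (0-360), saturation and value in percent (0-100);
    # the 0-255 input scale is an assumption, not confirmed by the example
    h, s, v = colorsys.rgb_to_hsv(r / 255.0, g / 255.0, b / 255.0)
    return h * 360.0, s * 100.0, v * 100.0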
Example #30
0
def main(_argv):
    global lock, buffer, bts
    # Definition of the parameters
    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 0.8

    counter = []
    # initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    # calculate cosine distance metric
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    # initialize tracker
    tracker = Tracker(metric)

    # load configuration for object detector
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    input_size = FLAGS.size

    # load tflite model if flag is set
    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
    # otherwise load standard tensorflow saved model
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights,
                                                 tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']

    frame_num = 0

    # while video is running
    while True:
        with lock:
            bts += buffer
            buffer = b''
        time.sleep(0.01)

        # jpghead = bts.find(b'\xff\xd8')
        # jpgend = bts.find(b'\xff\xd9')
        # if jpghead < 0 or jpgend < 0:
        #     continue
        # if jpgend < jpghead:
        #     # raise Exception("{}..{}".format(jpghead, jpgend))
        #     continue
        # print(jpghead, jpgend)
        # jpg=bts[jpghead:jpgend+2]
        # bts=bts[jpgend+2:]

        # find a complete JPEG (SOI 0xFFD8 ... EOI 0xFFD9) in the byte buffer
        jpghead = bts.find(b'\xff\xd8')
        if jpghead < 0:
            continue
        bts = bts[jpghead:]
        jpgend = bts.find(b'\xff\xd9')
        if jpgend < 0:
            continue
        print(jpghead, jpgend)
        jpg = bts[0:jpgend + 2]
        bts = bts[jpgend + 2:]

        img = cv2.imdecode(np.frombuffer(jpg, dtype=np.uint8),
                           cv2.IMREAD_UNCHANGED)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        frame = cv2.resize(img, (640, 480))

        cv2.imshow('input', frame)
        cv2.waitKey(1)

        frame_num += 1
        print('Frame #: ', frame_num)
        image_data = cv2.resize(frame, (input_size, input_size))
        image_data = image_data / 255.
        image_data = image_data[np.newaxis, ...].astype(np.float32)
        start_time = time.time()

        bts = bts[-6000:]  # keep only the most recent bytes to bound the buffer

        # run detections on tflite if flag is set
        if FLAGS.framework == 'tflite':
            interpreter.set_tensor(input_details[0]['index'], image_data)
            interpreter.invoke()
            pred = [
                interpreter.get_tensor(output_details[i]['index'])
                for i in range(len(output_details))
            ]
            # yolov3-tiny orders its output tensors differently, so swap them
            if FLAGS.model == 'yolov3' and FLAGS.tiny == True:
                boxes, pred_conf = filter_boxes(pred[1],
                                                pred[0],
                                                score_threshold=0.25,
                                                input_shape=tf.constant(
                                                    [input_size, input_size]))
            else:
                boxes, pred_conf = filter_boxes(pred[0],
                                                pred[1],
                                                score_threshold=0.25,
                                                input_shape=tf.constant(
                                                    [input_size, input_size]))
        else:
            batch_data = tf.constant(image_data)
            pred_bbox = infer(batch_data)
            for value in pred_bbox.values():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(
                pred_conf,
                (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=15,
            max_total_size=15,
            iou_threshold=FLAGS.iou,
            score_threshold=FLAGS.score)

        # convert data to numpy arrays and slice out unused elements
        num_objects = valid_detections.numpy()[0]
        bboxes = boxes.numpy()[0]
        bboxes = bboxes[0:int(num_objects)]
        scores = scores.numpy()[0]
        scores = scores[0:int(num_objects)]
        classes = classes.numpy()[0]
        classes = classes[0:int(num_objects)]

        # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height
        original_h, original_w, _ = frame.shape
        bboxes = utils.format_boxes(bboxes, original_h, original_w)

        # store all predictions in one parameter for simplicity when calling functions
        pred_bbox = [bboxes, scores, classes, num_objects]

        # read in all class names from config
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)

        # by default allow all classes in .names file
        allowed_classes = list(class_names.values())

        # custom allowed classes (uncomment line below to customize tracker for only people)
        #allowed_classes = ['person']

        # loop through objects and use class index to get class name, allow only classes in allowed_classes list
        names = []
        deleted_indx = []
        for i in range(num_objects):
            class_indx = int(classes[i])
            class_name = class_names[class_indx]
            if class_name not in allowed_classes:
                deleted_indx.append(i)
            else:
                names.append(class_name)
        names = np.array(names)
        count = len(names)
        if FLAGS.count:
            cv2.putText(frame, "Objects being tracked: {}".format(count),
                        (5, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2,
                        (0, 255, 0), 2)
            print("Objects being tracked: {}".format(count))
        # delete detections that are not in allowed_classes
        bboxes = np.delete(bboxes, deleted_indx, axis=0)
        scores = np.delete(scores, deleted_indx, axis=0)

        # encode yolo detections and feed to tracker
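        # `encoder` is assumed to be the appearance-embedding generator built earlier
        # in the script (a re-identification CNN); it crops each bbox from the frame
        # and returns a feature vector. Each Detection bundles a tlwh box, its
        # confidence, class name and that feature for DeepSORT.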
        features = encoder(frame, bboxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                bboxes, scores, names, features)
        ]

        # initialize color map: 'tab20b' is sampled at 20 evenly spaced points, and
        # each track is colored by its track_id modulo 20 further down
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima supression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes,
                                                    nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker: predict() advances each track's Kalman filter, update()
        # matches the new detections to the existing tracks
        tracker.predict()
        tracker.update(detections)

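        # `current_count` counts tracked figurines seen in the counting region on this
        # frame; `counter` (assumed to be a list created before the loop) accumulates
        # their track IDs across frames, and len(set(counter)) below gives the running
        # total of unique figurines.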
        current_count = 0

        # update tracks
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()

            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), color, 2)
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1] - 30)),
                          (int(bbox[0]) +
                           (len(class_name) + len(str(track.track_id))) * 17,
                           int(bbox[1])), color, -1)
            cv2.putText(frame, class_name + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                        (255, 255, 255), 2)

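            # `pts` is assumed to be a per-track history of box centers created before
            # this loop (e.g. a list or dict of fixed-length deques keyed by
            # track_id); appending the current center lets the loop below draw a
            # fading motion trail for each track.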
            center = (int(
                ((bbox[0]) + (bbox[2])) / 2), int(((bbox[1]) + (bbox[3])) / 2))
            pts[track.track_id].append(center)

            for j in range(1, len(pts[track.track_id])):
                if pts[track.track_id][j - 1] is None or pts[
                        track.track_id][j] is None:
                    continue
                thickness = int(np.sqrt(64 / float(j + 1)) * 2)
                cv2.line(frame, (pts[track.track_id][j - 1]),
                         (pts[track.track_id][j]), color, thickness)

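            # Counting band: 3*height/6 + height/2 == height and
            # 3*height/6 - height/2 == 0, so the two green lines sit at the top and
            # bottom of the frame and the band spans the whole image. Narrow these
            # offsets to count only objects inside a tighter region.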
            height, width, _ = frame.shape
            cv2.line(frame, (0, int(3 * height / 6 + height / 2)),
                     (width, int(3 * height / 6 + height / 2)), (0, 255, 0),
                     thickness=2)
            cv2.line(frame, (0, int(3 * height / 6 - height / 2)),
                     (width, int(3 * height / 6 - height / 2)), (0, 255, 0),
                     thickness=2)

            center_y = int(((bbox[1]) + (bbox[3])) / 2)

            if center_y <= int(3 * height / 6 +
                               height / 2) and center_y >= int(3 * height / 6 -
                                                               height / 2):
                if class_name == 'Among_Us_Alive' or class_name == 'Among_Us_Dead':
                    counter.append(int(track.track_id))
                    current_count += 1

            # if the info flag is enabled, print details about each track
            if FLAGS.info:
                print(
                    "Tracker ID: {}, Class: {},  BBox Coords (xmin, ymin, xmax, ymax): {}"
                    .format(str(track.track_id), class_name, (int(
                        bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))))

        total_count = len(set(counter))
        cv2.putText(frame, "Current Figurine Count: " + str(current_count),
                    (0, 80), 0, 1, (0, 0, 255), 2)
        cv2.putText(frame, "Total Figurine Count: " + str(total_count),
                    (0, 130), 0, 1, (0, 0, 255), 2)

        # calculate frames per second; start_time was taken before inference, so this
        # measures detection + tracking + drawing for the frame
        fps = 1.0 / (time.time() - start_time)
        print("FPS: %.2f" % fps)
        # convert back to BGR for OpenCV display
        result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

        if not FLAGS.dont_show:
            cv2.imshow("Output Video", result)

        # press 'q' to stop the stream
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break