Code example #1
def detect(self):
    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
        interpreter.set_tensor(input_details[0]['index'], images_data)
        interpreter.invoke()
        pred = [interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details))]
        # yolov3-tiny orders its two output tensors the other way around
        if FLAGS.model == 'yolov3' and FLAGS.tiny:
            boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25, input_shape=tf.constant([input_size, input_size]))
        else:
            boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25, input_shape=tf.constant([input_size, input_size]))
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']
        batch_data = tf.constant(images_data)
        pred_bbox = infer(batch_data)
        # the serving signature returns a dict; unpack boxes and per-class scores
        for key, value in pred_bbox.items():
            boxes = value[:, :, 0:4]
            pred_conf = value[:, :, 4:]
    # run non-max suppression on the detections from either branch
    boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
        boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
        scores=tf.reshape(pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
        max_output_size_per_class=50,
        max_total_size=50,
        iou_threshold=FLAGS.iou,
        score_threshold=FLAGS.score)
    pred_bbox = [boxes.numpy(), scores.numpy(), classes.numpy(), valid_detections.numpy()]
    return pred_bbox
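Example #1 assumes `images_data` and `input_size` are prepared by the caller. A minimal preprocessing sketch following the pattern the later examples use (the image path and size are placeholders):

# sketch: prepare a single RGB image batch for detect() above
input_size = 416  # placeholder; must match the exported model
original_image = cv2.imread('image.jpg')  # placeholder path
original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
image_data = cv2.resize(original_image, (input_size, input_size)) / 255.
images_data = image_data[np.newaxis, ...].astype(np.float32)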
Code example #2
def inference(framework, images_data, model, tiny, interpreter, saved_model_loaded, input_size, iou, score):
    # the TFLite interpreter and model input size must be supplied by the caller
    if framework == 'tflite':
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        interpreter.set_tensor(input_details[0]['index'], images_data)
        interpreter.invoke()
        pred = [interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details))]
        # yolov3-tiny orders its two output tensors the other way around
        if model == 'yolov3' and tiny:
            boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25, input_shape=tf.constant([input_size, input_size]))
        else:
            boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25, input_shape=tf.constant([input_size, input_size]))
    else:
        infer = saved_model_loaded.signatures['serving_default']
        batch_data = tf.constant(images_data)
        pred_bbox = infer(batch_data)
        for key, value in pred_bbox.items():
            boxes = value[:, :, 0:4]
            pred_conf = value[:, :, 4:]

    # run non max suppression on detections
    boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
        boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
        scores=tf.reshape(
            pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
        max_output_size_per_class=50,
        max_total_size=50,
        iou_threshold=iou,
        score_threshold=score)
    return boxes, scores, classes, valid_detections
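A hedged usage sketch for the `inference` helper above, with the interpreter loaded the same way the other examples do (the weight path and thresholds are placeholders):

# sketch: run inference() against a TFLite model
interpreter = tf.lite.Interpreter(model_path='./checkpoints/yolov4-416.tflite')  # placeholder path
boxes, scores, classes, valid_detections = inference(
    framework='tflite', images_data=images_data, model='yolov4', tiny=False,
    interpreter=interpreter, saved_model_loaded=None,
    input_size=416, iou=0.45, score=0.25)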
Code example #3
File: save_model.py Project: hsji0/JHS
def save_tf():
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    print("load_config XYSCALE:{}".format(XYSCALE))
    input_layer = tf.keras.layers.Input(
        [FLAGS.input_size, FLAGS.input_size, 3])
    feature_maps = YOLO(input_layer, NUM_CLASS, FLAGS.model,
                        FLAGS.num_detection_layer)
    bbox_tensors = []
    prob_tensors = []
    if FLAGS.num_detection_layer == 1:  # yolo-custom
        output_tensors = decode(feature_maps[0], FLAGS.input_size // 32,
                                NUM_CLASS, STRIDES, ANCHORS, 0, XYSCALE,
                                FLAGS.framework)
        bbox_tensors.append(output_tensors[0])
        prob_tensors.append(output_tensors[1])
    elif FLAGS.num_detection_layer == 2:  # yolo-tiny
        for i, fm in enumerate(feature_maps):
            if i == 0:
                output_tensors = decode(fm, FLAGS.input_size // 16, NUM_CLASS,
                                        STRIDES, ANCHORS, i, XYSCALE,
                                        FLAGS.framework)
            else:
                output_tensors = decode(fm, FLAGS.input_size // 32, NUM_CLASS,
                                        STRIDES, ANCHORS, i, XYSCALE,
                                        FLAGS.framework)
            bbox_tensors.append(output_tensors[0])
            prob_tensors.append(output_tensors[1])
    elif FLAGS.num_detection_layer == 3:  # yolo
        for i, fm in enumerate(feature_maps):
            print("i:{}".format(i))
            if i == 0:
                output_tensors = decode(fm, FLAGS.input_size // 8, NUM_CLASS,
                                        STRIDES, ANCHORS, i, XYSCALE,
                                        FLAGS.framework)
            elif i == 1:
                output_tensors = decode(fm, FLAGS.input_size // 16, NUM_CLASS,
                                        STRIDES, ANCHORS, i, XYSCALE,
                                        FLAGS.framework)
            else:
                output_tensors = decode(fm, FLAGS.input_size // 32, NUM_CLASS,
                                        STRIDES, ANCHORS, i, XYSCALE,
                                        FLAGS.framework)
            bbox_tensors.append(output_tensors[0])
            prob_tensors.append(output_tensors[1])
    pred_bbox = tf.concat(bbox_tensors, axis=1)
    pred_prob = tf.concat(prob_tensors, axis=1)
    if FLAGS.framework == 'tflite':
        pred = (pred_bbox, pred_prob)
    else:
        boxes, pred_conf = filter_boxes(
            pred_bbox,
            pred_prob,
            score_threshold=FLAGS.score_thres,
            input_shape=tf.constant([FLAGS.input_size, FLAGS.input_size]))
        pred = tf.concat([boxes, pred_conf], axis=-1)
    model = tf.keras.Model(input_layer, pred)
    utils.load_weights(model, FLAGS.weights, FLAGS.model,
                       FLAGS.num_detection_layer)
    model.summary()
    model.save(FLAGS.output)
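When `FLAGS.framework == 'tflite'`, the model exported by `save_tf()` still has to be converted; a sketch using the standard TFLite converter (the output filename is a placeholder):

# sketch: convert the exported SavedModel to a .tflite flatbuffer
converter = tf.lite.TFLiteConverter.from_saved_model(FLAGS.output)
tflite_model = converter.convert()
with open('./checkpoints/yolo.tflite', 'wb') as f:  # placeholder path
    f.write(tflite_model)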
Code example #4
def detect_flowers(original_image):

    image_data = cv2.resize(original_image, (FLAGS.size, FLAGS.size)) / 255.0
    image_data = np.asarray([image_data]).astype(np.float32)

    detection_interpreter.set_tensor(detection_input_details[0]['index'],
                                     image_data)
    detection_interpreter.invoke()

    pred = [
        detection_interpreter.get_tensor(detection_output_details[i]['index'])
        for i in range(len(detection_output_details))
    ]
    boxes, pred_conf = filter_boxes(pred[0],
                                    pred[1],
                                    score_threshold=0.25,
                                    input_shape=tf.constant(
                                        [FLAGS.size, FLAGS.size]))

    boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
        boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
        scores=tf.reshape(
            pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
        max_output_size_per_class=50,
        max_total_size=50,
        iou_threshold=FLAGS.iou,
        score_threshold=FLAGS.score)

    pred_bbox = [
        boxes.numpy(),
        scores.numpy(),
        classes.numpy(),
        valid_detections.numpy()
    ]
    return utils.detect_coordinates(original_image, pred_bbox)
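`detect_flowers` relies on a module-level interpreter and its cached tensor details; a sketch of the one-time setup it assumes (the model path is a placeholder):

# sketch: one-time TFLite interpreter setup assumed by detect_flowers()
detection_interpreter = tf.lite.Interpreter(model_path='./flowers-416.tflite')  # placeholder path
detection_interpreter.allocate_tensors()
detection_input_details = detection_interpreter.get_input_details()
detection_output_details = detection_interpreter.get_output_details()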
Code example #5
def save_tf():
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)

    input_layer = tf.keras.layers.Input(
        [FLAGS.input_size, FLAGS.input_size, 3])
    feature_maps = YOLOv4(input_layer, NUM_CLASS)
    bbox_tensors = []
    prob_tensors = []

    for i, fm in enumerate(feature_maps):
        if i == 0:
            output_tensors = decode(fm, FLAGS.input_size // 8, NUM_CLASS,
                                    STRIDES, ANCHORS, i, XYSCALE)
        elif i == 1:
            output_tensors = decode(fm, FLAGS.input_size // 16, NUM_CLASS,
                                    STRIDES, ANCHORS, i, XYSCALE)
        else:
            output_tensors = decode(fm, FLAGS.input_size // 32, NUM_CLASS,
                                    STRIDES, ANCHORS, i, XYSCALE)
        bbox_tensors.append(output_tensors[0])
        prob_tensors.append(output_tensors[1])

    pred_bbox = tf.concat(bbox_tensors, axis=1)
    pred_prob = tf.concat(prob_tensors, axis=1)

    boxes, pred_conf = filter_boxes(pred_bbox,
                                    pred_prob,
                                    score_threshold=FLAGS.score_thres,
                                    input_shape=tf.constant(
                                        [FLAGS.input_size, FLAGS.input_size]))
    pred = tf.concat([boxes, pred_conf], axis=-1)
    model = tf.keras.Model(input_layer, pred)
    utils.load_weights(model, FLAGS.weights, FLAGS.model)
    model.summary()
    model.save(FLAGS.output)
Code example #6
def save_tf():
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)

    input_layer = tf.keras.layers.Input(
        [FLAGS.input_size, FLAGS.input_size, 3])
    feature_maps = YOLO(input_layer, NUM_CLASS, FLAGS.model, FLAGS.tiny)
    bbox_tensors = []
    prob_tensors = []
    if FLAGS.tiny:
        for i, fm in enumerate(feature_maps):
            if i == 0:
                output_tensors = decode(fm, FLAGS.input_size // 16, NUM_CLASS,
                                        STRIDES, ANCHORS, i, XYSCALE,
                                        FLAGS.framework)
            else:
                output_tensors = decode(fm, FLAGS.input_size // 32, NUM_CLASS,
                                        STRIDES, ANCHORS, i, XYSCALE,
                                        FLAGS.framework)
            bbox_tensors.append(output_tensors[0])
            prob_tensors.append(output_tensors[1])
    else:
        for i, fm in enumerate(feature_maps):
            if i == 0:
                output_tensors = decode(fm, FLAGS.input_size // 8, NUM_CLASS,
                                        STRIDES, ANCHORS, i, XYSCALE,
                                        FLAGS.framework)
            elif i == 1:
                output_tensors = decode(fm, FLAGS.input_size // 16, NUM_CLASS,
                                        STRIDES, ANCHORS, i, XYSCALE,
                                        FLAGS.framework)
            else:
                output_tensors = decode(fm, FLAGS.input_size // 32, NUM_CLASS,
                                        STRIDES, ANCHORS, i, XYSCALE,
                                        FLAGS.framework)
            bbox_tensors.append(output_tensors[0])
            prob_tensors.append(output_tensors[1])
    pred_bbox = tf.concat(bbox_tensors, axis=1)
    pred_prob = tf.concat(prob_tensors, axis=1)
    if FLAGS.framework == 'tflite':
        pred = (pred_bbox, pred_prob)
    else:
        boxes, pred_conf = filter_boxes(
            pred_bbox,
            pred_prob,
            score_threshold=FLAGS.score_thres,
            input_shape=tf.constant([FLAGS.input_size, FLAGS.input_size]))
        pred = tf.concat([boxes, pred_conf], axis=-1)
    model = tf.keras.Model(input_layer, pred)
    model.load_weights(FLAGS.weights)
    #utils.load_weights(model, FLAGS.weights, FLAGS.model, FLAGS.tiny)  # use this when loading a darknet .weights file

    #model.summary()
    #model.save('/checkpoints/yolov4-416')
    #model.save(FLAGS.output, save_format = 'tf')
    tf.saved_model.save(
        model, FLAGS.output
    )  # with this save format, the model is stored as assets/, variables/, and saved_model.pb
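Note the two loading paths that recur across these examples: `utils.load_weights` parses darknet .weights files, while Keras' `model.load_weights` (used above) restores a TensorFlow checkpoint. The exported directory can then be loaded back the way the detection examples do it; a sketch:

# sketch: reload the exported SavedModel for inference
saved_model_loaded = tf.saved_model.load(FLAGS.output, tags=[tag_constants.SERVING])
infer = saved_model_loaded.signatures['serving_default']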
Code example #7
def save_tf():
    if FLAGS.license:
        cfg.YOLO.CLASSES = "./data/classes/custom.names"
    else:
        cfg.YOLO.CLASSES = "./data/classes/char.names"
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    #print(read_class_names(cfg.YOLO.CLASSES))

    input_layer = tf.keras.layers.Input(
        [FLAGS.input_size, FLAGS.input_size, 3])
    feature_maps = YOLO(input_layer, NUM_CLASS, FLAGS.model, FLAGS.tiny)
    bbox_tensors = []
    prob_tensors = []
    if FLAGS.tiny:
        for i, fm in enumerate(feature_maps):
            if i == 0:
                output_tensors = decode(fm, FLAGS.input_size // 16, NUM_CLASS,
                                        STRIDES, ANCHORS, i, XYSCALE,
                                        FLAGS.framework)
            else:
                output_tensors = decode(fm, FLAGS.input_size // 32, NUM_CLASS,
                                        STRIDES, ANCHORS, i, XYSCALE,
                                        FLAGS.framework)
            bbox_tensors.append(output_tensors[0])
            prob_tensors.append(output_tensors[1])
    else:
        for i, fm in enumerate(feature_maps):
            if i == 0:
                output_tensors = decode(fm, FLAGS.input_size // 8, NUM_CLASS,
                                        STRIDES, ANCHORS, i, XYSCALE,
                                        FLAGS.framework)
            elif i == 1:
                output_tensors = decode(fm, FLAGS.input_size // 16, NUM_CLASS,
                                        STRIDES, ANCHORS, i, XYSCALE,
                                        FLAGS.framework)
            else:
                output_tensors = decode(fm, FLAGS.input_size // 32, NUM_CLASS,
                                        STRIDES, ANCHORS, i, XYSCALE,
                                        FLAGS.framework)
            bbox_tensors.append(output_tensors[0])
            prob_tensors.append(output_tensors[1])
    pred_bbox = tf.concat(bbox_tensors, axis=1)
    pred_prob = tf.concat(prob_tensors, axis=1)
    if FLAGS.framework == 'tflite':
        pred = (pred_bbox, pred_prob)
    else:
        boxes, pred_conf = filter_boxes(
            pred_bbox,
            pred_prob,
            score_threshold=FLAGS.score_thres,
            input_shape=tf.constant([FLAGS.input_size, FLAGS.input_size]))
        pred = tf.concat([boxes, pred_conf], axis=-1)
    model = tf.keras.Model(input_layer, pred)
    utils.load_weights(model, FLAGS.weights, FLAGS.model, FLAGS.tiny)
    model.summary()
    model.save(FLAGS.output)
Code example #8
def save_tf():
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_layer = tf.keras.layers.Input(
        [FLAGS.input_size, FLAGS.input_size, 3])
    feature_maps = YOLO(input_layer, NUM_CLASS, FLAGS.model, FLAGS.tiny)
    bbox_tensors = []
    prob_tensors = []
    if FLAGS.tiny:
        for i, fm in enumerate(feature_maps):
            if i == 0:
                output_tensors = decode(fm, FLAGS.input_size // 16, NUM_CLASS,
                                        STRIDES, ANCHORS, i, XYSCALE,
                                        FLAGS.framework)
            else:
                output_tensors = decode(fm, FLAGS.input_size // 32, NUM_CLASS,
                                        STRIDES, ANCHORS, i, XYSCALE,
                                        FLAGS.framework)
            bbox_tensors.append(output_tensors[0])
            prob_tensors.append(output_tensors[1])
    else:
        for i, fm in enumerate(feature_maps):
            if i == 0:
                output_tensors = decode(fm, FLAGS.input_size // 8, NUM_CLASS,
                                        STRIDES, ANCHORS, i, XYSCALE,
                                        FLAGS.framework)
            elif i == 1:
                output_tensors = decode(fm, FLAGS.input_size // 16, NUM_CLASS,
                                        STRIDES, ANCHORS, i, XYSCALE,
                                        FLAGS.framework)
            else:
                output_tensors = decode(fm, FLAGS.input_size // 32, NUM_CLASS,
                                        STRIDES, ANCHORS, i, XYSCALE,
                                        FLAGS.framework)
            bbox_tensors.append(output_tensors[0])
            prob_tensors.append(output_tensors[1])

    pred_bbox = tf.concat(bbox_tensors, axis=1)
    pred_prob = tf.concat(prob_tensors, axis=1)

    if FLAGS.framework == 'tflite':
        pred = (pred_bbox, pred_prob)
    else:
        boxes, pred_conf = filter_boxes(
            pred_bbox,
            pred_prob,
            score_threshold=FLAGS.score_thres,
            input_shape=tf.constant([FLAGS.input_size, FLAGS.input_size]))
        pred = tf.concat([boxes, pred_conf], axis=-1)

    model = tf.keras.Model(input_layer, pred)
    model.load_weights(FLAGS.input_model_path)
    model.summary()
    # model.save(FLAGS.output_model_path)
    return model
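Unlike the other `save_tf` variants, this one returns the in-memory Keras model instead of writing it out; a caller could persist it afterwards (sketch, the path is a placeholder):

# sketch: save the returned model in SavedModel format
model = save_tf()
model.save('./checkpoints/yolov4-416')  # placeholder path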
Code example #9
def detect(self, image_data):
    self.interpreter.set_tensor(self.input_details[0]['index'], image_data)
    self.interpreter.invoke()
    pred = [
        self.interpreter.get_tensor(self.output_details[i]['index'])
        for i in range(len(self.output_details))
    ]
    # yolov3-tiny orders its two output tensors the other way around
    if FLAGS.model == 'yolov3' and FLAGS.tiny:
        boxes, pred_conf = filter_boxes(pred[1],
                                        pred[0],
                                        score_threshold=0.25,
                                        input_shape=tf.constant(
                                            [input_size, input_size]))
    else:
        boxes, pred_conf = filter_boxes(pred[0],
                                        pred[1],
                                        score_threshold=0.25,
                                        input_shape=tf.constant(
                                            [input_size, input_size]))
    return boxes, pred_conf
Code example #10
def _pred(infer, file_name, original_image, images_data, input_details=None, output_details=None):
    input_size = FLAGS.size
    if FLAGS.framework == 'tflite':
        infer.set_tensor(input_details[0]['index'], images_data)
        infer.invoke()
        pred = [infer.get_tensor(output_details[i]['index']) for i in range(len(output_details))]
        if FLAGS.model == 'yolov3' and FLAGS.tiny:
            boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25,
                                            input_shape=tf.constant([input_size, input_size]))
        else:
            boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25,
                                            input_shape=tf.constant([input_size, input_size]))
    else:
        infer = infer.signatures['serving_default']
        batch_data = tf.constant(images_data)
        pred_bbox = infer(batch_data)
        for key, value in pred_bbox.items():
            boxes = value[:, :, 0:4]
            pred_conf = value[:, :, 4:]

    boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
        boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
        scores=tf.reshape(
            pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
        max_output_size_per_class=50,
        max_total_size=50,
        iou_threshold=FLAGS.iou,
        score_threshold=FLAGS.score
    )
    pred_bbox = [boxes.numpy(), scores.numpy(), classes.numpy(), valid_detections.numpy()]
    image = utils.draw_bbox(original_image, pred_bbox)
    #image = utils.draw_bbox(image_data*255, pred_bbox)
    image = Image.fromarray(image.astype(np.uint8))
    #image.show()
    image = cv2.cvtColor(np.array(image), cv2.COLOR_BGR2RGB)
    cv2.imwrite(os.path.join(FLAGS.output, file_name), image)
    num_detect = pred_bbox[3][0]
    scores = pred_bbox[1][0]
    bboxs = pred_bbox[0][0]
    return num_detect, scores, bboxs
Code example #11
    def get_bbox(self, image_bytes):

        STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
        input_size = CONFIG.image_size
        cameraId = image_bytes["CameraId"]
        nparr = np.frombuffer(image_bytes["ImageBytes"], np.uint8)
        original_image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
        original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
        # print("Image shape: ",original_image.shape)
        image_h, image_w, _ = original_image.shape
        image_data = cv2.resize(original_image, (input_size[0], input_size[1]))
        image_data = image_data / 255.

        images_data = []
        for i in range(1):
            images_data.append(image_data)
        images_data = np.asarray(images_data).astype(np.float32)

        self.interpreter.allocate_tensors()
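        # note: allocate_tensors() only needs to run once per interpreter;
        # repeating it on every call is redundant but harmless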
        input_details = self.interpreter.get_input_details()
        output_details = self.interpreter.get_output_details()
        self.interpreter.set_tensor(input_details[0]['index'], images_data)
        self.interpreter.invoke()
        pred = [
            self.interpreter.get_tensor(output_details[i]['index'])
            for i in range(len(output_details))
        ]
        boxes, pred_conf = filter_boxes(pred[0],
                                        pred[1],
                                        score_threshold=0.25,
                                        input_shape=tf.constant(
                                            [input_size[0], input_size[1]]))

        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(
                pred_conf,
                (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=CONFIG.iou,
            score_threshold=CONFIG.score)
        pred_bbox = [
            boxes.numpy(),
            scores.numpy(),
            classes.numpy(),
            valid_detections.numpy()
        ]
        detections = utils.bbox_details(original_image, pred_bbox)

        return original_image, detections, classes.numpy()
Code example #12
def save_tf(parameters):
    """Transform a darknet model of YOLO to a TensorFlow model

    Args:
        parameters (dictionary): input parameters
        - weights: path to the darknet weights
        - input_size: input size of the model
        - model: model to transform
        - score_thres: score threshold passed to filter_boxes
        - weights_tf: path to save the tf weights
    Returns:
        None
    """
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(
        tiny=False, model=parameters['model'])

    input_layer = tf.keras.layers.Input(
        [parameters['input_size'], parameters['input_size'], 3])
    feature_maps = YOLO(input_layer, NUM_CLASS, parameters['model'], False)
    bbox_tensors = []
    prob_tensors = []
    for i, fm in enumerate(feature_maps):
        if i == 0:
            output_tensors = decode(fm, parameters['input_size'] // 8,
                                    NUM_CLASS, STRIDES, ANCHORS, i, XYSCALE,
                                    'tf')
        elif i == 1:
            output_tensors = decode(fm, parameters['input_size'] // 16,
                                    NUM_CLASS, STRIDES, ANCHORS, i, XYSCALE,
                                    'tf')
        else:
            output_tensors = decode(fm, parameters['input_size'] // 32,
                                    NUM_CLASS, STRIDES, ANCHORS, i, XYSCALE,
                                    'tf')
        bbox_tensors.append(output_tensors[0])
        prob_tensors.append(output_tensors[1])
    pred_bbox = tf.concat(bbox_tensors, axis=1)
    pred_prob = tf.concat(prob_tensors, axis=1)

    boxes, pred_conf = filter_boxes(pred_bbox,
                                    pred_prob,
                                    score_threshold=parameters['score_thres'],
                                    input_shape=tf.constant([
                                        parameters['input_size'],
                                        parameters['input_size']
                                    ]))
    pred = tf.concat([boxes, pred_conf], axis=-1)
    model = tf.keras.Model(input_layer, pred)
    utils.load_weights(model, parameters['weights'], parameters['model'],
                       False)
    model.summary()
    model.save(parameters['weights_tf'])
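A hedged example of the `parameters` dictionary this function documents (all values are placeholders):

# sketch: parameters dict matching the docstring above
parameters = {
    'weights': './data/yolov4.weights',      # darknet weights to convert
    'input_size': 416,
    'model': 'yolov4',
    'score_thres': 0.2,                      # used by filter_boxes
    'weights_tf': './checkpoints/yolov4-tf',
}
save_tf(parameters)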
Code example #13
    def model_inference(self, image_input, interpreter, input_details,
                        output_details):

        interpreter.set_tensor(input_details[0]['index'], image_input)
        interpreter.invoke()
        pred = [
            interpreter.get_tensor(output_details[i]['index'])
            for i in range(len(output_details))
        ]

        boxes, pred_conf = filter_boxes(
            pred[0],
            pred[1],
            score_threshold=0.25,
            input_shape=tf.constant([self.input_size, self.input_size]))

        return boxes, pred_conf
Code example #14
def main(_argv):
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size
    video_path = FLAGS.video

    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights,
                                                 tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']

    # begin video capture
    try:
        vid = cv2.VideoCapture(int(video_path))
    except ValueError:
        vid = cv2.VideoCapture(video_path)

    out = None

    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    # initiating pyttsx3 engine and thread
    engine = pyttsx3.init()
    thread = threading.Thread()

    while True:
        return_value, frame = vid.read()
        if return_value:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(frame)
        else:
            print('Video has ended or failed, try a different video format!')
            break

        frame_size = frame.shape[:2]
        image_data = cv2.resize(frame, (input_size, input_size))
        image_data = image_data / 255.
        image_data = image_data[np.newaxis, ...].astype(np.float32)
        start_time = time.time()

        if FLAGS.framework == 'tflite':
            interpreter.set_tensor(input_details[0]['index'], image_data)
            interpreter.invoke()
            pred = [
                interpreter.get_tensor(output_details[i]['index'])
                for i in range(len(output_details))
            ]
            if FLAGS.model == 'yolov3' and FLAGS.tiny:
                boxes, pred_conf = filter_boxes(pred[1],
                                                pred[0],
                                                score_threshold=0.25,
                                                input_shape=tf.constant(
                                                    [input_size, input_size]))
            else:
                boxes, pred_conf = filter_boxes(pred[0],
                                                pred[1],
                                                score_threshold=0.25,
                                                input_shape=tf.constant(
                                                    [input_size, input_size]))
        else:
            batch_data = tf.constant(image_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(
                pred_conf,
                (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=5,
            max_total_size=10,
            iou_threshold=FLAGS.iou,
            score_threshold=FLAGS.score)
        pred_bbox = [
            boxes.numpy(),
            scores.numpy(),
            classes.numpy(),
            valid_detections.numpy()
        ]
        image = utils.draw_bbox(frame, pred_bbox)
        fps = 1.0 / (time.time() - start_time)
        print("FPS: %.2f" % fps)
        result = np.asarray(image)
        cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)
        result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

        if not FLAGS.dont_show:
            cv2.imshow("result", result)

        if FLAGS.output:
            out.write(result)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            engine.stop()
            break

        # ========== voiceFeedback ==========

        class_names = utils.read_class_names(cfg.YOLO.CLASSES)
        allowed_classes = list(class_names.values())
        valid_items = pred_bbox[3][0]
        valid_classes = pred_bbox[2][0]
        valid_boxes = pred_bbox[0][0]
        # section = (input_size/3)
        (H, W) = frame.shape[:2]
        res = []
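        # classify each detection's center into a 3x3 grid of the frame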
        for i in range(valid_items):
            (top, left, bottom, right) = valid_boxes[i]

            centerX = round((right + left) / 2)
            centerY = round((top + bottom) / 2)
            if centerX <= W / 3:
                w_pos = 'left '
            elif centerX <= (W / 3 * 2):
                w_pos = 'center '
            else:
                w_pos = 'right '

            if centerY <= H / 3:
                h_pos = 'top '
            elif centerY <= (H / 3 * 2):
                h_pos = 'mid '
            else:
                h_pos = 'bottom '
            res.append(h_pos + w_pos + allowed_classes[int(valid_classes[i])])

        description = ', '.join(res)

        # Using pyttsx3 to play sound directly without saving the file via a thread
        if (not thread.is_alive()):
            thread.__init__(name="texToSpeech",
                            target=textToSpeech,
                            args=[engine, description])
            thread.start()

        # ========= endVoiceFeedback ==========

    cv2.destroyAllWindows()
Code example #15
File: object_tracker.py Project: fjordss/ds_net
def iterate(lines, model, vid, frame_num):
    tracks = []

    return_value, frame = vid.read()
    if return_value:
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        image = Image.fromarray(frame)
    else:
        print('Video has ended or failed, try a different video format!')
        cv2.destroyAllWindows()
        return False, tracks

    frame_size = frame.shape[:2]
    image_data = cv2.resize(frame, (FLAGS.size, FLAGS.size))
    image_data = image_data / 255.
    image_data = image_data[np.newaxis, ...].astype(np.float32)
    start_time = time.time()

    # run detections on tflite if flag is set
    if FLAGS.framework == 'tflite':
        interpreter.set_tensor(input_details[0]['index'], image_data)
        interpreter.invoke()
        pred = [
            interpreter.get_tensor(output_details[i]['index'])
            for i in range(len(output_details))
        ]
        # yolov3-tiny orders its two output tensors the other way around
        if FLAGS.model == 'yolov3' and FLAGS.tiny:
            boxes, pred_conf = filter_boxes(pred[1],
                                            pred[0],
                                            score_threshold=0.25,
                                            input_shape=tf.constant(
                                                [FLAGS.size, FLAGS.size]))
        else:
            boxes, pred_conf = filter_boxes(pred[0],
                                            pred[1],
                                            score_threshold=0.25,
                                            input_shape=tf.constant(
                                                [FLAGS.size, FLAGS.size]))
    else:
        batch_data = tf.constant(image_data)
        pred_bbox = model.signatures['serving_default'](batch_data)
        for key, value in pred_bbox.items():
            boxes = value[:, :, 0:4]
            pred_conf = value[:, :, 4:]

    boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
        boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
        scores=tf.reshape(
            pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
        max_output_size_per_class=50,
        max_total_size=50,
        iou_threshold=FLAGS.iou,
        score_threshold=FLAGS.score)

    # convert data to numpy arrays and slice out unused elements
    num_objects = valid_detections.numpy()[0]
    bboxes = boxes.numpy()[0]
    bboxes = bboxes[0:int(num_objects)]
    scores = scores.numpy()[0]
    scores = scores[0:int(num_objects)]
    classes = classes.numpy()[0]
    classes = classes[0:int(num_objects)]

    # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height
    original_h, original_w, _ = frame.shape
    bboxes = utils.format_boxes(bboxes, original_h, original_w)

    # store all predictions in one parameter for simplicity when calling functions
    pred_bbox = [bboxes, scores, classes, num_objects]

    # read in all class names from config
    class_names = utils.read_class_names(cfg.YOLO.CLASSES)

    # by default allow all classes in .names file
    #allowed_classes = list(class_names.values())

    # custom allowed classes (uncomment line below to customize tracker for only people)
    #allowed_classes = ['person']
    allowed_classes = ['car', 'bus', 'truck']

    # loop through objects and use class index to get class name, allow only classes in allowed_classes list
    names = []
    deleted_indx = []
    for i in range(num_objects):
        class_indx = int(classes[i])
        class_name = class_names[class_indx]
        if class_name not in allowed_classes:
            deleted_indx.append(i)
        else:
            names.append(class_name)

    names = np.array(names)
    count = len(names)
    if FLAGS.count:
        cv2.putText(frame, "Objects being tracked: {}".format(count), (5, 35),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 255, 0), 2)
        print("Objects being tracked: {}".format(count))

    # delete detections that are not in allowed_classes
    bboxes = np.delete(bboxes, deleted_indx, axis=0)
    scores = np.delete(scores, deleted_indx, axis=0)

    # encode yolo detections and feed to tracker
    features = encoder(frame, bboxes)
    detections = [
        Detection(bbox, score, class_name,
                  feature) for bbox, score, class_name, feature in zip(
                      bboxes, scores, names, features)
    ]

    #initialize color map
    cmap = plt.get_cmap('tab20b')
    colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

    # run non-maxima suppression
    boxs = np.array([d.tlwh for d in detections])
    scores = np.array([d.confidence for d in detections])
    classes = np.array([d.class_name for d in detections])
    indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap,
                                                scores)
    detections = [detections[i] for i in indices]

    # Call the tracker
    tracker.predict()
    tracker.update(detections)

    # update tracks
    for track in tracker.tracks:
        if not track.is_confirmed() or track.time_since_update > 1:
            continue
        bbox = track.to_tlbr()
        class_name = track.get_class()

        tracks.append(
            Rect(track.track_id, (int(bbox[0]), int(bbox[1])),
                 (int(bbox[2]) - int(bbox[0]), int(bbox[3]) - int(bbox[1]))))

        # draw bbox on screen
        color = colors[int(track.track_id) % len(colors)]
        color = [i * 255 for i in color]
        cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                      (int(bbox[2]), int(bbox[3])), color, 2)
        cv2.rectangle(
            frame, (int(bbox[0]), int(bbox[1] - 30)),
            (int(bbox[0]) +
             (len(class_name) + len(str(track.track_id))) * 17, int(bbox[1])),
            color, -1)
        cv2.putText(frame, class_name + "-" + str(track.track_id),
                    (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                    (255, 255, 255), 2)

        # if enable info flag then print details about each track
        #if FLAGS.info:
        #    print("Tracker ID: {}, Class: {},  BBox Coords (xmin, ymin, xmax, ymax): {}".format(str(track.track_id), class_name, (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))))

    for line in lines:
        cv2.line(frame, line.pt1, line.pt2, line.color, 3)
        cv2.line(frame, line.vertor_pt1, line.vertor_pt2, (255, 255, 0), 2)
        cv2.putText(frame, str(line.count), line.center,
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    cv2.putText(frame, str(frame_num), (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1,
                (255, 255, 0), 2)

    # calculate frames per second of running detections
    fps = 1.0 / (time.time() - start_time)
    print("FPS: %.2f" % fps)
    result = np.asarray(frame)
    result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

    if not FLAGS.dont_show:
        cv2.imshow("Output Video", result)

    # if output flag is set, save video file
    if FLAGS.output:
        out.write(result)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        cv2.destroyAllWindows()
        return False, tracks

    return True, tracks
Code example #16
def main(_argv):
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size
    images = FLAGS.images

    # load model
    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights,
                                                 tags=[tag_constants.SERVING])

    # loop through images in list and run Yolov4 model on each
    for count, image_path in enumerate(images, 1):
        original_image = cv2.imread(image_path)
        original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)

        image_data = cv2.resize(original_image, (input_size, input_size))
        image_data = image_data / 255.

        # get image name by using split method
        image_name = image_path.split('/')[-1]
        image_name = image_name.split('.')[0]

        images_data = []
        for i in range(1):
            images_data.append(image_data)
        images_data = np.asarray(images_data).astype(np.float32)
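        # images_data now has shape (1, input_size, input_size, 3)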

        if FLAGS.framework == 'tflite':
            interpreter.allocate_tensors()
            input_details = interpreter.get_input_details()
            output_details = interpreter.get_output_details()
            interpreter.set_tensor(input_details[0]['index'], images_data)
            interpreter.invoke()
            pred = [
                interpreter.get_tensor(output_details[i]['index'])
                for i in range(len(output_details))
            ]
            if FLAGS.model == 'yolov3' and FLAGS.tiny:
                boxes, pred_conf = filter_boxes(pred[1],
                                                pred[0],
                                                score_threshold=0.25,
                                                input_shape=tf.constant(
                                                    [input_size, input_size]))
            else:
                boxes, pred_conf = filter_boxes(pred[0],
                                                pred[1],
                                                score_threshold=0.25,
                                                input_shape=tf.constant(
                                                    [input_size, input_size]))
        else:
            infer = saved_model_loaded.signatures['serving_default']
            batch_data = tf.constant(images_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

        # run non max suppression on detections
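        # boxes are reshaped to [batch, num_boxes, 1, 4]: combined NMS reads the
        # third dim as per-class boxes, and q=1 shares one box set across classes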
        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(
                pred_conf,
                (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=FLAGS.iou,
            score_threshold=FLAGS.score)

        # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, xmax, ymax
        original_h, original_w, _ = original_image.shape
        bboxes = utils.format_boxes(boxes.numpy()[0], original_h, original_w)

        # hold all detection data in one variable
        pred_bbox = [
            bboxes,
            scores.numpy()[0],
            classes.numpy()[0],
            valid_detections.numpy()[0]
        ]

        # read in all class names from config
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)

        # by default allow all classes in .names file
        allowed_classes = list(class_names.values())

        # custom allowed classes (uncomment line below to allow detections for only people)
        #allowed_classes = ['person']

        # if crop flag is enabled, crop each detection and save it as new image
        if FLAGS.crop:
            crop_path = os.path.join(os.getcwd(), 'detections', 'crop',
                                     image_name)
            try:
                os.mkdir(crop_path)
            except FileExistsError:
                pass
            crop_objects(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB),
                         pred_bbox, crop_path, allowed_classes)

        # if ocr flag is enabled, perform general text extraction using Tesseract OCR on object detection bounding box
        if FLAGS.ocr:
            ocr(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB), pred_bbox)

        # if count flag is enabled, perform counting of objects
        if FLAGS.count:
            # count objects found
            counted_classes = count_objects(pred_bbox,
                                            by_class=False,
                                            allowed_classes=allowed_classes)
            # loop through dict and print
            for key, value in counted_classes.items():
                print("Number of {}s: {}".format(key, value))
            image = utils.draw_bbox(original_image,
                                    pred_bbox,
                                    FLAGS.info,
                                    counted_classes,
                                    allowed_classes=allowed_classes,
                                    read_plate=FLAGS.plate)
        else:
            image = utils.draw_bbox(original_image,
                                    pred_bbox,
                                    FLAGS.info,
                                    allowed_classes=allowed_classes,
                                    read_plate=FLAGS.plate)

        image = Image.fromarray(image.astype(np.uint8))
        if not FLAGS.dont_show:
            image.show()
        image = cv2.cvtColor(np.array(image), cv2.COLOR_BGR2RGB)
        cv2.imwrite(FLAGS.output + 'detection' + str(count) + '.png', image)
Code example #17
def main(_argv):
    # Definition of the parameters
    max_cosine_distance = 0.4
    nn_budget = None
    nms_max_overlap = 1.0

    # initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    # calculate cosine distance metric
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    # initialize tracker
    tracker = Tracker(metric)

    # load configuration for object detector
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size
    video_path = FLAGS.video

    # load tflite model if flag is set
    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
    # otherwise load standard tensorflow saved model
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights,
                                                 tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']

    # begin video capture
    try:
        vid = cv2.VideoCapture(int(video_path))
    except ValueError:
        vid = cv2.VideoCapture(video_path)

    out = None

    # get video ready to save locally if flag is set
    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    frame_num = 0
    # while video is running
    while True:
        return_value, frame = vid.read()
        if return_value:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(frame)
        else:
            print('Video has ended or failed, try a different video format!')
            break
        frame_num += 1
        print('Frame #: ', frame_num)
        frame_size = frame.shape[:2]
        image_data = cv2.resize(frame, (input_size, input_size))
        image_data = image_data / 255.
        image_data = image_data[np.newaxis, ...].astype(np.float32)
        start_time = time.time()

        # run detections on tflite if flag is set
        if FLAGS.framework == 'tflite':
            interpreter.set_tensor(input_details[0]['index'], image_data)
            interpreter.invoke()
            pred = [
                interpreter.get_tensor(output_details[i]['index'])
                for i in range(len(output_details))
            ]
            # yolov3-tiny orders its two output tensors the other way around
            if FLAGS.model == 'yolov3' and FLAGS.tiny:
                boxes, pred_conf = filter_boxes(pred[1],
                                                pred[0],
                                                score_threshold=0.25,
                                                input_shape=tf.constant(
                                                    [input_size, input_size]))
            else:
                boxes, pred_conf = filter_boxes(pred[0],
                                                pred[1],
                                                score_threshold=0.25,
                                                input_shape=tf.constant(
                                                    [input_size, input_size]))
        else:
            batch_data = tf.constant(image_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(
                pred_conf,
                (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=FLAGS.iou,
            score_threshold=FLAGS.score)

        # convert data to numpy arrays and slice out unused elements
        num_objects = valid_detections.numpy()[0]
        bboxes = boxes.numpy()[0]
        bboxes = bboxes[0:int(num_objects)]
        scores = scores.numpy()[0]
        scores = scores[0:int(num_objects)]
        classes = classes.numpy()[0]
        classes = classes[0:int(num_objects)]

        # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height
        original_h, original_w, _ = frame.shape
        bboxes = utils.format_boxes(bboxes, original_h, original_w)

        # store all predictions in one parameter for simplicity when calling functions
        pred_bbox = [bboxes, scores, classes, num_objects]

        # read in all class names from config
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)

        # by default allow all classes in .names file
        allowed_classes = list(class_names.values())

        # custom allowed classes (uncomment line below to customize tracker for only people)
        #allowed_classes = ['person']

        # loop through objects and use class index to get class name, allow only classes in allowed_classes list
        names = []
        deleted_indx = []
        for i in range(num_objects):
            class_indx = int(classes[i])
            class_name = class_names[class_indx]
            if class_name not in allowed_classes:
                deleted_indx.append(i)
            else:
                names.append(class_name)
        names = np.array(names)
        count = len(names)
        if FLAGS.count:
            cv2.putText(frame, "Objects being tracked: {}".format(count),
                        (5, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2,
                        (0, 255, 0), 2)
            print("Objects being tracked: {}".format(count))
        # delete detections that are not in allowed_classes
        bboxes = np.delete(bboxes, deleted_indx, axis=0)
        scores = np.delete(scores, deleted_indx, axis=0)

        # encode yolo detections and feed to tracker
        features = encoder(frame, bboxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                bboxes, scores, names, features)
        ]

        # initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes,
                                                    nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        # update tracks
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()

            # draw bbox on screen
            # names = {'6_d': 'Thomas Delaney',
            #          '10_b': 'Leroy Sane',
            #          '18_b': 'Leon Goretzka',
            #          '25_b': 'Thomas Muller',
            #          '5_d': 'Dan-Axel Zagadou',
            #          '12_d': 'Zaragoza',
            #          '4_b': 'Niklas Sule',
            #          '14_d': 'Nico Schulz',
            #          '11_d': 'Marco Reus',
            #          'Referee': 'Referee',
            #          'ball': 'ball',
            #          '10_d': 'Thorgan Hazard',
            #          '6_b': 'Joshua Kimmich ',
            #          'gk_b': 'Ron-Thorben Hoffmann(GK)',
            #          '17_b': 'Jérôme Boateng',
            #          '27_b': 'David Alaba',
            #          '9_d': 'Erling Haaland',
            #          '8_d': 'Mahmoud Dahoud',
            #          'gk_d': 'Luca Unbehaun(GK)',
            #          '19_b': 'Alphonso Davies',
            #          '29_b': 'Kingsley Coman',
            #          '24_d': 'Marcel Schmelzer',
            #          '9_b': 'Robert Lewandowski',
            #          "23_d": 'Emre Can',
            #          }
            # if class_name == 'Referee':
            #     color = (0, 0, 0)
            if class_name == 'ball':
                # color = (255, 255, 255)
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), (255, 255, 255), 1)
            # else:

            # try:
            #     colors = {'b': (252, 3, 78), 'd': (250, 247, 80)}
            #     color = colors[str(class_name.split('_')[-1])]
            # except KeyError:
            #     pass

            # class_name = names[str(class_name)]
            # color = (250, 247, 80)

            # color = colors[int(track.track_id) % len(colors)]
            # color = [i * 255 for i in color]
            # cv2.rectangle(frame, (int(bbox[0]), int(
            #     bbox[1])), (int(bbox[2]), int(bbox[3])), color, 1)
            # cv2.rectangle(frame, (int(bbox[0]), int(
            #     bbox[1]-30)), (int(bbox[0])+(len(str(class_name)))*17, int(bbox[1])), color, -1)
            cv2.putText(frame, class_name, (int(bbox[0]), int(bbox[1] - 10)),
                        0, 0.75, (255, 251, 46), 2)

            # if enable info flag then print details about each track
            if FLAGS.info:
                print(
                    "Tracker ID: {}, Class: {},  BBox Coords (xmin, ymin, xmax, ymax): {}"
                    .format(str(track.track_id), class_name, (int(
                        bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))))

        # calculate frames per second of running detections
        fps = 1.0 / (time.time() - start_time)
        print("FPS: %.2f" % fps)
        result = np.asarray(frame)
        result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

        if not FLAGS.dont_show:
            cv2.imshow("Output Video", result)

        # if output flag is set, save video file
        if FLAGS.output:
            out.write(result)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cv2.destroyAllWindows()
Code example #18
File: evaluate.py Project: yfh15882384041/Yolov5_tf
def main(_argv):
    INPUT_SIZE = FLAGS.size
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    CLASSES = utils.read_class_names(cfg.YOLO.CLASSES)

    predicted_dir_path = './mAP/predicted'
    ground_truth_dir_path = './mAP/ground-truth'
    if os.path.exists(predicted_dir_path):
        shutil.rmtree(predicted_dir_path)
    if os.path.exists(ground_truth_dir_path):
        shutil.rmtree(ground_truth_dir_path)
    if os.path.exists(cfg.TEST.DECTECTED_IMAGE_PATH):
        shutil.rmtree(cfg.TEST.DECTECTED_IMAGE_PATH)

    os.mkdir(predicted_dir_path)
    os.mkdir(ground_truth_dir_path)
    os.mkdir(cfg.TEST.DECTECTED_IMAGE_PATH)

    # Build Model
    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights,
                                                 tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']

    num_lines = sum(1 for line in open(FLAGS.annotation_path))
    with open(cfg.TEST.ANNOT_PATH, 'r') as annotation_file:
        for num, line in enumerate(annotation_file):
            annotation = line.strip().split()
            image_path = annotation[0]
            image_name = image_path.split('/')[-1]
            image = cv2.imread(image_path)
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            bbox_data_gt = np.array(
                [list(map(int, box.split(','))) for box in annotation[1:]])

            if len(bbox_data_gt) == 0:
                bboxes_gt = []
                classes_gt = []
            else:
                bboxes_gt, classes_gt = bbox_data_gt[:, :4], bbox_data_gt[:, 4]
            ground_truth_path = os.path.join(ground_truth_dir_path,
                                             str(num) + '.txt')

            print('=> ground truth of %s:' % image_name)
            num_bbox_gt = len(bboxes_gt)
            with open(ground_truth_path, 'w') as f:
                for i in range(num_bbox_gt):
                    class_name = CLASSES[classes_gt[i]]
                    xmin, ymin, xmax, ymax = list(map(str, bboxes_gt[i]))
                    bbox_mess = ' '.join([class_name, xmin, ymin, xmax, ymax
                                          ]) + '\n'
                    f.write(bbox_mess)
                    print('\t' + str(bbox_mess).strip())
            print('=> predict result of %s:' % image_name)
            predict_result_path = os.path.join(predicted_dir_path,
                                               str(num) + '.txt')
            # Predict Process
            image_size = image.shape[:2]
            # image_data = utils.image_preprocess(np.copy(image), [INPUT_SIZE, INPUT_SIZE])
            image_data = cv2.resize(np.copy(image), (INPUT_SIZE, INPUT_SIZE))
            image_data = image_data / 255.
            image_data = image_data[np.newaxis, ...].astype(np.float32)

            if FLAGS.framework == 'tflite':
                interpreter.set_tensor(input_details[0]['index'], image_data)
                interpreter.invoke()
                pred = [
                    interpreter.get_tensor(output_details[i]['index'])
                    for i in range(len(output_details))
                ]
                if FLAGS.model == 'yolov4' and FLAGS.tiny == True:
                    boxes, pred_conf = filter_boxes(pred[1],
                                                    pred[0],
                                                    score_threshold=0.25)
                else:
                    boxes, pred_conf = filter_boxes(pred[0],
                                                    pred[1],
                                                    score_threshold=0.25)
            else:
                batch_data = tf.constant(image_data)
                pred_bbox = infer(batch_data)
                for key, value in pred_bbox.items():
                    boxes = value[:, :, 0:4]
                    pred_conf = value[:, :, 4:]

            boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
                boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
                scores=tf.reshape(
                    pred_conf,
                    (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
                max_output_size_per_class=50,
                max_total_size=50,
                iou_threshold=FLAGS.iou,
                score_threshold=FLAGS.score)
            boxes, scores, classes, valid_detections = [
                boxes.numpy(),
                scores.numpy(),
                classes.numpy(),
                valid_detections.numpy()
            ]

            # if cfg.TEST.DECTECTED_IMAGE_PATH is not None:
            #     image_result = utils.draw_bbox(np.copy(image), [boxes, scores, classes, valid_detections])
            #     cv2.imwrite(cfg.TEST.DECTECTED_IMAGE_PATH + image_name, image_result)

            with open(predict_result_path, 'w') as f:
                image_h, image_w, _ = image.shape
                for i in range(valid_detections[0]):
                    # class indices are valid in [0, NUM_CLASS)
                    if int(classes[0][i]) < 0 or int(
                            classes[0][i]) >= NUM_CLASS:
                        continue
                    coor = boxes[0][i]
                    coor[0] = int(coor[0] * image_h)
                    coor[2] = int(coor[2] * image_h)
                    coor[1] = int(coor[1] * image_w)
                    coor[3] = int(coor[3] * image_w)

                    score = scores[0][i]
                    class_ind = int(classes[0][i])
                    class_name = CLASSES[class_ind]
                    score = '%.4f' % score
                    ymin, xmin, ymax, xmax = list(map(str, coor))
                    bbox_mess = ' '.join(
                        [class_name, score, xmin, ymin, xmax, ymax]) + '\n'
                    f.write(bbox_mess)
                    print('\t' + str(bbox_mess).strip())
            print(num, num_lines)
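
The loop above rescales the normalized [ymin, xmin, ymax, xmax] corners returned by combined NMS into pixel space before writing the mAP text files. A minimal sketch of that scaling, with made-up dimensions:

import numpy as np

def denormalize_box(coor, image_h, image_w):
    # normalized [ymin, xmin, ymax, xmax] -> pixel ints, same order
    ymin, xmin, ymax, xmax = coor
    return [int(ymin * image_h), int(xmin * image_w),
            int(ymax * image_h), int(xmax * image_w)]

print(denormalize_box(np.array([0.1, 0.2, 0.5, 0.8]), image_h=480, image_w=640))
# -> [48, 128, 240, 512]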
Code example #19
0
def main(_argv):
    global NUM_CLASS, STRIDES, ANCHORS, XYSCALE

    INPUT_SIZE = FLAGS.size
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)

    CLASSES = utils.read_class_names(cfg.YOLO.CLASSES)

    predicted_dir_path = './mAP/predicted'
    ground_truth_dir_path = './mAP/ground-truth'
    if os.path.exists(predicted_dir_path): shutil.rmtree(predicted_dir_path)
    if os.path.exists(ground_truth_dir_path): shutil.rmtree(ground_truth_dir_path)
    if os.path.exists(cfg.TEST.DECTECTED_IMAGE_PATH): shutil.rmtree(cfg.TEST.DECTECTED_IMAGE_PATH)

    os.mkdir(predicted_dir_path)
    os.mkdir(ground_truth_dir_path)
    os.mkdir(cfg.TEST.DECTECTED_IMAGE_PATH)

    # Build Model
    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
    elif FLAGS.framework == 'tvm':
        ctx = tvm.cpu(0)
        loaded_graph = open(os.path.join(FLAGS.weights, "modelDescription.json")).read()
        loaded_lib = tvm.runtime.load_module(os.path.join(FLAGS.weights, "modelLibrary.so"))
        loaded_params = bytearray(open(os.path.join(FLAGS.weights,  "modelParams.params"), "rb").read())
        #
        # Get rid of the leip key
        #
        graphjson = json.loads(loaded_graph)
        if 'leip' in list(graphjson.keys()):
            del graphjson['leip']
            loaded_graph = json.dumps(graphjson)

        m = graph_runtime.create(loaded_graph, loaded_lib, ctx)
        m.load_params(loaded_params)
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']

    with open(FLAGS.annotation_path) as f:
        num_lines = sum(1 for line in f)
    with open(cfg.TEST.ANNOT_PATH, 'r') as annotation_file:
        for num, line in enumerate(annotation_file):
            annotation = line.strip().split()
            image_path = annotation[0]
            image_name = image_path.split('/')[-1]
            image = cv2.imread(image_path)
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            bbox_data_gt = np.array([list(map(int, box.split(','))) for box in annotation[1:]])

            if len(bbox_data_gt) == 0:
                bboxes_gt = []
                classes_gt = []
            else:
                bboxes_gt, classes_gt = bbox_data_gt[:, :4], bbox_data_gt[:, 4]
            ground_truth_path = os.path.join(ground_truth_dir_path, str(num) + '.txt')

            print('=> ground truth of %s:' % image_name)
            num_bbox_gt = len(bboxes_gt)
            with open(ground_truth_path, 'w') as f:
                for i in range(num_bbox_gt):
                    class_name = CLASSES[classes_gt[i]]
                    xmin, ymin, xmax, ymax = list(map(str, bboxes_gt[i]))
                    bbox_mess = ' '.join([class_name, xmin, ymin, xmax, ymax]) + '\n'
                    f.write(bbox_mess)
                    print('\t' + str(bbox_mess).strip())
            print('=> predict result of %s:' % image_name)
            predict_result_path = os.path.join(predicted_dir_path, str(num) + '.txt')
            # Predict Process
            image_size = image.shape[:2]
            # image_data = utils.image_preprocess(np.copy(image), [INPUT_SIZE, INPUT_SIZE])
            image_data = cv2.resize(np.copy(image), (INPUT_SIZE, INPUT_SIZE))

            if FLAGS.framework == 'tflite':
                # keep raw 0-255 pixel values for the uint8 quantized model;
                # dividing by 255 before the uint8 cast would truncate every
                # pixel to 0 or 1 (see the same warning in the TVM branch below)
                image_data = image_data[np.newaxis, ...].astype(np.float32)
                image_data_casted = image_data.astype(np.uint8)

                interpreter.set_tensor(input_details[0]['index'], image_data_casted)
                interpreter.invoke()
                # pred = [interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details))]
                # if FLAGS.model == 'yolov4' and FLAGS.tiny == True:
                #     boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25)
                # else:
                #     boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25)
                fm1 = interpreter.get_tensor(output_details[0]['index']).astype(np.float32)
                fm2 = interpreter.get_tensor(output_details[1]['index']).astype(np.float32)
                fm3 = interpreter.get_tensor(output_details[2]['index']).astype(np.float32)
                print(fm1.shape)
                print(fm2.shape)
                print(fm3.shape)
                fm1 = my_dequantize(fm1.astype(np.float32), 1.1345850229263306, 223)
                fm2 = my_dequantize(fm2.astype(np.float32), 2.054811954498291, 242)
                fm3 = my_dequantize(fm3.astype(np.float32), 8.428282737731934, 248)
                pred = my_decode([fm1, fm2, fm3]) # these need to be ordered biggest tensor to smallest I think
                boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=FLAGS.score)

            elif FLAGS.framework == 'tvm':
                # image_data = image_data / 255. # DO NOT DIVIDE by 255 for uint8 eval!
                image_data = image_data[np.newaxis, ...].astype(np.float32)

                image_data_casted = image_data.astype(np.uint8)
                m.set_input("input_1", tvm.nd.array(image_data_casted))
                ftimer = m.module.time_evaluator("run", ctx, number=1, repeat=1)
                prof_res = np.array(ftimer().results) * 1000  # convert to millisecond
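                # prof_res is the measured inference time in milliseconds;
                # this example collects it but never prints it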
                fm1 = m.get_output(0).asnumpy()
                fm2 = m.get_output(1).asnumpy()
                fm3 = m.get_output(2).asnumpy()
                print(fm1.shape)
                print(fm2.shape)
                print(fm3.shape)
                fm1 = my_dequantize(fm1.astype(np.float32), 1.1345850229263306, 223)
                fm2 = my_dequantize(fm2.astype(np.float32), 2.054811954498291, 242)
                fm3 = my_dequantize(fm3.astype(np.float32), 8.428282737731934, 248)

                pred = my_decode([fm1, fm2, fm3]) # these need to be ordered biggest tensor to smallest I think
                boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=FLAGS.score)

                #exit()
            else:
                image_data = image_data / 255.
                image_data = image_data[np.newaxis, ...].astype(np.float32)

                batch_data = tf.constant(image_data)
                pred_bbox = infer(batch_data)
                for key, value in pred_bbox.items():
                    boxes = value[:, :, 0:4]
                    pred_conf = value[:, :, 4:]

            boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
                boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
                scores=tf.reshape(
                    pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
                max_output_size_per_class=50,
                max_total_size=50,
                iou_threshold=FLAGS.iou,
                score_threshold=FLAGS.score
            )
            boxes, scores, classes, valid_detections = [boxes.numpy(), scores.numpy(), classes.numpy(), valid_detections.numpy()]

            # if cfg.TEST.DECTECTED_IMAGE_PATH is not None:
            #     image_result = utils.draw_bbox(np.copy(image), [boxes, scores, classes, valid_detections])
            #     cv2.imwrite(cfg.TEST.DECTECTED_IMAGE_PATH + image_name, image_result)

            with open(predict_result_path, 'w') as f:
                image_h, image_w, _ = image.shape
                for i in range(valid_detections[0]):
                    # class indices are valid in [0, NUM_CLASS)
                    if int(classes[0][i]) < 0 or int(classes[0][i]) >= NUM_CLASS: continue
                    coor = boxes[0][i]
                    coor[0] = int(coor[0] * image_h)
                    coor[2] = int(coor[2] * image_h)
                    coor[1] = int(coor[1] * image_w)
                    coor[3] = int(coor[3] * image_w)

                    score = scores[0][i]
                    class_ind = int(classes[0][i])
                    class_name = CLASSES[class_ind]
                    score = '%.4f' % score
                    ymin, xmin, ymax, xmax = list(map(str, coor))
                    bbox_mess = ' '.join([class_name, score, xmin, ymin, xmax, ymax]) + '\n'
                    f.write(bbox_mess)
                    print('\t' + str(bbox_mess).strip())
            print(num, num_lines)
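
my_dequantize is not shown in this example, but given the (scale, zero_point) pairs passed above it is presumably the standard affine dequantization applied to uint8 feature maps. A guess at an equivalent body, under that assumption:

import numpy as np

def my_dequantize(q, scale, zero_point):
    # real_value = scale * (quantized_value - zero_point)
    return scale * (q.astype(np.float32) - zero_point)

fm = np.array([222, 223, 224], dtype=np.uint8)
print(my_dequantize(fm, 1.1345850229263306, 223))  # ~[-1.13  0.    1.13]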
Code example #20
0
def main(_argv):
    # Definition of the parameters
    max_cosine_distance = 0.4
    nn_budget = None
    nms_max_overlap = 1.0

    # initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    # calculate cosine distance metric
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    # initialize tracker
    tracker = Tracker(metric)

    # load configuration for object detector
    config = ConfigProto()
    config.gpu_options.allow_growth = False
    config.gpu_options.per_process_gpu_memory_fraction = 0.1

    _ = InteractiveSession(config=config)
    utils.load_config(FLAGS)
    input_size = FLAGS.size
    video_path = FLAGS.video

    # load tflite model if flag is set
    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(
            model_path=f'{FLAGS.weights}_{FLAGS.size}')
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
    # otherwise load standard tensorflow saved model
    else:
        saved_model_loaded = tf.saved_model.load(
            f'{FLAGS.weights}_{FLAGS.size}', tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']

    # begin video capture
    try:
        vid = cv2.VideoCapture(int(video_path))
    except ValueError:  # not an integer webcam index, treat as a file path
        vid = cv2.VideoCapture(video_path)

    out = None

    # get video ready to save locally if flag is set
    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    all_start_time = None
    frame_num = 0
    # while video is running
    while True:
        return_value, frame = vid.read()
        if return_value:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        else:
            fps = float(frame_num) / (time.time() - all_start_time)
            print("fps=%.2f size=%d frames=%d deep=%s output=%s" %
                  (fps, FLAGS.size, frame_num,
                   "true" if FLAGS.deep else "false", FLAGS.output))
            break
        frame_num += 1
        if FLAGS.info:
            print("frame_num=%d" % frame_num)
        start_time = time.time()
        if all_start_time is None:
            all_start_time = time.time()
        image_data = cv2.resize(frame, (input_size, input_size))
        image_data = image_data / 255.
        image_data = image_data[np.newaxis, ...].astype(np.float32)

        # run detections on tflite if flag is set
        if FLAGS.framework == 'tflite':
            interpreter.set_tensor(input_details[0]['index'], image_data)
            interpreter.invoke()
            pred = [
                interpreter.get_tensor(output_details[i]['index'])
                for i in range(len(output_details))
            ]
            # run detections using yolov3 if flag is set
            if FLAGS.model == 'yolov3' and FLAGS.tiny == True:
                boxes, pred_conf = filter_boxes(pred[1],
                                                pred[0],
                                                score_threshold=0.25,
                                                input_shape=tf.constant(
                                                    [input_size, input_size]))
            else:
                boxes, pred_conf = filter_boxes(pred[0],
                                                pred[1],
                                                score_threshold=0.25,
                                                input_shape=tf.constant(
                                                    [input_size, input_size]))
        else:
            batch_data = tf.constant(image_data)
            pred_bbox = infer(batch_data)
            for _, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(
                pred_conf,
                (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=FLAGS.iou,
            score_threshold=FLAGS.score)

        # convert data to numpy arrays and slice out unused elements
        num_objects = valid_detections.numpy()[0]
        bboxes = boxes.numpy()[0]
        bboxes = bboxes[0:int(num_objects)]
        scores = scores.numpy()[0]
        scores = scores[0:int(num_objects)]
        classes = classes.numpy()[0]
        classes = classes[0:int(num_objects)]

        # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height
        original_h, original_w, _ = frame.shape
        bboxes = utils.format_boxes(bboxes, original_h, original_w)

        # store all predictions in one parameter for simplicity when calling functions
        pred_bbox = [bboxes, scores, classes, num_objects]

        # read in all class names from config
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)

        # by default allow all classes in .names file
        allowed_classes = list(class_names.values())

        # custom allowed classes (the tracker here is restricted to people;
        # remove the line below to track every class)
        allowed_classes = ['person']

        # loop through objects and use class index to get class name, allow only classes in allowed_classes list
        names = []
        deleted_indx = []
        for i in range(num_objects):
            class_indx = int(classes[i])
            class_name = class_names[class_indx]
            if class_name not in allowed_classes:
                deleted_indx.append(i)
            else:
                names.append(class_name)
        names = np.array(names)
        count = len(names)
        if FLAGS.count:
            cv2.putText(frame, "Objects being tracked: {}".format(count),
                        (5, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2,
                        (0, 255, 0), 2)
            print("Objects being tracked: {}".format(count))
        # delete detections that are not in allowed_classes
        bboxes = np.delete(bboxes, deleted_indx, axis=0)
        scores = np.delete(scores, deleted_indx, axis=0)

        # encode yolo detections and feed to tracker
        if FLAGS.deep:
            features = encoder(frame, bboxes)
        else:
            features = np.empty((len(bboxes), 0), np.float32)

        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                bboxes, scores, names, features)
        ]

        #initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima supression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes,
                                                    nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        # update tracks
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()

            # draw bbox on screen
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), color, 2)
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1] - 30)),
                          (int(bbox[0]) +
                           (len(class_name) + len(str(track.track_id))) * 17,
                           int(bbox[1])), color, -1)
            cv2.putText(frame, class_name + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                        (255, 255, 255), 2)

            # if the info flag is enabled, print details about each track
            if FLAGS.info:
                print(
                    "Tracker ID: {}, Class: {}, BBox Coords (xmin, ymin, xmax, ymax): {}"
                    .format(str(track.track_id), class_name, (int(
                        bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))))

        result = cv2.cvtColor(np.asarray(frame), cv2.COLOR_RGB2BGR)

        if not FLAGS.dont_show:
            cv2.imshow("Output Video", result)

        # if output flag is set, save video file
        if FLAGS.output:
            out.write(result)

        # calculate frames per second of running detections
        if FLAGS.info:
            fps = 1.0 / (time.time() - start_time)
            print("fps=%.2f" % fps)

        if not FLAGS.dont_show:
            if cv2.waitKey(1) & 0xFF == ord('q'): break
    if not FLAGS.dont_show:
        cv2.destroyAllWindows()
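
utils.format_boxes is called in the tracker examples but never defined here; the comment in the loop above says it maps normalized [ymin, xmin, ymax, xmax] to pixel [xmin, ymin, width, height], the tlwh layout DeepSORT's Detection expects. A plausible equivalent under that reading:

import numpy as np

def format_boxes(bboxes, image_height, image_width):
    formatted = []
    for ymin, xmin, ymax, xmax in bboxes:
        x, y = int(xmin * image_width), int(ymin * image_height)
        w = int((xmax - xmin) * image_width)
        h = int((ymax - ymin) * image_height)
        formatted.append([x, y, w, h])
    return np.array(formatted)

print(format_boxes(np.array([[0.1, 0.2, 0.5, 0.8]]), 480, 640))
# -> [[128  48 384 192]]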
Code example #21
0
def inference(preprocess_queue, inference_queue):

    import tensorflow as tf
    import core.utils as utils

    from tensorflow.python.saved_model import tag_constants
    from tensorflow.compat.v1 import InteractiveSession
    from tensorflow.compat.v1 import ConfigProto
    from core.functions import count_objects, crop_objects
    from core.config import cfg
    from core.utils import read_class_names
    import os
    import random
    from core.yolov4 import filter_boxes

    tf.keras.backend.clear_session()

    input_size = Parameters.input_size

    model = OutsourceContract.model
    framework = Parameters.framework
    tiny = OutsourceContract.tiny
    weights = Parameters.weights
    iou = Parameters.iou
    score = Parameters.score

    physical_devices = tf.config.experimental.list_physical_devices('GPU')

    try:
        if len(physical_devices) > 0:
            tf.config.experimental.set_memory_growth(physical_devices[0], True)
    except RuntimeError:
        # set_memory_growth raises once the GPU context is initialized
        pass

    # configure gpu usage
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)

    # load model
    if framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=weights)
    else:
        saved_model_loaded = tf.saved_model.load(weights,
                                                 tags=[tag_constants.SERVING])

    # read in all class names from config
    class_names = utils.read_class_names(cfg.YOLO.CLASSES)

    count = Parameters.count
    info = Parameters.info
    crop = Parameters.crop

    while True:
        if not preprocess_queue.empty():
            # always work on the newest frame: drain anything stale first
            queueData = preprocess_queue.get()
            while not preprocess_queue.empty():
                queueData = preprocess_queue.get()
            images_data = queueData[0]
            name = queueData[1]
            original_image = queueData[2]

            if framework == 'tflite':
                interpreter.allocate_tensors()
                input_details = interpreter.get_input_details()
                output_details = interpreter.get_output_details()
                interpreter.set_tensor(input_details[0]['index'], images_data)
                interpreter.invoke()
                pred = [
                    interpreter.get_tensor(output_details[i]['index'])
                    for i in range(len(output_details))
                ]
                if model == 'yolov3' and tiny == True:
                    boxes, pred_conf = filter_boxes(
                        pred[1],
                        pred[0],
                        score_threshold=0.25,
                        input_shape=tf.constant([input_size, input_size]))
                else:
                    boxes, pred_conf = filter_boxes(
                        pred[0],
                        pred[1],
                        score_threshold=0.25,
                        input_shape=tf.constant([input_size, input_size]))
            else:
                infer = saved_model_loaded.signatures['serving_default']
                batch_data = tf.constant(images_data)
                pred_bbox = infer(batch_data)
                for key, value in pred_bbox.items():
                    boxes = value[:, :, 0:4]
                    pred_conf = value[:, :, 4:]

            boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
                boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
                scores=tf.reshape(
                    pred_conf,
                    (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
                max_output_size_per_class=50,
                max_total_size=50,
                iou_threshold=iou,
                score_threshold=score)  # 1.2ms

            # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, xmax, ymax

            original_h, original_w, _ = original_image.shape

            bboxes = utils.format_boxes(boxes.numpy()[0], original_h,
                                        original_w)  # 1ms #-> no tf needed

            # hold all detection data in one variable
            pred_bbox = [
                bboxes,
                scores.numpy()[0],
                classes.numpy()[0],
                valid_detections.numpy()[0]
            ]

            # by default allow all classes in .names file
            allowed_classes = list(class_names.values())

            # custom allowed classes (uncomment line below to allow detections for only people)
            # allowed_classes = ['person']

            # if crop flag is enabled, crop each detection and save it as new image
            if crop:
                # name is the frame identifier received from the preprocess queue
                crop_path = os.path.join(os.getcwd(), 'detections', 'crop', name)
                try:
                    os.mkdir(crop_path)
                except FileExistsError:
                    pass
                crop_objects(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB),
                             pred_bbox, crop_path, allowed_classes)

            if count:
                # count objects found
                counted_classes = count_objects(
                    pred_bbox, by_class=False, allowed_classes=allowed_classes)
                # loop through dict and print
                for key, value in counted_classes.items():
                    print("Number of {}s: {}".format(key, value))
                boxtext, image = utils.draw_bbox(
                    original_image,
                    pred_bbox,
                    info,
                    counted_classes,
                    allowed_classes=allowed_classes)
            else:
                boxtext, image = utils.draw_bbox(
                    original_image,
                    pred_bbox,
                    info,
                    allowed_classes=allowed_classes)  # 0.5ms

            image = Image.fromarray(image.astype(np.uint8))  # 0.3ms

            inference_queue.put((boxtext, image, name))
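
The worker above drains preprocess_queue so it always operates on the newest frame and silently drops stale ones. The pattern in isolation, with a plain queue.Queue and illustrative names:

import queue

def latest(q):
    # block for one item, then keep replacing it with anything fresher
    item = q.get()
    while True:
        try:
            item = q.get_nowait()
        except queue.Empty:
            return item

q = queue.Queue()
for i in range(5):
    q.put(i)
print(latest(q))  # -> 4; the four older items are discarded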
Code example #22
0
def main(_argv):
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size
    images = FLAGS.images

    # load model
    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING])

    # loop through images in list and run Yolov4 model on each
    for count, image_path in enumerate(images, 1):
        original_image = cv2.imread(image_path)
        original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)

        image_data = cv2.resize(original_image, (input_size, input_size))
        image_data = image_data / 255.

        # batch of one image
        images_data = np.asarray([image_data]).astype(np.float32)

        if FLAGS.framework == 'tflite':
            interpreter.allocate_tensors()
            input_details = interpreter.get_input_details()
            output_details = interpreter.get_output_details()
            print(input_details)
            print(output_details)
            interpreter.set_tensor(input_details[0]['index'], images_data)
            interpreter.invoke()
            pred = [interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details))]
            if FLAGS.model == 'yolov3' and FLAGS.tiny == True:
                boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25, input_shape=tf.constant([input_size, input_size]))
            else:
                boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25, input_shape=tf.constant([input_size, input_size]))
        else:
            infer = saved_model_loaded.signatures['serving_default']
            batch_data = tf.constant(images_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(
                pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=FLAGS.iou,
            score_threshold=FLAGS.score
        )
        pred_bbox = [boxes.numpy(), scores.numpy(), classes.numpy(), valid_detections.numpy()]
        image = utils.draw_bbox(original_image, pred_bbox)
        # image = utils.draw_bbox(image_data*255, pred_bbox)
        image = Image.fromarray(image.astype(np.uint8))
        if not FLAGS.dont_show:
            image.show()
        # PIL gives RGB; convert back to BGR before writing with OpenCV
        image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
        cv2.imwrite(FLAGS.output + 'detection' + str(count) + '.png', image)
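
The preprocessing used in this example (and in most of the others here) is just resize, scale to [0, 1], add a batch axis. A compact equivalent:

import cv2
import numpy as np

def preprocess(image_bgr, input_size):
    rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
    resized = cv2.resize(rgb, (input_size, input_size))
    return (resized / 255.).astype(np.float32)[np.newaxis, ...]  # [1, H, W, 3]

frame = np.zeros((480, 640, 3), dtype=np.uint8)  # dummy frame
print(preprocess(frame, 416).shape)  # -> (1, 416, 416, 3)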
Code example #23
0
File: object_tracker.py Project: euphoria2k/CVX-AI
def main(_argv):

    # Definition of the parameters
    max_cosine_distance = 0.4
    nn_budget = None
    nms_max_overlap = 1.0

    # initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    # calculate cosine distance metric
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    # initialize tracker
    tracker = Tracker(metric)

    # load configuration for object detector
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size
    #images = FLAGS.images
    video_path = FLAGS.video

    # load tflite model if flag is set
    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
    # otherwise load standard tensorflow saved model
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights,
                                                 tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']

    # begin video capture
    try:
        vid = cv2.VideoCapture(int(video_path))
    except ValueError:  # not an integer webcam index, treat as a file path
        vid = cv2.VideoCapture(video_path)

    out = None

    # get video ready to save locally if flag is set
    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    frame_num = 0

    if FLAGS.shirt:
        allowed_classes = ['Shirt']
        #ROI =
    if FLAGS.trouser:
        allowed_classes = ['Trousers']
    if FLAGS.jeans:
        allowed_classes = ['Jeans']
    if FLAGS.dress:
        allowed_classes = ['Dress']
    if FLAGS.footwear:
        allowed_classes = ['Footwear']
    if FLAGS.jacket:
        allowed_classes = ['Jacket']
    if FLAGS.skirt:
        allowed_classes = ['Skirt']
    if FLAGS.suit:
        allowed_classes = ['Suit']

    # while video is running
    while True:
        return_value, frame = vid.read()
        if return_value:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(frame)
        else:
            print('Video has ended or failed, try a different video format!')
            break
        frame_num += 1
        print('Frame #: ', frame_num)
        frame_size = frame.shape[:2]
        image_data = cv2.resize(frame, (input_size, input_size))
        image_data = image_data / 255.
        image_data = image_data[np.newaxis, ...].astype(np.float32)
        start_time = time.time()

        # run detections on tflite if flag is set
        if FLAGS.framework == 'tflite':
            interpreter.set_tensor(input_details[0]['index'], image_data)
            interpreter.invoke()
            pred = [
                interpreter.get_tensor(output_details[i]['index'])
                for i in range(len(output_details))
            ]
            # run detections using yolov3 if flag is set
            if FLAGS.model == 'yolov3' and FLAGS.tiny == True:
                boxes, pred_conf = filter_boxes(pred[1],
                                                pred[0],
                                                score_threshold=0.25,
                                                input_shape=tf.constant(
                                                    [input_size, input_size]))
            else:
                boxes, pred_conf = filter_boxes(pred[0],
                                                pred[1],
                                                score_threshold=0.25,
                                                input_shape=tf.constant(
                                                    [input_size, input_size]))
        else:
            batch_data = tf.constant(image_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(
                pred_conf,
                (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=FLAGS.iou,
            score_threshold=FLAGS.score)

        # convert data to numpy arrays and slice out unused elements
        num_objects = valid_detections.numpy()[0]
        bboxes = boxes.numpy()[0]
        bboxes = bboxes[0:int(num_objects)]
        scores = scores.numpy()[0]
        scores = scores[0:int(num_objects)]
        classes = classes.numpy()[0]
        classes = classes[0:int(num_objects)]

        # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height
        original_h, original_w, _ = frame.shape
        bboxes = utils.format_boxes(bboxes, original_h, original_w)

        # store all predictions in one parameter for simplicity when calling functions
        pred_bbox = [bboxes, scores, classes, num_objects]

        # read in all class names from config
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)

        # by default allow all classes in .names file, unless one of the
        # clothing flags above already narrowed the list (re-assigning
        # allowed_classes every frame would silently disable those flags)
        if not (FLAGS.shirt or FLAGS.trouser or FLAGS.jeans or FLAGS.dress or
                FLAGS.footwear or FLAGS.jacket or FLAGS.skirt or FLAGS.suit):
            allowed_classes = list(class_names.values())

        # custom allowed classes (uncomment line below to customize tracker for only people)
        #allowed_classes = ['person']

        # loop through objects and use class index to get class name, allow only classes in allowed_classes list
        names = []
        deleted_indx = []
        for i in range(num_objects):
            class_indx = int(classes[i])
            class_name = class_names[class_indx]
            if class_name not in allowed_classes:
                deleted_indx.append(i)
            else:
                names.append(class_name)
        names = np.array(names)
        count = len(names)
        if FLAGS.count:
            cv2.putText(frame, "Objects being tracked: {}".format(count),
                        (5, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2,
                        (0, 255, 0), 2)
            print("Objects being tracked: {}".format(count))
        # delete detections that are not in allowed_classes
        bboxes = np.delete(bboxes, deleted_indx, axis=0)
        scores = np.delete(scores, deleted_indx, axis=0)

        # encode yolo detections and feed to tracker
        features = encoder(frame, bboxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                bboxes, scores, names, features)
        ]

        #initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima supression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes,
                                                    nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        # update tracks
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()

            # draw bbox on screen
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            #cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2)
            #cv2.rectangle(frame, (int(bbox[0]), int(bbox[1]-30)), (int(bbox[0])+(len(class_name)+len(str(track.track_id)))*17, int(bbox[1])), color, -1)
            #cv2.putText(frame, class_name + "-" + str(track.track_id),(int(bbox[0]), int(bbox[1]-10)),0, 0.75, (255,255,255),2)

            # if the info flag is enabled, print details about each track
            if FLAGS.info:
                print(
                    "Tracker ID: {}, Class: {},  BBox Coords (xmin, ymin, xmax, ymax): {}"
                    .format(str(track.track_id), class_name, (int(
                        bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))))

            if FLAGS.color:
                PATH = './training.data'
                # ROI is the single pixel at the center of the bounding box
                ROI = frame[int((int(bbox[1]) + int(bbox[3])) /
                                2):int((int(bbox[1]) + int(bbox[3])) / 2) + 1,
                            int((int(bbox[0]) + int(bbox[2])) /
                                2):int((int(bbox[0]) + int(bbox[2])) / 2) + 1]

                color_histogram_feature_extraction.color_histogram_of_test_image(
                    ROI)
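                # color_histogram_of_test_image apparently writes the sampled
                # RGB features to test.data, which knn_classifier and the
                # load_red/load_green/load_blue helpers then read back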
                prediction = knn_classifier.main('training.data', 'test.data')
                #prediction = 'red'
                red = load_red('test.data')
                Red = str(red)
                print('red value: ' + Red)
                green = load_green('test.data')
                Green = str(green)
                print('green value: ' + Green)
                blue = load_blue('test.data')
                Blue = str(blue)
                print('blue value: ' + Blue)

                if red is not None and green is not None and blue is not None:
                    # note: colorsys.rgb_to_hls expects channels in [0, 1];
                    # raw 0-255 values are passed here
                    HLS = colorsys.rgb_to_hls(red, green, blue)
                    HUE = int(HLS[0])
                    Light = int(HLS[1])
                    Saturation = int(HLS[2])
                    print("HLS is equal to", HLS)
                    print('HUE: ', HUE)
                    print('LIGHT: ', Light)
                    print('Saturation', Saturation)

                if red is not None and green is not None and blue is not None:
                    HSV = rgb_to_hsv(red, green, blue)
                    HUE_1 = int(HSV[0])
                    Saturation_1 = int(HSV[1])
                    Value = int(HSV[2])
                    print("HSV is equal to", HSV)
                    print('Hue: ', HUE_1)
                    print('saturation: ', Saturation_1)
                    print('value', Value)

                print(str(prediction) + " " + str(class_name))
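
            # note: the Fuzzy_* branches below read HUE_1, Saturation_1 and
            # Value, which are only defined when FLAGS.color ran for this track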

            if FLAGS.Fuzzy_black:
                #if str(59.7) <= Red < str(200.9)  and  str(74) <= Blue < str(207) and str(70) <= Green < str(203):
                if 0 <= HUE_1 < 210 and 0 <= Saturation_1 < 41 and 0 <= Value < 86:
                    print("THIS IS THE black COLOR yaaaaaaaaaaaaaaaaaaaa")
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])), color, 2)
                    cv2.rectangle(
                        frame, (int(bbox[0]), int(bbox[1] - 30)),
                        (int(bbox[0]) +
                         (len(class_name) + len(str(track.track_id))) * 17,
                         int(bbox[1])), color, -1)
                    cv2.putText(
                        frame,
                        class_name + " " + "BLACK" + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                        (255, 255, 255), 2)
            if FLAGS.Fuzzy_red:
                #if  str(139) <= Red < str(255)  and  str(0) <= Green < str(160) and str(0) <= Blue < str(128):
                if 0 <= HUE_1 < 348 and 47 <= Saturation_1 < 100 and 55 <= Value < 100:
                    print("THIS IS THE RED COLOR")
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])), color, 2)
                    cv2.rectangle(
                        frame, (int(bbox[0]), int(bbox[1] - 30)),
                        (int(bbox[0]) +
                         (len(class_name) + len(str(track.track_id))) * 17,
                         int(bbox[1])), color, -1)
                    cv2.putText(
                        frame,
                        class_name + " " + "RED" + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                        (255, 255, 255), 2)
            if FLAGS.Fuzzy_orange:
                #if  str(255) <= Red < str(255)  and  str(69) <= Green < str(165) and str(0) <= Blue < str(80):
                if 9 <= HUE_1 < 39 and 69 <= Saturation_1 < 100 and Value == 100:
                    print("THIS IS THE ORANGE COLOR")
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])), color, 2)
                    cv2.rectangle(
                        frame, (int(bbox[0]), int(bbox[1] - 30)),
                        (int(bbox[0]) +
                         (len(class_name) + len(str(track.track_id))) * 17,
                         int(bbox[1])), color, -1)
                    cv2.putText(
                        frame, class_name + " " + "ORANGE" + "-" +
                        str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)),
                        0, 0.75, (255, 255, 255), 2)
            if FLAGS.Fuzzy_yellow:
                #if  str(189) <= Red < str(255)  and  str(183) <= Green < str(255) and str(0) <= Blue < str(224):
                if 0 <= HUE_1 < 56 and 12 <= Saturation_1 < 100 and 74 <= Value < 100:
                    print("THIS IS THE YELLOW COLOR")
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])), color, 2)
                    cv2.rectangle(
                        frame, (int(bbox[0]), int(bbox[1] - 30)),
                        (int(bbox[0]) +
                         (len(class_name) + len(str(track.track_id))) * 17,
                         int(bbox[1])), color, -1)
                    cv2.putText(
                        frame, class_name + " " + "YELLOW" + "-" +
                        str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)),
                        0, 0.75, (255, 255, 255), 2)
            if FLAGS.Fuzzy_blue:
                #if  str(0) <= Red < str(176)  and  str(0) <= Green < str(244) and str(112) <= Blue < str(255):
                if 187 <= HUE_1 < 240 and 21 <= Saturation_1 < 100 and 44 <= Value < 100:
                    print("THIS IS THE BLUE COLOR")
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])), color, 2)
                    cv2.rectangle(
                        frame, (int(bbox[0]), int(bbox[1] - 30)),
                        (int(bbox[0]) +
                         (len(class_name) + len(str(track.track_id))) * 17,
                         int(bbox[1])), color, -1)
                    cv2.putText(
                        frame,
                        class_name + " " + "BLUE" + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                        (255, 255, 255), 2)
            if FLAGS.Fuzzy_white:
                #if  str(240) <= Red < str(255)  and  str(228) <= Green < str(255) and str(215) <= Blue < str(255):
                if 0 <= HUE_1 < 340 and 0 <= Saturation_1 < 14 and 96 <= Value < 100:
                    print("THIS IS THE WHITE COLOR")
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])), color, 2)
                    cv2.rectangle(
                        frame, (int(bbox[0]), int(bbox[1] - 30)),
                        (int(bbox[0]) +
                         (len(class_name) + len(str(track.track_id))) * 17,
                         int(bbox[1])), color, -1)
                    cv2.putText(
                        frame,
                        class_name + " " + "WHITE" + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                        (255, 255, 255), 2)
            if FLAGS.Fuzzy_purple:
                #if  str(72) <= Red < str(255)  and  str(0) <= Green < str(230) and str(128) <= Blue < str(255):
                if 0 <= HUE_1 < 302 and 8 <= Saturation_1 < 100 and 50 <= Value < 100:
                    print("THIS IS THE PURPLE COLOR")
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])), color, 2)
                    cv2.rectangle(
                        frame, (int(bbox[0]), int(bbox[1] - 30)),
                        (int(bbox[0]) +
                         (len(class_name) + len(str(track.track_id))) * 17,
                         int(bbox[1])), color, -1)
                    cv2.putText(
                        frame, class_name + " " + "PURPLE" + "-" +
                        str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)),
                        0, 0.75, (255, 255, 255), 2)
            if FLAGS.Fuzzy_green:
                #if  str(0) <= Red < str(173)  and  str(100) <= Green < str(255) and str(0) <= Blue < str(170):
                if 0 <= HUE_1 < 160 and 24 <= Saturation_1 < 100 and 39 <= Value < 100:
                    print("THIS IS THE green COLOR")
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])), color, 2)
                    cv2.rectangle(
                        frame, (int(bbox[0]), int(bbox[1] - 30)),
                        (int(bbox[0]) +
                         (len(class_name) + len(str(track.track_id))) * 17,
                         int(bbox[1])), color, -1)
                    cv2.putText(
                        frame,
                        class_name + " " + "GREEN" + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                        (255, 255, 255), 2)
            if FLAGS.Fuzzy_brown:
                #if  str(128) <= Red < str(255)  and  str(0) <= Green < str(248) and str(0) <= Blue < str(288):
                if 0 <= HUE_1 < 48 and 14 <= Saturation_1 < 100 and 50 <= Value < 100:
                    print("THIS IS THE BROWN COLOR")
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])), color, 2)
                    cv2.rectangle(
                        frame, (int(bbox[0]), int(bbox[1] - 30)),
                        (int(bbox[0]) +
                         (len(class_name) + len(str(track.track_id))) * 17,
                         int(bbox[1])), color, -1)
                    cv2.putText(
                        frame,
                        class_name + " " + "BROWN" + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                        (255, 255, 255), 2)
            if FLAGS.Fuzzy_cyan:
                #if  str(0) <= Red < str(244)  and  str(128) <= Green < str(255) and str(128) <= Blue < str(255):
                if 0 <= HUE_1 < 182 and 12 <= Saturation_1 < 100 and 50 <= Value < 100:
                    print("THIS IS THE CYAN COLOR")
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])), color, 2)
                    cv2.rectangle(
                        frame, (int(bbox[0]), int(bbox[1] - 30)),
                        (int(bbox[0]) +
                         (len(class_name) + len(str(track.track_id))) * 17,
                         int(bbox[1])), color, -1)
                    cv2.putText(
                        frame,
                        class_name + " " + "CYAN" + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                        (255, 255, 255), 2)
            if FLAGS.Fuzzy_pink:
                #if  str(199) <= Red < str(255)  and  str(20) <= Green < str(192) and str(133) <= Blue < str(203):
                if 322 <= HUE_1 < 351 and 25 <= Saturation_1 < 92 and 78 <= Value < 100:
                    print("THIS IS THE PINK COLOR")
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])), color, 2)
                    cv2.rectangle(
                        frame, (int(bbox[0]), int(bbox[1] - 30)),
                        (int(bbox[0]) +
                         (len(class_name) + len(str(track.track_id))) * 17,
                         int(bbox[1])), color, -1)
                    cv2.putText(
                        frame,
                        class_name + " " + "PINK" + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                        (255, 255, 255), 2)

            #ROI = frame[int((int(bbox[1]) + int(bbox[3]))/2):int((int(bbox[1]) + int(bbox[3]))/2)+1,int((int(bbox[0]) + int(bbox[2]))/2):int((int(bbox[0]) + int(bbox[2]))/2)+1]
            #color_histogram_feature_extraction.color_histogram_of_test_image(ROI)
            #prediction = knn_classifier.main('training.data','test.data')
            # The per-colour branches were identical except for the flag name and
            # the predicted label, so they are collapsed into a single loop.
            for color_flag in ('black', 'blue', 'red', 'yellow', 'orange',
                               'violet', 'white', 'green'):
                if getattr(FLAGS, color_flag) and prediction == color_flag:
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])), color, 2)
                    cv2.rectangle(
                        frame, (int(bbox[0]), int(bbox[1] - 30)),
                        (int(bbox[0]) +
                         (len(class_name) + len(str(track.track_id))) * 17,
                         int(bbox[1])), color, -1)
                    cv2.putText(
                        frame, class_name + " " + str(prediction) + "-" +
                        str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)),
                        0, 0.75, (255, 255, 255), 2)
            #print('feature data:' + " " + feature_data)
            #result_1 = np.asarray(frame)
            #result_1 = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            #cv2.imshow('color classifier', result_1)
            #print(color_histogram_feature_extraction.feature_data)

        # calculate frames per second of running detections
        fps = 1.0 / (time.time() - start_time)
        print("FPS: %.2f" % fps)
        result = np.asarray(frame)
        result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

        if not FLAGS.dont_show:
            cv2.imshow("Output Video", result)

        # if output flag is set, save video file
        if FLAGS.output:
            out.write(result)
        if cv2.waitKey(1) & 0xFF == ord('q'): break
    cv2.destroyAllWindows()
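The `prediction` label tested in the colour branches above comes from a kNN colour classifier invoked earlier in the script. A minimal sketch of that step, reconstructed from the commented-out ROI lines above; the `color_histogram_feature_extraction` and `knn_classifier` interfaces are taken from those comments and are not verified here:

# Hypothetical sketch: classify the colour of the centre pixel of a track's bbox
cy = int((int(bbox[1]) + int(bbox[3])) / 2)
cx = int((int(bbox[0]) + int(bbox[2])) / 2)
ROI = frame[cy:cy + 1, cx:cx + 1]  # 1x1 ROI at the bbox centre
color_histogram_feature_extraction.color_histogram_of_test_image(ROI)  # writes test.data
prediction = knn_classifier.main('training.data', 'test.data')  # e.g. 'red'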
Code example #24
    def Score(self, cvImage):
        """Use tflite interpreter to predict bounding boxes and 
        confidence score."""
        with self._lock:
            timestamp = datetime.datetime.now()
            # Predict
            try:
                image_data = self.Preprocess(cvImage)
                self.interpreter.set_tensor(self.input_details[0]['index'],
                                            image_data)
                self.interpreter.invoke()
                pred = [
                    self.interpreter.get_tensor(
                        self.output_details[i]['index'])
                    for i in range(len(self.output_details))
                ]
            except Exception as err:
                return [{
                    '[ERROR]':
                    'Error during prediction: {}'.format(repr(err))
                }]

            # Filter and NMS
            try:
                boxes, pred_conf = filter_boxes(
                    pred[0],
                    pred[1],
                    score_threshold=0.25,
                    input_shape=tf.constant([self.input_size,
                                             self.input_size]))
                boxes, scores, indices, valid_detections = tf.image.combined_non_max_suppression(
                    boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
                    scores=tf.reshape(
                        pred_conf,
                        (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
                    max_output_size_per_class=50,
                    max_total_size=50,
                    iou_threshold=FLAGS.iou,
                    score_threshold=FLAGS.score)
            except Exception as err:
                return [{
                    '[ERROR]':
                    'Error during filter and NMS: {}'.format(repr(err))
                }]

            try:
                # Save image w/ annotations to Blob Storage (through IoT module
                # and then to cloud Azure Storage pending connectivity)
                pred_bbox = [
                    boxes.numpy(),
                    scores.numpy(),
                    indices.numpy(),
                    valid_detections.numpy()
                ]
                image = cv2.cvtColor(cvImage, cv2.COLOR_BGR2RGB)
                image_annot = utils.draw_bbox(image, pred_bbox)
                #image = cv2.cvtColor(image.astype(np.uint8), cv2.COLOR_BGR2RGB)

                pil_image = Image.fromarray(image_annot.astype(np.uint8))
                # Save annotated image to buffer
                bytes_io_annot = io.BytesIO()
                pil_image.save(bytes_io_annot, format='JPEG')
                bytes_im_annot = bytes_io_annot.getvalue()

                # Unannotated image to buffer
                pil_image = Image.fromarray(image.astype(np.uint8))
                bytes_io_unannot = io.BytesIO()
                pil_image.save(bytes_io_unannot, format='JPEG')
                bytes_im_unannot = bytes_io_unannot.getvalue()

                # Check whether any detection clears the score threshold
                indices_check = np.squeeze(indices.numpy(), axis=0)
                scores_check = np.squeeze(scores.numpy(), axis=0)
                if (scores_check > FLAGS.score).any():
                    # Name in blob to use
                    blob_name = str(timestamp.strftime(
                        "%d-%b-%Y-%H-%M-%S.%f")) + "_annotated.jpg"
                    blob_metadata = {
                        'timestamp':
                        str(timestamp.strftime("%d-%b-%Y-%H-%M-%S.%f")),
                        'objects':
                        ','.join(
                            set([
                                self._labelList[int(indices_check[i])]
                                for i in range(len(indices_check))
                                if scores_check[i] > FLAGS.score
                            ]))
                    }
                    try:
                        container_client = self.blob_service_client.get_container_client(
                            self.local_container_name_annotated)
                        props = container_client.get_container_properties()
                    except Exception:
                        # Container does not exist yet; create it locally
                        container_client.create_container()
                    # Upload pil image as buffer
                    container_client.upload_blob(blob_name,
                                                 bytes_im_annot,
                                                 metadata=blob_metadata)
                # If every score is below the threshold, store the frame for later use
                if (scores_check < FLAGS.score).all():
                    # Name in blob to use
                    blob_name = str(timestamp.strftime(
                        "%d-%b-%Y-%H-%M-%S.%f")) + "_lowconf.jpg"
                    blob_metadata = {
                        'timestamp':
                        str(timestamp.strftime("%d-%b-%Y-%H-%M-%S.%f")),
                        'objects':
                        ','.join(
                            set([
                                self._labelList[int(indices_check[i])]
                                for i in range(len(indices_check))
                            ]))
                    }
                    try:
                        container_client = self.blob_service_client.get_container_client(
                            self.local_container_name_lowconf)
                        props = container_client.get_container_properties()
                    except Exception:
                        # Container does not exist yet; create it locally
                        container_client.create_container()
                    # Upload pil image as buffer
                    container_client.upload_blob(blob_name,
                                                 bytes_im_unannot,
                                                 metadata=blob_metadata)
            except Exception as err:
                exc_type, exc_value, exc_traceback = sys.exc_info()
                return [{
                    '[ERROR]':
                    'Error sending image to local blob storage: {}'.format(
                        repr(
                            traceback.format_exception(exc_type, exc_value,
                                                       exc_traceback)))
                }]

            # Postprocess
            try:
                boxes = np.squeeze(boxes.numpy(), axis=0)
                scores = np.squeeze(scores.numpy(), axis=0)
                indices = np.squeeze(indices.numpy(), axis=0)
                results = self.Postprocess(boxes, scores, indices)
            except Exception as err:
                return [{
                    '[ERROR]':
                    'Error during postprocess: {}'.format(repr(err))
                }]

        return results
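A note on the two threshold checks in `Score` above: reducing with `.any()`/`.all()` before comparing collapses the scores into a single bool, so the elementwise mask has to come first. A self-contained illustration:

import numpy as np

scores = np.array([0.1, 0.2, 0.3])   # every score below the 0.5 threshold
thresh = 0.5

# Buggy pattern: .any()/.all() reduce to a bool *before* the comparison
print(scores.any() > thresh)    # True  (bool True == 1, and 1 > 0.5)
print(scores.all() < thresh)    # False (1 < 0.5 is False)

# Correct pattern: compare elementwise first, then reduce
print((scores > thresh).any())  # False -- no detection clears the threshold
print((scores < thresh).all())  # True  -- every detection is low-confidence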
Code example #25
def main(_argv):
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size
    video_path = FLAGS.video
    # get video name by using split method
    video_name = video_path.split('/')[-1]
    video_name = video_name.split('.')[0]
    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']

    # begin video capture
    try:
        vid = cv2.VideoCapture(int(video_path))
    except ValueError:  # video_path is a file path rather than a device index
        vid = cv2.VideoCapture(video_path)

    out = None

    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    frame_num = 0
    while True:
        return_value, frame = vid.read()
        if return_value:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_num += 1
            image = Image.fromarray(frame)
        else:
            print('Video has ended or failed, try a different video format!')
            break
    
        frame_size = frame.shape[:2]
        image_data = cv2.resize(frame, (input_size, input_size))
        image_data = image_data / 255.
        image_data = image_data[np.newaxis, ...].astype(np.float32)
        start_time = time.time()

        if FLAGS.framework == 'tflite':
            interpreter.set_tensor(input_details[0]['index'], image_data)
            interpreter.invoke()
            pred = [interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details))]
            if FLAGS.model == 'yolov3' and FLAGS.tiny == True:
                boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25,
                                                input_shape=tf.constant([input_size, input_size]))
            else:
                boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25,
                                                input_shape=tf.constant([input_size, input_size]))
        else:
            batch_data = tf.constant(image_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(
                pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=FLAGS.iou,
            score_threshold=FLAGS.score
        )

        # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, xmax, ymax
        original_h, original_w, _ = frame.shape
        bboxes = utils.format_boxes(boxes.numpy()[0], original_h, original_w)

        pred_bbox = [bboxes, scores.numpy()[0], classes.numpy()[0], valid_detections.numpy()[0]]

        # read in all class names from config
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)

        # by default allow all classes in .names file
        allowed_classes = list(class_names.values())
        
        # custom allowed classes (uncomment line below to allow detections for only people)
        #allowed_classes = ['person']

        # if crop flag is enabled, crop each detection and save it as new image
        if FLAGS.crop:
            crop_rate = 150 # capture images every so many frames (ex. crop photos every 150 frames)
            crop_path = os.path.join(os.getcwd(), 'detections', 'crop', video_name)
            try:
                os.mkdir(crop_path)
            except FileExistsError:
                pass
            if frame_num % crop_rate == 0:
                final_path = os.path.join(crop_path, 'frame_' + str(frame_num))
                try:
                    os.mkdir(final_path)
                except FileExistsError:
                    pass          
                crop_objects(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB), pred_bbox, final_path, allowed_classes)
            else:
                pass

        if FLAGS.count:
            # count objects found
            counted_classes = count_objects(pred_bbox, by_class = True, allowed_classes=allowed_classes)
            # loop through dict and print
            for key, value in counted_classes.items():
                print("Number of {}s: {}".format(key, value))
            image = utils.draw_bbox(frame, pred_bbox, FLAGS.info, counted_classes, allowed_classes=allowed_classes, read_plate=FLAGS.plate)
        else:
            image = utils.draw_bbox(frame, pred_bbox, FLAGS.info, allowed_classes=allowed_classes, read_plate=FLAGS.plate)
        
        fps = 1.0 / (time.time() - start_time)
        print("FPS: %.2f" % fps)
        result = np.asarray(image)
        # cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)
        result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        
        # if not FLAGS.dont_show:
            # cv2.imshow("result", result)
        
        if FLAGS.output:
            out.write(result)
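`utils.format_boxes` is only described by the comment in the example above; a hypothetical re-implementation matching that description (normalized ymin, xmin, ymax, xmax to pixel xmin, ymin, xmax, ymax), not the library's actual code:

# Assumed behaviour, inferred from the comment in the example above
def format_boxes_sketch(bboxes, image_height, image_width):
    formatted = []
    for ymin, xmin, ymax, xmax in bboxes:
        formatted.append([int(xmin * image_width), int(ymin * image_height),
                          int(xmax * image_width), int(ymax * image_height)])
    return formatted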
Code example #26
def main(_argv):
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size
    video_path = FLAGS.video
    # get video name by using split method
    video_name = video_path.split('/')[-1]
    video_name = video_name.split('.')[0]
    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights,
                                                 tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']

    # begin video capture
    try:
        vid = cv2.VideoCapture(int(video_path))
    except ValueError:  # video_path is a file path rather than a device index
        vid = cv2.VideoCapture(video_path)

    out = None

    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    firstFrame = True
    frame_num = 0
    while True:
        return_value, frame_1 = vid.read()
        # bail out before the ROI-selection loop and fillPoly touch a None frame
        if not return_value:
            print('Video has ended or failed, try a different video format!')
            break
        pts = []
        aa = []
        bb = []
        cc = []
        dd = []
        while firstFrame:

            def click_event(event, x, y, flags, param):
                # pts is only mutated (append/pop), so the list from the
                # enclosing scope is used directly; the original 'global pts'
                # pointed at a non-existent module-level name
                if event == cv2.EVENT_LBUTTONDOWN:
                    pts.append((x, y))
                    cv2.circle(frame_1,
                               center=(x, y),
                               radius=5,
                               color=(0, 0, 255),
                               thickness=-1)
                    strXY = str(x) + " " + str(y)
                    font = cv2.FONT_HERSHEY_SIMPLEX
                    cv2.putText(frame_1, strXY, (x, y), font, 0.5,
                                (255, 255, 0), 2)
                elif event == cv2.EVENT_RBUTTONDOWN:
                    if pts:
                        pts.pop()
                cv2.imshow('bobur', frame_1)

            cv2.imshow('bobur', frame_1)
            cv2.setMouseCallback('bobur', click_event)
            if cv2.waitKey(1) & 0xFF == ord('c'):
                firstFrame = False
                break
            if len(pts) >= 4:
                aa.append(pts[0])
                bb.append(pts[1])
                cc.append(pts[2])
                dd.append(pts[3])
            print(aa, bb, cc, dd)

        a, b, c, d = [209, 1040], [331, 197], [1124, 197], [1907, 850]
        e, f, g, h = [0, 0], [1920, 0], [1920, 1080], [0, 1080]
        external_poly = [
            np.array([e, b, c, f]),
            np.array([f, c, d, g]),
            np.array([g, d, a, h]),
            np.array([h, a, b, e])
        ]
        frame = cv2.fillPoly(frame_1, external_poly, (0, 0, 0))
        # cv2.line(frame,(209, 1040),(331,197),(255,0,0),2)
        # cv2.line(frame,(331, 197), (1124,197),(255,0,0),2)
        # cv2.line(frame,(1124,197),(1907,850),(255,0,0),2)
        # cv2.line(frame,(209, 1040),(1907,850),(255,0,0),2)
        # cv2.line(frame,a,b,(255,0,0),2)
        # cv2.line(frame,b,c,(255,0,0),2)
        # cv2.line(frame,c,d,(255,0,0),2)
        # cv2.line(frame,a,d,(255,0,0),2)

        # frame = cv2.rotate(frame, cv2.ROTATE_90_CLOCKWISE)  #rotate the video for mobile videos
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_num += 1
        image = Image.fromarray(frame)
        if frame_num % 15 == 0:

            frame_size = frame.shape[:2]
            image_data = cv2.resize(frame, (input_size, input_size))
            image_data = image_data / 255.
            image_data = image_data[np.newaxis, ...].astype(np.float32)
            start_time = time.time()

            if FLAGS.framework == 'tflite':
                interpreter.set_tensor(input_details[0]['index'], image_data)
                interpreter.invoke()
                pred = [
                    interpreter.get_tensor(output_details[i]['index'])
                    for i in range(len(output_details))
                ]
                if FLAGS.model == 'yolov3' and FLAGS.tiny == True:
                    boxes, pred_conf = filter_boxes(
                        pred[1],
                        pred[0],
                        score_threshold=0.25,
                        input_shape=tf.constant([input_size, input_size]))
                else:
                    boxes, pred_conf = filter_boxes(
                        pred[0],
                        pred[1],
                        score_threshold=0.25,
                        input_shape=tf.constant([input_size, input_size]))
            else:
                batch_data = tf.constant(image_data)
                pred_bbox = infer(batch_data)
                for key, value in pred_bbox.items():
                    boxes = value[:, :, 0:4]
                    pred_conf = value[:, :, 4:]

            boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
                boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
                scores=tf.reshape(
                    pred_conf,
                    (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
                max_output_size_per_class=100,
                max_total_size=100,
                iou_threshold=FLAGS.iou,
                score_threshold=FLAGS.score)

            # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, xmax, ymax
            original_h, original_w, _ = frame.shape
            bboxes = utils.format_boxes(boxes.numpy()[0], original_h,
                                        original_w)

            pred_bbox = [
                bboxes,
                scores.numpy()[0],
                classes.numpy()[0],
                valid_detections.numpy()[0]
            ]
            # print(pred_bbox[2])
            out_boxes, out_scores, out_classes, num_boxes = pred_bbox

            # read in all class names from config
            class_names = utils.read_class_names(cfg.YOLO.CLASSES)

            # by default allow all classes in .names file
            # allowed_classes = list(class_names.values())

            # custom allowed classes (uncomment line below to allow detections for only SELECTED DETECTION CLASSES)
            allowed_classes = ['person', 'car', 'truck', 'bus', 'motorbike']
            # allowed_classes = ['car']

            # if crop flag is enabled, crop each detection and save it as new image
            if FLAGS.crop:
                crop_rate = 150  # capture images every so many frames (ex. crop photos every 150 frames)
                crop_path = os.path.join(os.getcwd(), 'detections', 'crop',
                                         video_name)
                try:
                    os.mkdir(crop_path)
                except FileExistsError:
                    pass
                if frame_num % crop_rate == 0:
                    final_path = os.path.join(crop_path,
                                              'frame_' + str(frame_num))
                    try:
                        os.mkdir(final_path)
                    except FileExistsError:
                        pass
                    crop_objects(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB),
                                 pred_bbox, final_path, allowed_classes)
                else:
                    pass

            if FLAGS.count:
                # count objects found
                counted_classes = count_objects(
                    pred_bbox, by_class=True, allowed_classes=allowed_classes)
                # loop through dict and print
                for key, value in counted_classes.items():
                    print("Number of {}s: {}".format(key, value))
                image = utils.draw_bbox(frame,
                                        pred_bbox,
                                        FLAGS.info,
                                        counted_classes,
                                        allowed_classes=allowed_classes,
                                        read_plate=FLAGS.plate)
            else:
                image = utils.draw_bbox(frame,
                                        pred_bbox,
                                        FLAGS.info,
                                        allowed_classes=allowed_classes,
                                        read_plate=FLAGS.plate)

            fps = 1.0 / (time.time() - start_time)
            print("FPS: %.2f" % fps)
            result = np.asarray(image)
            cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)
            result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

            if not FLAGS.dont_show:
                cv2.imshow("result", result)

            if FLAGS.output:
                out.write(result)
            if cv2.waitKey(1) & 0xFF == ord('q'): break
    vid.release()
    cv2.destroyAllWindows()
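The example above blacks out everything outside a hand-picked quadrilateral before running detection, so the model only sees the region of interest. A self-contained illustration of that masking trick (frame size and corner values copied from the example):

import cv2
import numpy as np

frame = np.full((1080, 1920, 3), 255, dtype=np.uint8)    # stand-in frame
a, b, c, d = [209, 1040], [331, 197], [1124, 197], [1907, 850]
e, f, g, h = [0, 0], [1920, 0], [1920, 1080], [0, 1080]  # frame corners
external_poly = [np.array([e, b, c, f], dtype=np.int32),
                 np.array([f, c, d, g], dtype=np.int32),
                 np.array([g, d, a, h], dtype=np.int32),
                 np.array([h, a, b, e], dtype=np.int32)]
# fill the four polygons surrounding the quad a-b-c-d with black
masked = cv2.fillPoly(frame, external_poly, (0, 0, 0))
cv2.imwrite('masked.png', masked)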
Code example #27
def main(_argv):
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size
    video_path = FLAGS.video

    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights,
                                                 tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']

    # begin video capture
    try:
        vid = cv2.VideoCapture(int(video_path))
    except ValueError:  # video_path is a file path rather than a device index
        vid = cv2.VideoCapture(video_path)

    out = None

    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    while True:
        return_value, frame = vid.read()
        if return_value:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(frame)
        else:
            print('Video has ended or failed, try a different video format!')
            break

        frame_size = frame.shape[:2]
        image_data = cv2.resize(frame, (input_size, input_size))
        image_data = image_data / 255.
        image_data = image_data[np.newaxis, ...].astype(np.float32)
        start_time = time.time()

        if FLAGS.framework == 'tflite':
            interpreter.set_tensor(input_details[0]['index'], image_data)
            interpreter.invoke()
            pred = [
                interpreter.get_tensor(output_details[i]['index'])
                for i in range(len(output_details))
            ]
            if FLAGS.model == 'yolov3' and FLAGS.tiny == True:
                boxes, pred_conf = filter_boxes(pred[1],
                                                pred[0],
                                                score_threshold=0.25,
                                                input_shape=tf.constant(
                                                    [input_size, input_size]))
            else:
                boxes, pred_conf = filter_boxes(pred[0],
                                                pred[1],
                                                score_threshold=0.25,
                                                input_shape=tf.constant(
                                                    [input_size, input_size]))
        else:
            batch_data = tf.constant(image_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(
                pred_conf,
                (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=FLAGS.iou,
            score_threshold=FLAGS.score)

        pred_bbox = [
            boxes.numpy(),
            scores.numpy(),
            classes.numpy(),
            valid_detections.numpy()
        ]
        image = utils.draw_bbox(frame, pred_bbox)
        fps = 1.0 / (time.time() - start_time)
        print("FPS: %.2f" % fps)
        result = np.asarray(image)
        cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)
        result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        cv2.imshow("result", result)

        if FLAGS.output:
            out.write(result)
        if cv2.waitKey(1) & 0xFF == ord('q'): break
    cv2.destroyAllWindows()
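Every example here feeds tf.image.combined_non_max_suppression the same reshapes: boxes as [batch, num_boxes, 1, 4] (the 1 shares each box across all classes) and scores as [batch, num_boxes, num_classes]. A standalone shape check with dummy data:

import tensorflow as tf

batch, num_boxes, num_classes = 1, 8, 3
boxes = tf.random.uniform((batch, num_boxes, 4))         # normalized ymin, xmin, ymax, xmax
scores = tf.random.uniform((batch, num_boxes, num_classes))

nmsed_boxes, nmsed_scores, nmsed_classes, valid = tf.image.combined_non_max_suppression(
    boxes=tf.reshape(boxes, (batch, -1, 1, 4)),  # q = 1: one box shared by all classes
    scores=scores,
    max_output_size_per_class=5,
    max_total_size=10,
    iou_threshold=0.45,
    score_threshold=0.25)
print(nmsed_boxes.shape, valid.numpy())          # (1, 10, 4) and the number of kept boxes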
Code example #28
def main(_argv):

    with open("./config_birdview.yml", "r") as ymlfile:
        bird_view_cfg = yaml.safe_load(ymlfile)

    # read the image parameters once (the original looped over every top-level
    # config section and re-appended the same corner points each time)
    corner_points = [
        bird_view_cfg["image_parameters"]["p1"],
        bird_view_cfg["image_parameters"]["p2"],
        bird_view_cfg["image_parameters"]["p3"],
        bird_view_cfg["image_parameters"]["p4"],
    ]
    width_og = int(bird_view_cfg["image_parameters"]["width_og"])
    height_og = int(bird_view_cfg["image_parameters"]["height_og"])
    img_path = bird_view_cfg["image_parameters"]["img_path"]
    size_height = bird_view_cfg["image_parameters"]["size_height"]
    size_width = bird_view_cfg["image_parameters"]["size_width"]

    tr = np.array([
        bird_view_cfg["image_parameters"]["p4"][0],
        bird_view_cfg["image_parameters"]["p4"][1],
    ])
    tl = np.array([
        bird_view_cfg["image_parameters"]["p2"][0],
        bird_view_cfg["image_parameters"]["p2"][1],
    ])
    br = np.array([
        bird_view_cfg["image_parameters"]["p3"][0],
        bird_view_cfg["image_parameters"]["p3"][1],
    ])
    bl = np.array([
        bird_view_cfg["image_parameters"]["p1"][0],
        bird_view_cfg["image_parameters"]["p1"][1],
    ])

    widthA = np.sqrt(((br[0] - bl[0])**2) + ((br[1] - bl[1])**2))
    widthB = np.sqrt(((tr[0] - tl[0])**2) + ((tr[1] - tl[1])**2))
    maxWidth = max(int(widthA), int(widthB))

    heightA = np.sqrt(((tr[0] - br[0])**2) + ((tr[1] - br[1])**2))
    heightB = np.sqrt(((tl[0] - bl[0])**2) + ((tl[1] - bl[1])**2))
    maxHeight = max(int(heightA), int(heightB))

    matrix, imgOutput = compute_perspective_transform(corner_points, maxWidth,
                                                      maxHeight,
                                                      cv2.imread(img_path))
    height, width, _ = imgOutput.shape
    dim = (width, height)

    # Definition of the parameters
    max_cosine_distance = 0.4
    nn_budget = None
    nms_max_overlap = 1.0

    # initialize deep sort
    model_filename = "model_data/mars-small128.pb"
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    # calculate cosine distance metric
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    # initialize tracker
    tracker = Tracker(metric)

    # load configuration for object detector
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size
    video_path = FLAGS.video

    # load tflite model if flag is set
    if FLAGS.framework == "tflite":
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
    # otherwise load standard tensorflow saved model
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights,
                                                 tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures["serving_default"]

    # begin video capture
    try:
        vid = cv2.VideoCapture(int(video_path))
    except ValueError:  # video_path is a file path rather than a device index
        vid = cv2.VideoCapture(video_path)

    output_video_1, output_video_2 = None, None

    # get video ready to save locally if flag is set
    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        """
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        """
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        # note: width/height come from the warped bird-view image computed above;
        # this writer is unused because frames are saved via output_video_1/2 below
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    frame_num = 0
    # while video is running
    while True:

        black_img = cv2.imread("./black_bg.png")
        black_img = cv2.resize(black_img, dim, interpolation=cv2.INTER_AREA)

        return_value, frame = vid.read()

        if return_value:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(frame)
        else:
            print("Video has ended or failed, try a different video format!")
            break
        frame_num += 1
        print("Frame #: ", frame_num)
        frame_size = frame.shape[:2]
        image_data = cv2.resize(frame, (input_size, input_size))
        image_data = image_data / 255.0
        image_data = image_data[np.newaxis, ...].astype(np.float32)
        start_time = time.time()

        # run detections on tflite if flag is set
        if FLAGS.framework == "tflite":
            interpreter.set_tensor(input_details[0]["index"], image_data)
            interpreter.invoke()
            pred = [
                interpreter.get_tensor(output_details[i]["index"])
                for i in range(len(output_details))
            ]
            # run detections using yolov3 if flag is set
            if FLAGS.model == "yolov3" and FLAGS.tiny == True:
                boxes, pred_conf = filter_boxes(
                    pred[1],
                    pred[0],
                    score_threshold=0.25,
                    input_shape=tf.constant([input_size, input_size]),
                )
            else:
                boxes, pred_conf = filter_boxes(
                    pred[0],
                    pred[1],
                    score_threshold=0.25,
                    input_shape=tf.constant([input_size, input_size]),
                )
        else:
            batch_data = tf.constant(image_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

        (
            boxes,
            scores,
            classes,
            valid_detections,
        ) = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(
                pred_conf,
                (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=FLAGS.iou,
            score_threshold=FLAGS.score,
        )

        # convert data to numpy arrays and slice out unused elements
        num_objects = valid_detections.numpy()[0]
        bboxes = boxes.numpy()[0]
        bboxes = bboxes[0:int(num_objects)]
        scores = scores.numpy()[0]
        scores = scores[0:int(num_objects)]
        classes = classes.numpy()[0]
        classes = classes[0:int(num_objects)]

        # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height
        original_h, original_w, _ = frame.shape
        bboxes = utils.format_boxes(bboxes, original_h, original_w)

        # store all predictions in one parameter for simplicity when calling functions
        pred_bbox = [bboxes, scores, classes, num_objects]

        # read in all class names from config
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)

        # by default allow all classes in .names file
        #         allowed_classes = list(class_names.values())

        # custom allowed classes (uncomment line below to customize tracker for only people)
        allowed_classes = ["person"]

        # loop through objects and use class index to get class name, allow only classes in allowed_classes list
        names = []
        deleted_indx = []
        for i in range(num_objects):
            class_indx = int(classes[i])
            class_name = class_names[class_indx]
            if class_name not in allowed_classes:
                deleted_indx.append(i)
            else:
                names.append(class_name)
        names = np.array(names)
        count = len(names)
        if FLAGS.count:
            cv2.putText(
                frame,
                "Objects being tracked: {}".format(count),
                (5, 35),
                cv2.FONT_HERSHEY_COMPLEX_SMALL,
                2,
                (0, 255, 0),
                2,
            )
            print("Objects being tracked: {}".format(count))
        # delete detections that are not in allowed_classes
        bboxes = np.delete(bboxes, deleted_indx, axis=0)
        scores = np.delete(scores, deleted_indx, axis=0)

        # encode yolo detections and feed to tracker
        features = encoder(frame, bboxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                bboxes, scores, names, features)
        ]

        # initialize color map
        cmap = plt.get_cmap("tab20b")
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima supression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes,
                                                    nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        bbox_array = []
        # update tracks
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            bbox_array.append(
                (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])))
            class_name = track.get_class()

            # draw bbox on screen
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            cv2.rectangle(
                frame,
                (int(bbox[0]), int(bbox[1])),
                (int(bbox[2]), int(bbox[3])),
                color,
                2,
            )
            cv2.rectangle(
                frame,
                (int(bbox[0]), int(bbox[1] - 30)),
                (
                    int(bbox[0]) +
                    (len(class_name) + len(str(track.track_id))) * 17,
                    int(bbox[1]),
                ),
                color,
                -1,
            )
            cv2.putText(
                frame,
                class_name + "-" + str(track.track_id),
                (int(bbox[0]), int(bbox[1] - 10)),
                0,
                0.75,
                (255, 255, 255),
                2,
            )

            # if enable info flag then print details about each track
            if FLAGS.info:
                print(
                    "Tracker ID: {}, Class: {},  BBox Coords (xmin, ymin, xmax, ymax): {}"
                    .format(
                        str(track.track_id),
                        class_name,
                        (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(
                            bbox[3])),
                    ))

        if len(bbox_array) >= 1:
            array_centroids, array_groundpoints = get_centroids_and_groundpoints(
                bbox_array)
            transformed_downoids = compute_point_perspective_transformation(
                matrix, array_centroids)

            # Show every point on the top view image
            for point in transformed_downoids:
                x, y = point
                cv2.circle(black_img, (x, y), 60, (0, 255, 0), 2)
                cv2.circle(black_img, (x, y), 3, (0, 255, 0), -1)

        # calculate frames per second of running detections
        fps = 1.0 / (time.time() - start_time)
        print("FPS: %.2f" % fps)
        result = np.asarray(frame)
        #         result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

        if not FLAGS.dont_show:
            cv2.imshow("Output Video", result)

        # if output flag is set, save video file
        if FLAGS.output:
            if output_video_1 is None and output_video_2 is None:
                fourcc1 = cv2.VideoWriter_fourcc(*"MJPG")
                output_video_1 = cv2.VideoWriter(
                    "./video.avi", fourcc1, 25,
                    (frame.shape[1], frame.shape[0]), True)
                fourcc2 = cv2.VideoWriter_fourcc(*"MJPG")
                output_video_2 = cv2.VideoWriter(
                    "./bird_view.avi",
                    fourcc2,
                    25,
                    (black_img.shape[1], black_img.shape[0]),
                    True,
                )

            elif output_video_1 is not None and output_video_2 is not None:
                output_video_1.write(frame)
                output_video_2.write(black_img)
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
    cv2.destroyAllWindows()
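`compute_perspective_transform` and `compute_point_perspective_transformation` are imported bird's-eye-view helpers that are not shown in this example. A plausible sketch built on OpenCV's homography utilities; the point ordering and exact signatures are assumptions:

import cv2
import numpy as np

def compute_perspective_transform_sketch(corner_points, width, height, image):
    # corner_points: the four image-plane points p1..p4 from config_birdview.yml;
    # their ordering relative to the target rectangle is an assumption here
    src = np.float32(corner_points)
    dst = np.float32([[0, 0], [width, 0], [0, height], [width, height]])
    matrix = cv2.getPerspectiveTransform(src, dst)
    return matrix, cv2.warpPerspective(image, matrix, (width, height))

def compute_point_perspective_transformation_sketch(matrix, points):
    # map (x, y) ground points into the top-down view with the same homography
    pts = np.float32(points).reshape(-1, 1, 2)
    transformed = cv2.perspectiveTransform(pts, matrix)
    return [(int(p[0][0]), int(p[0][1])) for p in transformed]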
Code example #29
def main(_argv):
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size
    images = FLAGS.images

    # load model
    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING])

    # loop through images in list and run Yolov4 model on each
    for count, image_path in enumerate(images, 1):
        original_image = cv2.imread(image_path)
        original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)

        image_data = cv2.resize(original_image, (input_size, input_size))
        image_data = image_data / 255.

        images_data = []
        for i in range(1):
            images_data.append(image_data)
        images_data = np.asarray(images_data).astype(np.float32)

        if FLAGS.framework == 'tflite':
            interpreter.allocate_tensors()
            input_details = interpreter.get_input_details()
            output_details = interpreter.get_output_details()
            print(input_details)
            print(output_details)
            interpreter.set_tensor(input_details[0]['index'], images_data)
            interpreter.invoke()
            pred = [interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details))]
            if FLAGS.model == 'yolov3' and FLAGS.tiny == True:
                boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25, input_shape=tf.constant([input_size, input_size]))
            else:
                boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25, input_shape=tf.constant([input_size, input_size]))
        else:
            infer = saved_model_loaded.signatures['serving_default']
            batch_data = tf.constant(images_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(
                pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=5,
            max_total_size=10,
            iou_threshold=FLAGS.iou,
            score_threshold=FLAGS.score,
        )
        pred_bbox = [boxes.numpy(), scores.numpy(), classes.numpy(), valid_detections.numpy()]

        # read in all class names from config
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)

        # by default allow all classes in .names file
        allowed_classes = list(class_names.values())
        
        # custom allowed classes (uncomment line below to allow detections for only people)
        #allowed_classes = ['person']

        image = utils.draw_bbox(original_image, pred_bbox, allowed_classes = allowed_classes)

        image = Image.fromarray(image.astype(np.uint8))

        if not FLAGS.dont_show:
            image.show()
        image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)  # back to BGR for cv2.imwrite
        cv2.imwrite(FLAGS.output + 'detection' + str(count) + '.png', image)

        # ========== voiceFeedback ==========

        valid_items = pred_bbox[3][0]
        valid_classes = pred_bbox[2][0]
        valid_boxes = pred_bbox[0][0]
        # section = (input_size/3)
        (H, W) = original_image.shape[:2]
        res = []
        for i in range(valid_items):
            (top, left, bottom, right) = valid_boxes[i]

            # boxes from combined NMS are still normalized to [0, 1] here (other
            # examples rescale them via utils.format_boxes), so scale to pixels
            centerX = round((left + right) / 2 * W)
            centerY = round((top + bottom) / 2 * H)
            if centerX <= W/3:
                w_pos = 'left '
            elif centerX <= (W/3 * 2):
                w_pos = 'center '
            else:
                w_pos = 'right '

            if centerY <= H/3:
                h_pos = 'top '
            elif centerY <= (H/3 * 2):
                h_pos = 'mid '
            else:
                h_pos = 'bottom '
            res.append(h_pos + w_pos + allowed_classes[int(valid_classes[i])])

        description = ', '.join(res)

        tts = gTTS(text=description, lang="en", slow=False)
        filename = f'./detections/voice{count}.mp3'
        tts.save(filename)
        playsound.playsound(filename)
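The centre-point arithmetic above assumes the NMS boxes are still normalized to [0, 1] (the other examples rescale them with utils.format_boxes), hence the multiplication by W and H. A quick sanity check of the 3x3 grid logic:

# Sanity check for the grid-position logic (assuming normalized boxes)
W, H = 1920, 1080
top, left, bottom, right = 0.1, 0.7, 0.4, 0.95    # normalized ymin, xmin, ymax, xmax
centerX = round((left + right) / 2 * W)           # 1584 -> right third of the frame
centerY = round((top + bottom) / 2 * H)           # 270  -> top third of the frame
print(centerX > W / 3 * 2, centerY <= H / 3)      # True True -> "top right"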
Code example #30
def startRecording_YOLO():
    date_and_time = time.strftime("%Y%m%d-%H-%M-%S")  #Stores current date and time as YYYYMMDD-HH-MM-SS
    vid_out_path = os.path.join(PROJECT_DIR, 'YoloV4', 'outputs', date_and_time + '.avi')
    
    
    #vid = cv2.VideoCapture(test_drive) #0 for webcam/Raspberry Pi Cam
    videothread = VideoThread(resolution=(640,480), framerate=30).start()

    width = int(videothread.stream.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(videothread.stream.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(videothread.stream.get(cv2.CAP_PROP_FPS))
    codec = cv2.VideoWriter_fourcc(*'XVID')
    output_video = cv2.VideoWriter(vid_out_path, codec, fps, (width,height))
    
    #width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    #height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    #fps = int(vid.get(cv2.CAP_PROP_FPS))
    #codec = cv2.VideoWriter_fourcc(*'XVID')
    #output_video = cv2.VideoWriter(vid_out_path, codec, fps, (width,height))
    frame_number = 0
    freq = cv2.getTickFrequency()
    avg_fps = 0

    #while video is running/recording
    while True:
        return_val, frame = videothread.read()
        #return_val, frame = vid.read()
        
        if return_val:
            #frame = cv2.flip(frame, -1)
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(frame)
        else:
            print('Video error, try another format')
            break
        
        frame_number += 1
        #print('Frame #: ', frame_number)
        frame_size = frame.shape[:2]
        image_data = cv2.resize(frame, (input_size, input_size))
        image_data = image_data / 255.
        #image_data = np.expand_dims(frame_resized, axis=0)

        #if floating_model:
        #    image_data = (np.float32(image_data) - 127.5) / 127.5
        image_data = image_data[np.newaxis, ...].astype(np.float32)  #Converts image data to a float32 type
        start_time = time.time()

        #TFLite Detections
        interpreter.set_tensor(input_details[0]['index'], image_data)
        interpreter.invoke()
        prediction = [interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details))]
        #box = interpreter.get_tensor(output_details[0]['index'])[0]
        #scores = interpreter.get_tensor(output_details[2]['index'])[0]
        boxes, prediction_conf = filter_boxes(prediction[0], prediction[1], score_threshold=0.4, input_shape=tf.constant([input_size, input_size]))

        #Reshape = returns a new tensor that has the same values as tensor in the same order, but with a new shape given by shape
        #Shape = returns a 1-D integer tensor, represents the shape of the input 
        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes = tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores = tf.reshape(prediction_conf, (tf.shape(prediction_conf)[0], -1, tf.shape(prediction_conf)[-1])),
            max_output_size_per_class = 50,
            max_total_size = 50,
            iou_threshold = 0.45,
            score_threshold = 0.5
        )

        #convert the received data into numpy arrays, then slice out unused elements
        number_of_objects = valid_detections.numpy()[0]
        bboxes = boxes.numpy()[0]
        bboxes = bboxes[0 : int(number_of_objects)]
        scores = scores.numpy()[0]
        scores = scores[0 : int(number_of_objects)]
        classes = classes.numpy()[0]
        classes = classes[0 : int(number_of_objects)]

        #format bounding boxes with normalized minimums and maximums of x and y
        original_h, original_w, _ = frame.shape
        bboxes = utils.format_boxes(bboxes, original_h, original_w)

        prediction_bbox = [bboxes, scores, classes, number_of_objects]

        #Read in all the class names from config and only allow certain ones to be detected (eases computation power)
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)
        allowed_classes = ['traffic light', 'person', 'car', 'stop sign']

        #loop through objects and get classification name, using only the ones allows in allowed_classes
        names = []
        deleted_indx = []
        for i in range(number_of_objects):
            classification_index = int(classes[i])
            class_name = class_names[classification_index]
            if class_name not in allowed_classes: deleted_indx.append(i)
            else: names.append(class_name)
        names = np.array(names)
        count = len(names)

        #delete irrelevant detections (not in allowed_classes)
        bboxes = np.delete(bboxes, deleted_indx, axis = 0)
        scores = np.delete(scores, deleted_indx, axis = 0)

        #Feed tracker with encoded yolo detections
        detections_features = encoder(frame, bboxes)
        detections = [Detection(bbox, score, class_name, detection_feature) for bbox, score, class_name, detection_feature in zip(bboxes, scores, names, detections_features)]

        #initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        #run non-maxima supression (reduces amount of detected entities to as little as possible)
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        #Call tracker
        tracker.predict()
        tracker.update(detections)

        #update tracks
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1: continue
            bbox = track.to_tlbr()
            class_name = track.get_class()

            #if class_name == 'person': print('person found')

            #change frame to the one that showcases the lane detection
            #frame = lane_detect.detect_edges(frame)  #COMMENT OUT IF/WHEN ERROR OCCURS

            #distance approximation (barebones, needs more adjusting)
            cam_parameter = 18  #change with different cameras; nudges the estimate toward the actual distance
            distance = (np.pi) / (bbox[2].item() + bbox[3].item()) * 1000 + cam_parameter
            det_dest = str(int(distance))

            #draw bounding box on screen
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2)
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1] - 30)), (int(bbox[0]) + (len(class_name) + len(det_dest)) * 18, int(bbox[1])), color, -1)
            #cv2.putText(frame, class_name + "-" + str(track.track_id), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2)
            cv2.putText(frame, class_name + ": " + str(int(distance)), (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75, (255, 255, 255), 2)
        
        #calculate fps of running detections
        fps = 1.0 / (time.time() - start_time)
        avg_fps = avg_fps + fps
        #print("FPS: %.2f" % fps)
        cv2.putText(frame, "FPS: " + str(int(fps)), (width - 100, height - 20), 0, 0.75, (255, 255, 255), 2)
        result = np.asarray(frame)
        result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        cv2.imshow("Output Video", result)

        output_video.write(result)
        if cv2.waitKey(1) & 0xFF == ord('q'): break
    cv2.destroyAllWindows()
    print('Average FPS: ', (avg_fps/frame_number))
    print('Number of Frames: ', frame_number)
    videothread.stop()
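The distance line inside the tracker loop is an ad-hoc inverse-size heuristic; the author's own comment calls it barebones. For comparison, a conventional pinhole-camera estimate, shown only as a sketch with assumed focal length and object height:

# Hypothetical pinhole-camera distance estimate (all constants are assumptions):
#   distance = focal_length_px * real_object_height_m / bbox_height_px
def estimate_distance(bbox_height_px, focal_length_px=700.0, real_height_m=1.7):
    # 1.7 m approximates a standing person; calibrate focal_length_px per camera
    if bbox_height_px <= 0:
        return float('inf')
    return focal_length_px * real_height_m / bbox_height_px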