Example #1
def main(_argv):
    import os
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    if FLAGS.tiny:
        STRIDES = np.array(cfg.YOLO.STRIDES_TINY)
        ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY, FLAGS.tiny)
    else:
        STRIDES = np.array(cfg.YOLO.STRIDES)
        if FLAGS.model == 'yolov4':
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS, FLAGS.tiny)
        else:
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_V3, FLAGS.tiny)
    NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
    XYSCALE = cfg.YOLO.XYSCALE
    input_size = FLAGS.size

    if FLAGS.framework == 'tf':
        input_layer = tf.keras.layers.Input([input_size, input_size, 3])
        if FLAGS.tiny:
            feature_maps = YOLOv3_tiny(input_layer, NUM_CLASS)
            bbox_tensors = []
            for i, fm in enumerate(feature_maps):
                bbox_tensor = decode(fm, NUM_CLASS, i)
                bbox_tensors.append(bbox_tensor)
            model = tf.keras.Model(input_layer, bbox_tensors)
            utils.load_weights_tiny(model, FLAGS.weights)
        else:
            if FLAGS.model == 'yolov3':
                feature_maps = YOLOv3(input_layer, NUM_CLASS)
                bbox_tensors = []
                for i, fm in enumerate(feature_maps):
                    bbox_tensor = decode(fm, NUM_CLASS, i)
                    bbox_tensors.append(bbox_tensor)
                model = tf.keras.Model(input_layer, bbox_tensors)
                utils.load_weights_v3(model, FLAGS.weights)
            elif FLAGS.model == 'yolov4':
                feature_maps = YOLOv4(input_layer, NUM_CLASS)
                bbox_tensors = []
                for i, fm in enumerate(feature_maps):
                    bbox_tensor = decode(fm, NUM_CLASS, i)
                    bbox_tensors.append(bbox_tensor)
                model = tf.keras.Model(input_layer, bbox_tensors)
                utils.load_weights(model, FLAGS.weights)

        model.summary()
    else:
        # Load TFLite model and allocate tensors.
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        # Get input and output tensors.
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)

    while True:
        frames = pipeline.wait_for_frames()
        depth_frame = frames.get_depth_frame()

        # Align the depth frame to color frame
        aligned_frames = align.process(frames)

        # Get aligned frames
        depth_frame = aligned_frames.get_depth_frame()
        color_frame = aligned_frames.get_color_frame()
        if not depth_frame or not color_frame:
            continue

        depth_intrin = depth_frame.profile.as_video_stream_profile().intrinsics
        color_intrin = color_frame.profile.as_video_stream_profile().intrinsics
        depth_to_color_extrin = depth_frame.profile.get_extrinsics_to(
            color_frame.profile)

        depth_image = np.asanyarray(depth_frame.get_data())
        color_image = np.asanyarray(color_frame.get_data())

        frame = cv2.cvtColor(color_image, cv2.COLOR_BGR2RGB)
        image = Image.fromarray(frame)

        frame_size = frame.shape[:2]
        image_data = utils.image_preprocess(np.copy(frame),
                                            [input_size, input_size])
        image_data = image_data[np.newaxis, ...].astype(np.float32)
        prev_time = time.time()

        scaled_depth = cv2.convertScaleAbs(depth_image, alpha=0.08)
        depth_colormap = cv2.applyColorMap(scaled_depth, cv2.COLORMAP_JET)

        if FLAGS.framework == 'tf':
            pred_bbox = model.predict(image_data)
        else:
            interpreter.set_tensor(input_details[0]['index'], image_data)
            interpreter.invoke()
            pred_bbox = [
                interpreter.get_tensor(output_details[i]['index'])
                for i in range(len(output_details))
            ]

        if FLAGS.model == 'yolov4':
            pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES,
                                                XYSCALE)
        else:
            pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES)

        bboxes = utils.postprocess_boxes(pred_bbox, frame_size, input_size,
                                         0.25)
        bboxes = utils.nms(bboxes, 0.213, method='nms')

        view2d = np.zeros((480, 640, 3), np.uint8)

        for box in bboxes:
            x_mid = int((box[0] + box[2]) / 2)
            y_mid = int((box[1] + box[3]) / 2)
            pixel_depths = []
            for i in range(3):
                for j in range(3):
                    pixel_depths.append(
                        depth_frame.get_distance(int(x_mid + i - 1),
                                                 int(y_mid + j - 1)))
            object_depth = statistics.median(pixel_depths)
            object_point = rs.rs2_deproject_pixel_to_point(
                depth_intrin, [x_mid, y_mid], object_depth)
            if box[5] == 67.0:  # class id 67 is "cell phone" in the COCO label set
                print('found phone')
                if object_depth == 0.0:
                    print('depth not found')
                depth_colormap[max(0, min(y_mid, 479)),
                               max(0, min(x_mid, 639))] = [0, 255, 0]
                view2d[max(0, min(480 - int(object_point[2] * 350), 479)),
                       max(0, min(int(object_point[0] * 350) +
                                  320, 639))] = [0, 255, 0]
            #print('x_min', box[0])
            #print('y_min', box[1])
            #print('x_max', box[2])
            #print('y_max', box[3])
            #print('probability', box[4])
            #print('object_id', box[5])
            #print('point', object_point)
            #print('-----')

        #curr_time = time.time()
        #exec_time = curr_time - prev_time
        #info = "time: %.2f ms" %(1000*exec_time)
        #print(info)
        cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)
        image_color = utils.draw_bbox(frame, bboxes)
        result = cv2.cvtColor(image_color, cv2.COLOR_RGB2BGR)
        image_depth = utils.draw_bbox(depth_colormap, bboxes)
        images = np.hstack((view2d, image_depth))
        cv2.imshow("result", images)
        print('-----')
        if cv2.waitKey(1) & 0xFF == ord('q'):
            pipeline.stop()
            break
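
The snippet above uses pipeline and align without defining them; they come from an Intel RealSense setup that the example omits. A minimal sketch of that setup, assuming the pyrealsense2 package (the rs module the example already references) and the 640x480 streams implied by the pixel clamps above:

import pyrealsense2 as rs

# Configure depth and color streams at the 640x480 resolution the example indexes into.
pipeline = rs.pipeline()
config = rs.config()
config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 30)
config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 30)
pipeline.start(config)

# Align depth frames to the color stream, as align.process(frames) in the loop expects.
align = rs.align(rs.stream.color)
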
Example #2
def main(_argv):
    # Distinguish with an if statement whether this is the YOLO-tiny version.
    if FLAGS.tiny:
        STRIDES = np.array(cfg.YOLO.STRIDES_TINY)
        ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY, FLAGS.tiny)
    else:
        STRIDES = np.array(cfg.YOLO.STRIDES)
        # If it is not the tiny version, load the YOLOv4 model together with its anchor box information.
        if FLAGS.model == 'yolov4':
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS, FLAGS.tiny)
        else:
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_V3, FLAGS.tiny)

    # Load the number of classes and the box XYSCALE from the YOLOv4 cfg file, and set input_size and image_path from the size and image values of the predefined FLAGS object.
    NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
    XYSCALE = cfg.YOLO.XYSCALE
    input_size = FLAGS.size
    image_path = FLAGS.image

    # Load the image via the cv2 module and convert it from BGR to RGB.
    # OpenCV stores color images in BGR order, whereas matplotlib uses RGB.
    original_image = cv2.imread(image_path)
    original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
    original_image_size = original_image.shape[:2]

    # Turn the image data into an array and cast it to float32.
    image_data = utils.image_preporcess(np.copy(original_image), [input_size, input_size])
    image_data = image_data[np.newaxis, ...].astype(np.float32)
    # When the framework is 'tf', which model gets loaded depends on how FLAGS.model is set.
    # Since we are dealing with YOLOv4 here, we only look at the case where FLAGS.model is 'yolov4'.
    if FLAGS.framework == 'tf':
        input_layer = tf.keras.layers.Input([input_size, input_size, 3])
        if FLAGS.tiny:
            feature_maps = YOLOv3_tiny(input_layer, NUM_CLASS)
            bbox_tensors = []
            for i, fm in enumerate(feature_maps):
                bbox_tensor = decode(fm, NUM_CLASS, i)
                bbox_tensors.append(bbox_tensor)
            model = tf.keras.Model(input_layer, bbox_tensors)
            utils.load_weights_tiny(model, FLAGS.weights)
        else:
            if FLAGS.model == 'yolov3':
                feature_maps = YOLOv3(input_layer, NUM_CLASS)
                bbox_tensors = []
                for i, fm in enumerate(feature_maps):
                    bbox_tensor = decode(fm, NUM_CLASS, i)
                    bbox_tensors.append(bbox_tensor)
                model = tf.keras.Model(input_layer, bbox_tensors)
                utils.load_weights_v3(model, FLAGS.weights)

            # Feed the input layer and the number of classes into YOLOv4 to create the feature maps, and declare a list for the predicted bounding boxes.
            # Then loop over the feature maps, appending each decoded bounding-box tensor to the list, and build the model from the input layer and that list.
            # Finally, load the pretrained weights.
            elif FLAGS.model == 'yolov4':
                feature_maps = YOLOv4(input_layer, NUM_CLASS)
                bbox_tensors = []
                for i, fm in enumerate(feature_maps):
                    bbox_tensor = decode(fm, NUM_CLASS, i)
                    bbox_tensors.append(bbox_tensor)
                model = tf.keras.Model(input_layer, bbox_tensors)
                utils.load_weights(model, FLAGS.weights)

        model.summary()
        # Then run the model on the image data to predict the bounding boxes.
        pred_bbox = model.predict(image_data)
    else:
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
        interpreter.set_tensor(input_details[0]['index'], image_data)
        interpreter.invoke()
        pred_bbox = [interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details))]
        
    # Then filter out everything but the valid bounding boxes and store the final result.
    if FLAGS.model == 'yolov4':
        pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES, XYSCALE)
    else:
        pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES)
    bboxes = utils.postprocess_boxes(pred_bbox, original_image_size, input_size, 0.25)
    bboxes = utils.nms(bboxes, 0.213, method='nms')

    # Draw the predicted bounding boxes on the image and display it.
    image = utils.draw_bbox(original_image, bboxes)
    image = Image.fromarray(image)
    image.show()
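
For reference, utils.image_preporcess in this family of repositories is typically a letterbox transform: resize the image while preserving aspect ratio, pad the remainder with gray, and scale pixel values to [0, 1]. A sketch under that assumption (letterbox_preprocess is a stand-in name, not the repo's function):

import cv2
import numpy as np

def letterbox_preprocess(image, target_size):
    # Aspect-preserving resize plus gray padding, assumed equivalent of utils.image_preporcess.
    ih, iw = target_size
    h, w, _ = image.shape
    scale = min(iw / w, ih / h)
    nw, nh = int(scale * w), int(scale * h)
    image_resized = cv2.resize(image, (nw, nh))
    image_padded = np.full(shape=[ih, iw, 3], fill_value=128.0)
    dw, dh = (iw - nw) // 2, (ih - nh) // 2
    image_padded[dh:nh + dh, dw:nw + dw, :] = image_resized
    return image_padded / 255.0
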
Example #3
def main(_argv):
    if FLAGS.tiny:
        STRIDES = np.array(cfg.YOLO.STRIDES_TINY)
        ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY, FLAGS.tiny)
    else:
        STRIDES = np.array(cfg.YOLO.STRIDES)
        if FLAGS.model == 'yolov4':
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS, FLAGS.tiny)
        else:
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_V3, FLAGS.tiny)
    NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
    XYSCALE = cfg.YOLO.XYSCALE
    input_size = FLAGS.size
    video_path = FLAGS.video

    print("Video from: ", video_path)
    vid = cv2.VideoCapture(video_path)

    if FLAGS.framework == 'tf':
        input_layer = tf.keras.layers.Input([input_size, input_size, 3])
        if FLAGS.tiny:
            feature_maps = YOLOv3_tiny(input_layer, NUM_CLASS)
            bbox_tensors = []
            for i, fm in enumerate(feature_maps):
                bbox_tensor = decode(fm, NUM_CLASS, i)
                bbox_tensors.append(bbox_tensor)
            model = tf.keras.Model(input_layer, bbox_tensors)
            utils.load_weights_tiny(model, FLAGS.weights)
        else:
            if FLAGS.model == 'yolov3':
                feature_maps = YOLOv3(input_layer, NUM_CLASS)
                bbox_tensors = []
                for i, fm in enumerate(feature_maps):
                    bbox_tensor = decode(fm, NUM_CLASS, i)
                    bbox_tensors.append(bbox_tensor)
                model = tf.keras.Model(input_layer, bbox_tensors)
                utils.load_weights_v3(model, FLAGS.weights)
            elif FLAGS.model == 'yolov4':
                feature_maps = YOLOv4(input_layer, NUM_CLASS)
                bbox_tensors = []
                for i, fm in enumerate(feature_maps):
                    bbox_tensor = decode(fm, NUM_CLASS, i)
                    bbox_tensors.append(bbox_tensor)
                model = tf.keras.Model(input_layer, bbox_tensors)
                utils.load_weights(model, FLAGS.weights)

        model.summary()
    else:
        # Load TFLite model and allocate tensors.
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        # Get input and output tensors.
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)

    while True:
        return_value, frame = vid.read()
        if return_value:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(frame)
        else:
            raise ValueError("No image! Try with another video format")
        frame_size = frame.shape[:2]
        image_data = utils.image_preporcess(np.copy(frame),
                                            [input_size, input_size])
        image_data = image_data[np.newaxis, ...].astype(np.float32)
        prev_time = time.time()

        if FLAGS.framework == 'tf':
            pred_bbox = model.predict(image_data)
        else:
            interpreter.set_tensor(input_details[0]['index'], image_data)
            interpreter.invoke()
            pred_bbox = [
                interpreter.get_tensor(output_details[i]['index'])
                for i in range(len(output_details))
            ]

        if FLAGS.model == 'yolov4':
            pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES,
                                                XYSCALE)
        else:
            pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES)

        bboxes = utils.postprocess_boxes(pred_bbox, frame_size, input_size,
                                         0.25)
        bboxes = utils.nms(bboxes, 0.213, method='nms')

        image = utils.draw_bbox(frame, bboxes)
        curr_time = time.time()
        exec_time = curr_time - prev_time
        result = np.asarray(image)
        info = "time: %.2f ms" % (1000 * exec_time)
        print(info)
        cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)
        result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        cv2.imshow("result", result)
        if cv2.waitKey(1) & 0xFF == ord('q'): break
Example #4
def main(_argv):
    if FLAGS.tiny:
        STRIDES = np.array(cfg.YOLO.STRIDES_TINY)
        ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY, FLAGS.tiny)
    else:
        STRIDES = np.array(cfg.YOLO.STRIDES)
        if FLAGS.model == 'yolov4':
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS, FLAGS.tiny)
        else:
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_V3, FLAGS.tiny)
    NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
    XYSCALE = cfg.YOLO.XYSCALE
    input_size = FLAGS.size
    image_path = FLAGS.image

    original_image = cv2.imread(image_path)
    original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
    original_image_size = original_image.shape[:2]

    image_data = utils.image_preprocess(np.copy(original_image),
                                        [input_size, input_size])
    image_data = image_data[np.newaxis, ...].astype(np.float32)
    if FLAGS.framework == 'tf':
        input_layer = tf.keras.layers.Input([input_size, input_size, 3])
        if FLAGS.tiny:
            feature_maps = YOLOv3_tiny(input_layer, NUM_CLASS)
            bbox_tensors = []
            for i, fm in enumerate(feature_maps):
                bbox_tensor = decode(fm, NUM_CLASS, i)
                bbox_tensors.append(bbox_tensor)
            model = tf.keras.Model(input_layer, bbox_tensors)
            utils.load_weights_tiny(model, FLAGS.weights)
        else:
            if FLAGS.model == 'yolov3':
                feature_maps = YOLOv3(input_layer, NUM_CLASS)
                bbox_tensors = []
                for i, fm in enumerate(feature_maps):
                    bbox_tensor = decode(fm, NUM_CLASS, i)
                    bbox_tensors.append(bbox_tensor)
                model = tf.keras.Model(input_layer, bbox_tensors)
                utils.load_weights_v3(model, FLAGS.weights)
            elif FLAGS.model == 'yolov4':
                feature_maps = YOLOv4(input_layer, NUM_CLASS)
                bbox_tensors = []
                for i, fm in enumerate(feature_maps):
                    bbox_tensor = decode(fm, NUM_CLASS, i)
                    bbox_tensors.append(bbox_tensor)
                model = tf.keras.Model(input_layer, bbox_tensors)

                if FLAGS.weights.split(".")[len(FLAGS.weights.split(".")) -
                                            1] == "weights":
                    utils.load_weights(model, FLAGS.weights)
                else:
                    model.load_weights(FLAGS.weights).expect_partial()

        model.summary()
        pred_bbox = model.predict(image_data)
    else:
        # Load TFLite model and allocate tensors.
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        # Get input and output tensors.
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
        interpreter.set_tensor(input_details[0]['index'], image_data)
        interpreter.invoke()
        pred_bbox = [
            interpreter.get_tensor(output_details[i]['index'])
            for i in range(len(output_details))
        ]

    if FLAGS.model == 'yolov4':
        pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES,
                                            XYSCALE)
    else:
        pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES)
    bboxes = utils.postprocess_boxes(pred_bbox, original_image_size,
                                     input_size, 0.25)
    bboxes = utils.nms(bboxes, 0.213, method='nms')

    image = utils.draw_bbox(original_image, bboxes)
    image = Image.fromarray(image)
    #image.show()
    image = cv2.cvtColor(np.array(image), cv2.COLOR_BGR2RGB)
    cv2.imwrite(FLAGS.output, image)
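
Several of these examples pick a weight loader with FLAGS.weights.split(".")[len(FLAGS.weights.split(".")) - 1] == "weights". The same dispatch reads more simply with str.endswith (identical behavior except for a pathological filename with no dot):

# Equivalent, more readable form of the extension check used above.
if FLAGS.weights.endswith(".weights"):
    utils.load_weights(model, FLAGS.weights)  # raw Darknet .weights file
else:
    model.load_weights(FLAGS.weights).expect_partial()  # TensorFlow checkpoint
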
Example #5
def main(argv):
    NUM_CLASS = 2
    ANCHORS = [
        12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243,
        459, 401
    ]
    ANCHORS = np.array(ANCHORS, dtype=np.float32)
    ANCHORS = ANCHORS.reshape(3, 3, 2)
    STRIDES = [8, 16, 32]
    XYSCALE = [1.2, 1.1, 1.05]
    input_size = FLAGS.size
    video_path = FLAGS.video_path
    score_thresh = FLAGS.score_thresh
    iou_thresh = FLAGS.iou_thresh
    save_path = FLAGS.save_path

    print(f'[DEBUG][video] input_size : {input_size}')
    print(f'[DEBUG][video] video_path : {video_path}')
    print(f'[DEBUG][video] score_thresh : {score_thresh}')
    print(f'[DEBUG][video] iou_thresh : {iou_thresh}')
    print(f'[DEBUG][video] save_path : {save_path}')

    print('[INFO] Building Yolov4 architecture')
    tic = time.perf_counter()

    input_layer = tf.keras.layers.Input([input_size, input_size, 3])
    print(f'[INFO][video] Created input_layer of size {input_size}')
    print(f'[DEBUG][video] input_layer : {input_layer}')

    feature_maps = YOLOv4(input_layer, NUM_CLASS)

    print(f'[DEBUG][video] feature_maps : {feature_maps}')
    bbox_tensors = []
    for i, fm in enumerate(feature_maps):
        bbox_tensors.append(decode(fm, NUM_CLASS, i))

    model = tf.keras.Model(input_layer, bbox_tensors)
    utils.load_weights(model, FLAGS.weights)

    toc = time.perf_counter()
    print(f'[INFO] Architecture built.')
    print(f'[DEBUG][video] Execution took {(1000 * (toc - tic)):0.4f} ms')

    vid = cv2.VideoCapture(video_path)

    if save_path:
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        print(f"[DEBUG][video] Video CODEC : {FLAGS.save_path.split('.')[1]}")
        codec = cv2.VideoWriter_fourcc(*'MJPG')  # FourCC is exactly four characters; 'MJPEG' would raise an error
        out = cv2.VideoWriter(FLAGS.save_path, codec, fps, (width, height))

    while True:
        return_value, frame = vid.read()
        if return_value:
            print(f'[DEBUG] Got video capture')
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(frame)
        else:
            print(f"[DEBUG][video] Video Over")
            vid.release()
            if save_path:
                out.release()
            break
            #raise ValueError("No image! Try with another video format")
        frame_size = frame.shape[:2]

        image_data = utils.image_preprocess(np.copy(frame),
                                            [input_size, input_size])
        image_data = image_data[np.newaxis, ...].astype(np.float32)
        prev_time = time.perf_counter()

        pred_bbox = model.predict(image_data)
        print(f'[INFO][video] Finished initial prediction on image')

        pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES,
                                            XYSCALE)

        bboxes = utils.postprocess_boxes(pred_bbox, frame_size, input_size,
                                         score_thresh)

        bboxes = utils.nms(bboxes, iou_thresh, method='nms')

        image = utils.draw_bbox(frame, bboxes)

        curr_time = time.perf_counter()
        exec_time = curr_time - prev_time
        result = np.asarray(image)
        info = "fdpms: %.2f ms" % (1000 * exec_time)

        print(info)

        cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)
        result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        cv2.imshow("result", result)
        print(result.shape)
        if save_path:
            out.write(result)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
Example #6
import tensorflow as tf
import core.utils as utils
from core.config import cfg
from core.yolov4 import YOLOv4, decode
from PIL import Image
from matplotlib.pyplot import imshow
from urllib.request import urlopen

INPUT_SIZE   = 320
NUM_CLASS    = len(utils.read_class_names(cfg.YOLO.CLASSES))
CLASSES      = utils.read_class_names(cfg.YOLO.CLASSES)

print(NUM_CLASS)
print(CLASSES)

# Build Model
input_layer  = tf.keras.layers.Input([INPUT_SIZE, INPUT_SIZE, 3])
feature_maps = YOLOv4(input_layer)  # this snippet appears to target a repo version where YOLOv4 and decode read NUM_CLASS from the config internally

bbox_tensors = []
for i, fm in enumerate(feature_maps):
    bbox_tensor = decode(fm, i)
    bbox_tensors.append(bbox_tensor)
    print(bbox_tensors)

model = tf.keras.Model(input_layer, bbox_tensors)
model.load_weights("./checkpoints/yolov4")
model.summary()
model.save("./e4.h5")
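
Having saved the model to ./e4.h5, it can be reloaded later without rebuilding the graph. A minimal sketch; note that if the architecture uses custom layers or activations (e.g. mish), they may need to be supplied via the custom_objects argument:

import tensorflow as tf

# Reload the model saved above; compile=False because no training configuration was stored.
model = tf.keras.models.load_model("./e4.h5", compile=False)
model.summary()
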
Example #7
def main(_argv):
    if FLAGS.tiny:
        STRIDES = np.array(cfg.YOLO.STRIDES_TINY)
        ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY, FLAGS.tiny)
    else:
        STRIDES = np.array(cfg.YOLO.STRIDES)
        if FLAGS.model == 'yolov4':
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS, FLAGS.tiny)
        else:
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_V3, FLAGS.tiny)
    NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
    XYSCALE = cfg.YOLO.XYSCALE
    input_size = FLAGS.size
    video_path = FLAGS.video

    print("Video from: ", video_path )
    vid = cv2.VideoCapture(video_path)
    height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    fps = int(vid.get(cv2.CAP_PROP_FPS))

    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    output_movie = cv2.VideoWriter('output' + str(round(time.time()))+ '.avi', fourcc, fps, (width, height))
    if FLAGS.framework == 'tf':
        input_layer = tf.keras.layers.Input([input_size, input_size, 3])
        if FLAGS.tiny:
            feature_maps = YOLOv3_tiny(input_layer, NUM_CLASS)
            bbox_tensors = []
            for i, fm in enumerate(feature_maps):
                bbox_tensor = decode(fm, NUM_CLASS, i)
                bbox_tensors.append(bbox_tensor)
            model = tf.keras.Model(input_layer, bbox_tensors)
            utils.load_weights_tiny(model, FLAGS.weights)
        else:
            if FLAGS.model == 'yolov3':
                feature_maps = YOLOv3(input_layer, NUM_CLASS)
                bbox_tensors = []
                for i, fm in enumerate(feature_maps):
                    bbox_tensor = decode(fm, NUM_CLASS, i)
                    bbox_tensors.append(bbox_tensor)
                model = tf.keras.Model(input_layer, bbox_tensors)
                utils.load_weights_v3(model, FLAGS.weights)
            elif FLAGS.model == 'yolov4':
                feature_maps = YOLOv4(input_layer, NUM_CLASS)
                bbox_tensors = []
                for i, fm in enumerate(feature_maps):
                    bbox_tensor = decode(fm, NUM_CLASS, i)
                    bbox_tensors.append(bbox_tensor)
                model = tf.keras.Model(input_layer, bbox_tensors)
                
                if FLAGS.weights.split(".")[len(FLAGS.weights.split(".")) - 1] == "weights":
                    utils.load_weights(model, FLAGS.weights)
                else:
                    model.load_weights(FLAGS.weights).expect_partial()

        model.summary()
    else:
        # Load TFLite model and allocate tensors.
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        # Get input and output tensors.
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)

    total_passed_vehicle = 0
    speed = "waiting..."
    direction = "waiting..."
    size = "waiting..."
    color = "waiting..."
    counting_mode = "..."
    width_height_taken = True

    while True:
        return_value, frame = vid.read()
        if return_value:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(frame)
        else:
            raise ValueError("No image! Try with another video format")
        frame_size = frame.shape[:2]
        image_data = utils.image_preprocess(np.copy(frame), [input_size, input_size])
        image_data = image_data[np.newaxis, ...].astype(np.float32)
        prev_time = time.time()

        if FLAGS.framework == 'tf':
            pred_bbox = model.predict(image_data)
        else:
            interpreter.set_tensor(input_details[0]['index'], image_data)
            interpreter.invoke()
            pred_bbox = [interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details))]

        if FLAGS.model == 'yolov4':
            pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES, XYSCALE)
        else:
            pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES)

        bboxes = utils.postprocess_boxes(pred_bbox, frame_size, input_size, 0.25)
        boxes = bboxes[:, 0:4]
        scores = bboxes[:, 4]
        classes = bboxes[:, 5]
        #bboxes = utils.nms(bboxes, 0.213, method='nms')
        roi = 450
        category_index = utils.read_class_names(cfg.YOLO.CLASSES)
        counter, csv_line, counting_mode = vis_util.visualize_boxes_and_labels_on_image_array_y_axis(
            vid.get(1),
            frame,
            1,
            False,
            np.squeeze(boxes),
            np.squeeze(classes).astype(np.int32),
            np.squeeze(scores),
            category_index,
            y_reference=roi,
            use_normalized_coordinates=True,
            line_thickness=4)

        if counter == 1:
            cv2.line(frame, (roi, 0), (roi, height), (0, 0xFF, 0), 5)
        else:
            cv2.line(frame, (roi, 0), (roi, height), (0, 0, 0xFF), 5)

        total_passed_vehicle = total_passed_vehicle + counter

        # insert information text to video frame
        font = cv2.FONT_HERSHEY_SIMPLEX
        cv2.putText(
            frame,  # the original passed input_frame, which is never defined in this function
            'Veiculos Detectados: ' + str(total_passed_vehicle),
            (10, 35),
            font,
            0.8,
            (0, 0xFF, 0xFF),
            2,
            cv2.LINE_AA,  # lineType; the original passed a font constant here by mistake
            )
        
        cv2.putText(
            frame,
            'Linha de ROI',
            (545, roi - 10),
            font,
            0.6,
            (0, 0, 0xFF),
            2,
            cv2.LINE_AA,
            )
        # image = utils.draw_bbox(frame, bboxes)
        # curr_time = time.time()
        # exec_time = curr_time - prev_time
        # result = np.asarray(image)
        # info = "time: %.2f ms" %(1000*exec_time)
        # print(info)
        # cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)
        # result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        # cv2.imshow("result", result)
        # if cv2.waitKey(1) & 0xFF == ord('q'): break
        output_movie.write(frame)
        print("writing frame")

        if cv2.waitKey(1) & 0xFF == ord('q'): break
    vid.release()   
    output_movie.release()
    cv2.destroyAllWindows()
Example #8
def main():
    
    #not sure whether this is effective or not
    tf.executing_eagerly()
    strategy = tf.distribute.OneDeviceStrategy(device="/gpu:0")
    with strategy.scope():
    # if True:
        
        #SETTINGS TO ADJUST-------------------------------------------     
        
        #whether or not to save video to output file or show on screen
        RECORD = False
        INPUT_VID = 'aot1'
        #INPUT_VID = 'mrb3'
        #INPUT_VID
        OUTPUT_VID= 'C:/Users/Nikki/Documents/work/inputs-outputs/vid_output/' + INPUT_VID + '.avi'
        SHOW_VID = True
        THROWOUT_NUM = 3           #min is 1
        INPUT_SIZE = 419 #608 #230 #999 #800
        
        #initialize constants
        STRIDES = np.array(cfg.YOLO.STRIDES)
        ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS)
        NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
        XYSCALE = cfg.YOLO.XYSCALE
        WEIGHTS = './data/yolov4.weights'   #must end in .weights



        #setup variables based on what video is being used
        video_path, GPS_pix, pix_GPS, origin = pg.sample_select(INPUT_VID)
        video_path = addresses.TEST
        #start video capture
        print("Video from: ", video_path )
        vid = cv2.VideoCapture(video_path)
        
        #initialize occupancy and compliance buffers
        buf_size = 5
        count_buf = buf_size * [0]
        ind = 0
        people_buf = buf_size * [0]
        
        #open file to output to
        output_f = 'C:/Users/Nikki/Documents/work/inputs-outputs/txt_output/' + INPUT_VID + '.txt'
        f = open(output_f, 'w')
        print('file started')
        f.write('Time\t\t\t\tPed\t<6ft\n')
        
        #define writer and output video properties
        if RECORD:
            fps = vid.get(5)
            wdt = int(vid.get(3))
            hgt = int(vid.get(4))
            fourcc = cv2.VideoWriter_fourcc(*'MJPG')
            out_vid = cv2.VideoWriter(OUTPUT_VID, fourcc, fps/THROWOUT_NUM, (wdt, hgt))


        
        #generate model
        input_layer = tf.keras.Input([INPUT_SIZE, INPUT_SIZE, 3])
        feature_maps = YOLOv4(input_layer, NUM_CLASS)
        bbox_tensors = []
        for i, fm in enumerate(feature_maps):
            bbox_tensor = decode(fm, NUM_CLASS, i)
            bbox_tensors.append(bbox_tensor)    
        model = tf.keras.Model(input_layer, bbox_tensors)
        print('model built')
        
        
        #force to run eagerly
        model.run_eagerly = True
        
        #load existing weights into model
        utils.load_weights(model, WEIGHTS)
 
        #continue reading and showing frames until interrupted
        try:
            while True:
                
                #skip desired number of frames to speed up processing
                for i in range (THROWOUT_NUM):
                    vid.grab()
                
                #get current time and next frame
                dt = str(datetime.datetime.now())    
                return_value, frame = vid.retrieve()
                
                # check that the next frame exists, if not, close display window and exit loop
                if return_value:
                    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    #image = Image.fromarray(frame)
                else:
                    if SHOW_VID:
                        cv2.destroyWindow('result')
                    print('Video has ended')
                    break
                
                #resize image and add another dimension
                frame_size = frame.shape[:2]
                cur_frame = np.copy(frame)
                image_data = utils.image_preprocess(cur_frame, [INPUT_SIZE, INPUT_SIZE]) 
                image_data = image_data[np.newaxis, ...].astype(np.float32)
                
               
                prev_time = time.time()  #for calculating how long it takes to process a frame
                
                
                with tf.device('/GPU:0'):
                    image_data = tf.convert_to_tensor(image_data)
                    print(image_data.device)
                
                #for calculating how long it takes to process a frame
                curr_time = time.time()
                exec_time = curr_time - prev_time
                info = "time1: %.2f ms" %(1000*exec_time)
                print(info)
                prev_time = time.time()
                
                #make bboxes
                pred_bbox = model.predict(image_data)
                pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES, XYSCALE)
                all_bboxes, probs, classes = utils.postprocess_boxes(pred_bbox, frame_size, INPUT_SIZE, 0.25)#.25
                bboxes = utils.filter_people(all_bboxes, probs, classes)
    
                #only continue processing if there were people identified
                if len(bboxes) > 0:
                    #get rid of redundant boxes
                    bboxes = utils.nms(bboxes, 0.213, method='nms') #.213
                    
                    #draw bbox and get centered point at base of box
                    frame = utils.draw_bbox(frame, bboxes, show_label = False)
                    pts = utils.get_ftpts(bboxes)
                    
                    #draw radii and count people
                    frame, count_buf[ind] = pg.draw_radius(frame, pts, GPS_pix, pix_GPS, origin)
                    people_buf[ind] = pts.shape[0]
                else:
                    count_buf[ind] = 0
                    people_buf[ind] = 0
                    
                #avg people and count within 6ft buffers   
                people = int(sum(people_buf)/len(people_buf))
                count = int(sum(count_buf)/len(count_buf))
                
                #write info to file and overlay on video
                utils.video_write_info(f, bboxes, dt, count, people)
                utils.overlay_occupancy(frame, count, people, frame_size)
                
                #for calculating how long it takes to process a frame
                curr_time = time.time()
                exec_time = curr_time - prev_time
                info = "time2: %.2f ms" %(1000*exec_time)
                print(info)
                
                #convert frame to correct cv colors and display/record
                result = np.asarray(frame)
                result = cv2.cvtColor(result, cv2.COLOR_RGB2BGR)
                
                if SHOW_VID:
                    cv2.namedWindow("result", cv2.WINDOW_NORMAL)
                    cv2.imshow("result", result)
                if RECORD:
                    out_vid.write(result)
                    
                if cv2.waitKey(1) & 0xFF == ord('q'): break
                
                #increment index
                ind = (ind + 1) % buf_size
                
            #end video, close viewer, stop writing to file
            vid.release()
            if RECORD:
                out_vid.release()
            if SHOW_VID:
                cv2.destroyAllWindows()
            f.close()
            
        #if interrupted, end video, close viewer, stop writing to file
        except:
            print("Unexpected error:", sys.exc_info()[0])
            vid.release()
            if RECORD:
                out_vid.release()
            if SHOW_VID:
                cv2.destroyAllWindows()
            f.close()
Example #9
    def __init__(self,
                 framework='tf',
                 size=608,
                 tiny=False,
                 model='yolov4',
                 NUM_CLASS=len(utils.read_class_names(cfg.YOLO.CLASSES)),
                 load_h5=False,
                 h5_file=None):
        self.framework = framework
        self.weights = 'weights'  # directory holding the .weights files (joined with filenames below)
        self.size = size
        self.tiny = tiny
        self.model = model
        self.instanciated_model = None

        # Instantiate the model

        if load_h5:
            print('Loading Model from h5 file')
            self.instanciated_model = tf.keras.models.load_model(h5_file)

        else:
            print('Tiny ', self.tiny)

            #image_path = self.image
            #NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
            input_size = self.size
            if self.framework == 'tf':
                input_layer = tf.keras.layers.Input(
                    [input_size, input_size, 3])
                if self.tiny:
                    if self.model == 'yolov3':
                        feature_maps = YOLOv3_tiny(input_layer, NUM_CLASS)
                        self.weights = os.path.join(self.weights,
                                                    'yolov3-tiny.weights')
                    else:
                        feature_maps = YOLOv4_tiny(input_layer, NUM_CLASS)
                        self.weights = os.path.join(self.weights,
                                                    'yolov4-tiny.weights')
                    bbox_tensors = []
                    for i, fm in enumerate(feature_maps):
                        bbox_tensor = decode(fm, NUM_CLASS, i)
                        bbox_tensors.append(bbox_tensor)
                    model = tf.keras.Model(input_layer, bbox_tensors)
                    model.summary()

                    ##Added
                    if self.weights.split(".")[len(self.weights.split(".")) -
                                               1] == "weights":
                        print('test_0')
                        if self.model == 'yolov3':
                            utils.load_weights_tiny(model, self.weights,
                                                    'yolov3')
                        else:
                            utils.load_weights_tiny(model, self.weights,
                                                    'yolov4')
                    else:
                        print('test_1')
                        model.load_weights(self.weights).expect_partial()

                    #utils.load_weights_tiny(model, self.weights, self.model)
                    ##
                else:
                    if self.model == 'yolov3':
                        feature_maps = YOLOv3(input_layer, NUM_CLASS)
                        bbox_tensors = []
                        for i, fm in enumerate(feature_maps):
                            bbox_tensor = decode(fm, NUM_CLASS, i)
                            bbox_tensors.append(bbox_tensor)
                        model = tf.keras.Model(input_layer, bbox_tensors)

                        yolov3_weights_path = os.path.join(
                            self.weights, 'yolov3.weights')  # the original joined 'yolov4.weights' here, apparently a copy-paste slip
                        #utils.load_weights_v3(model, self.weights)
                    elif self.model == 'yolov4':
                        feature_maps = YOLOv4(input_layer, NUM_CLASS)
                        bbox_tensors = []
                        for i, fm in enumerate(feature_maps):
                            bbox_tensor = decode(fm, NUM_CLASS, i)
                            bbox_tensors.append(bbox_tensor)
                        model = tf.keras.Model(input_layer, bbox_tensors)

                        # Check if files have already been downloaded
                        yolov4_weights_path = os.path.join(
                            self.weights, 'yolov4.weights')
                        #yolov4_weights_path = os.path.join(Path(os.path.realpath(__file__)).parent,'data/yolov4.weights')

                        if not os.path.exists(yolov4_weights_path):
                            print('Downloading weights file')
                            self.weights = self.download(
                                'yolov4.weights', local_path=self.weights)
                            print('Weight file was downloaded to',
                                  self.weights)
                        else:
                            print('Weights file already downloaded')
                            self.weights = yolov4_weights_path

                        if self.weights.split(".")[len(self.weights.split("."))
                                                   - 1] == "weights":
                            if self.model == 'yolov3':
                                utils.load_weights(model, yolov3_weights_path)
                            elif self.model == 'yolov4':
                                utils.load_weights(model, self.weights)
                        else:
                            model.load_weights(self.weights).expect_partial()

                self.instanciated_model = model

            else:
                # Load TFLite model and allocate tensors.
                interpreter = tf.lite.Interpreter(model_path=self.weights)
                interpreter.allocate_tensors()

                self.instanciated_model = interpreter
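
Several examples branch on a TFLite framework flag, but none show how the .tflite file they load is produced. A minimal conversion sketch, assuming a built Keras model like the ones constructed above and a hypothetical output name yolov4.tflite:

import tensorflow as tf

# Convert a built Keras model into a TFLite flatbuffer usable with tf.lite.Interpreter.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()
with open("yolov4.tflite", "wb") as f:
    f.write(tflite_model)
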
Example #10
def main(_argv):
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    trainset = Dataset('train')
    testset = Dataset('test')
    logdir = "./data/log"
    isfreeze = False
    steps_per_epoch = len(trainset)
    first_stage_epochs = cfg.TRAIN.FISRT_STAGE_EPOCHS  # "FISRT" matches the key as defined in the config file
    second_stage_epochs = cfg.TRAIN.SECOND_STAGE_EPOCHS
    global_steps = tf.Variable(1, trainable=False, dtype=tf.int64)
    warmup_steps = cfg.TRAIN.WARMUP_EPOCHS * steps_per_epoch
    total_steps = (first_stage_epochs + second_stage_epochs) * steps_per_epoch
    # train_steps = (first_stage_epochs + second_stage_epochs) * steps_per_period

    input_layer = tf.keras.layers.Input(
        [cfg.TRAIN.INPUT_SIZE, cfg.TRAIN.INPUT_SIZE, 3])
    NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
    STRIDES = np.array(cfg.YOLO.STRIDES)
    IOU_LOSS_THRESH = cfg.YOLO.IOU_LOSS_THRESH
    XYSCALE = cfg.YOLO.XYSCALE
    ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS)

    if FLAGS.tiny:
        feature_maps = YOLOv3_tiny(input_layer, NUM_CLASS)
        bbox_tensors = []
        for i, fm in enumerate(feature_maps):
            bbox_tensor = decode_train(fm, NUM_CLASS, STRIDES, ANCHORS, i)
            bbox_tensors.append(fm)
            bbox_tensors.append(bbox_tensor)
        model = tf.keras.Model(input_layer, bbox_tensors)
    else:
        if FLAGS.model == 'yolov3':
            feature_maps = YOLOv3(input_layer, NUM_CLASS)
            bbox_tensors = []
            for i, fm in enumerate(feature_maps):
                bbox_tensor = decode_train(fm, NUM_CLASS, STRIDES, ANCHORS, i)
                bbox_tensors.append(fm)
                bbox_tensors.append(bbox_tensor)
            model = tf.keras.Model(input_layer, bbox_tensors)
        elif FLAGS.model == 'yolov4':
            feature_maps = YOLOv4(input_layer, NUM_CLASS)
            bbox_tensors = []
            for i, fm in enumerate(feature_maps):
                bbox_tensor = decode_train(fm, NUM_CLASS, STRIDES, ANCHORS, i,
                                           XYSCALE)
                bbox_tensors.append(fm)
                bbox_tensors.append(bbox_tensor)
            model = tf.keras.Model(input_layer, bbox_tensors)

    if FLAGS.weights is None:
        print("Training from scratch")
    else:
        if FLAGS.weights.split(".")[len(FLAGS.weights.split(".")) -
                                    1] == "weights":
            if FLAGS.tiny:
                utils.load_weights_tiny(model, FLAGS.weights)
            else:
                if FLAGS.model == 'yolov3':
                    utils.load_weights_v3(model, FLAGS.weights)
                else:
                    utils.load_weights(model, FLAGS.weights)
        else:
            model.load_weights(FLAGS.weights)
        print('Restoring weights from: %s ... ' % FLAGS.weights)

    optimizer = tf.keras.optimizers.Adam()
    if os.path.exists(logdir): shutil.rmtree(logdir)
    writer = tf.summary.create_file_writer(logdir)

    def train_step(image_data, target):
        with tf.GradientTape() as tape:
            pred_result = model(image_data, training=True)
            giou_loss = conf_loss = prob_loss = 0

            # optimizing process
            for i in range(3):
                conv, pred = pred_result[i * 2], pred_result[i * 2 + 1]
                loss_items = compute_loss(pred,
                                          conv,
                                          target[i][0],
                                          target[i][1],
                                          STRIDES=STRIDES,
                                          NUM_CLASS=NUM_CLASS,
                                          IOU_LOSS_THRESH=IOU_LOSS_THRESH,
                                          i=i)
                giou_loss += loss_items[0]
                conf_loss += loss_items[1]
                prob_loss += loss_items[2]

            total_loss = giou_loss + conf_loss + prob_loss

            gradients = tape.gradient(total_loss, model.trainable_variables)
            optimizer.apply_gradients(zip(gradients,
                                          model.trainable_variables))
            tf.print(
                "=> STEP %4d   lr: %.6f   giou_loss: %4.2f   conf_loss: %4.2f   "
                "prob_loss: %4.2f   total_loss: %4.2f" %
                (global_steps, optimizer.lr.numpy(), giou_loss, conf_loss,
                 prob_loss, total_loss))
            # update learning rate
            global_steps.assign_add(1)
            if global_steps < warmup_steps:
                lr = global_steps / warmup_steps * cfg.TRAIN.LR_INIT
            else:
                lr = cfg.TRAIN.LR_END + 0.5 * (
                    cfg.TRAIN.LR_INIT - cfg.TRAIN.LR_END) * ((1 + tf.cos(
                        (global_steps - warmup_steps) /
                        (total_steps - warmup_steps) * np.pi)))
            optimizer.lr.assign(lr.numpy())

            # writing summary data
            with writer.as_default():
                tf.summary.scalar("lr", optimizer.lr, step=global_steps)
                tf.summary.scalar("loss/total_loss",
                                  total_loss,
                                  step=global_steps)
                tf.summary.scalar("loss/giou_loss",
                                  giou_loss,
                                  step=global_steps)
                tf.summary.scalar("loss/conf_loss",
                                  conf_loss,
                                  step=global_steps)
                tf.summary.scalar("loss/prob_loss",
                                  prob_loss,
                                  step=global_steps)
            writer.flush()

    def test_step(image_data, target):
        with tf.GradientTape() as tape:
            pred_result = model(image_data, training=True)
            giou_loss = conf_loss = prob_loss = 0

            # optimizing process
            for i in range(3):
                conv, pred = pred_result[i * 2], pred_result[i * 2 + 1]
                loss_items = compute_loss(pred,
                                          conv,
                                          target[i][0],
                                          target[i][1],
                                          STRIDES=STRIDES,
                                          NUM_CLASS=NUM_CLASS,
                                          IOU_LOSS_THRESH=IOU_LOSS_THRESH,
                                          i=i)
                giou_loss += loss_items[0]
                conf_loss += loss_items[1]
                prob_loss += loss_items[2]

            total_loss = giou_loss + conf_loss + prob_loss

            tf.print(
                "=> TEST STEP %4d   giou_loss: %4.2f   conf_loss: %4.2f   "
                "prob_loss: %4.2f   total_loss: %4.2f" %
                (global_steps, giou_loss, conf_loss, prob_loss, total_loss))

    for epoch in range(first_stage_epochs + second_stage_epochs):
        if epoch < first_stage_epochs:
            if not isfreeze:
                isfreeze = True
                for name in ['conv2d_93', 'conv2d_101', 'conv2d_109']:
                    freeze = model.get_layer(name)
                    freeze_all(freeze)
        elif epoch >= first_stage_epochs:
            if isfreeze:
                isfreeze = False
                for name in ['conv2d_93', 'conv2d_101', 'conv2d_109']:
                    freeze = model.get_layer(name)
                    unfreeze_all(freeze)
        for image_data, target in trainset:
            train_step(image_data, target)
        for image_data, target in testset:
            test_step(image_data, target)
        model.save_weights("./checkpoints/yolov4")
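
The learning-rate logic inside train_step is a linear warmup followed by cosine decay. Pulled out as a standalone function (same formula as above, restated with plain NumPy; lr_at is an illustrative name, not part of the repo):

import numpy as np

def lr_at(step, warmup_steps, total_steps, lr_init, lr_end):
    # Linear warmup from 0 to lr_init, then cosine decay from lr_init down to lr_end.
    if step < warmup_steps:
        return step / warmup_steps * lr_init
    progress = (step - warmup_steps) / (total_steps - warmup_steps)
    return lr_end + 0.5 * (lr_init - lr_end) * (1 + np.cos(progress * np.pi))
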
Example #11
def main(_argv):
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    if FLAGS.tiny:
        STRIDES = np.array(cfg.YOLO.STRIDES_TINY)
        ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY, FLAGS.tiny)
        XYSCALE = cfg.YOLO.XYSCALE_TINY
    else:
        STRIDES = np.array(cfg.YOLO.STRIDES)
        XYSCALE = cfg.YOLO.XYSCALE
        if FLAGS.model == 'yolov4':
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS, FLAGS.tiny)
        else:
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_V3, FLAGS.tiny)

    CLASSES = utils.read_class_names(cfg.YOLO.CLASSES)
    NUM_CLASS = len(CLASSES)
    input_size = FLAGS.size
    try:
        # FLAGS.video may be a webcam index (e.g. "0") or a file path.
        vid = cv2.VideoCapture(int(FLAGS.video))
    except ValueError:
        vid = cv2.VideoCapture(FLAGS.video)

    times = []
    if FLAGS.output:
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    if FLAGS.framework == 'tf':
        input_layer = tf.keras.layers.Input([input_size, input_size, 3])
        if FLAGS.tiny:
            if FLAGS.model == 'yolov3':
                feature_maps = YOLOv3_tiny(input_layer, NUM_CLASS)
            else:
                feature_maps = YOLOv4_tiny(input_layer, NUM_CLASS)
            bbox_tensors = []
            for i, fm in enumerate(feature_maps):
                bbox_tensor = decode(fm, NUM_CLASS, i)
                bbox_tensors.append(bbox_tensor)
            model = tf.keras.Model(input_layer, bbox_tensors)
            utils.load_weights_tiny(model, FLAGS.weights)
        else:
            if FLAGS.model == 'yolov3':
                feature_maps = YOLOv3(input_layer, NUM_CLASS)
                bbox_tensors = []
                for i, fm in enumerate(feature_maps):
                    bbox_tensor = decode(fm, NUM_CLASS, i)
                    bbox_tensors.append(bbox_tensor)
                model = tf.keras.Model(input_layer, bbox_tensors)
                utils.load_weights_v3(model, FLAGS.weights)
            elif FLAGS.model == 'yolov4':
                feature_maps = YOLOv4(input_layer, NUM_CLASS)
                bbox_tensors = []
                for i, fm in enumerate(feature_maps):
                    bbox_tensor = decode(fm, NUM_CLASS, i)
                    bbox_tensors.append(bbox_tensor)
                model = tf.keras.Model(input_layer, bbox_tensors)
                if FLAGS.weights.split(".")[len(FLAGS.weights.split(".")) -
                                            1] == "weights":
                    utils.load_weights(model, FLAGS.weights)
                else:
                    model.load_weights(FLAGS.weights).expect_partial()
        model.summary()
    elif FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
    elif FLAGS.framework == 'trt':
        saved_model_loaded = tf.saved_model.load(FLAGS.weights,
                                                 tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']

    max_cosine_distance = 0.5  # 0.5 / 0.7
    nn_budget = None
    model_filename = './weights/tracker/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)
    key_list = list(CLASSES.keys())
    val_list = list(CLASSES.values())
    Track_only = ["person"]

    nacho_image = face_recognition.load_image_file("data/faces/nacho.jpg")
    nacho_face_encoding = face_recognition.face_encodings(nacho_image)[0]
    known_face_encodings = [nacho_face_encoding]
    known_face_names = ["Nacho"]

    logging.info("Models loaded!")

    while True:
        return_value, frame = vid.read()
        if not return_value:
            logging.warning("Empty Frame")
            break

        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_size = frame.shape[:2]
        image_data = utils.image_preprocess(np.copy(frame),
                                            [input_size, input_size])
        image_data = image_data[np.newaxis, ...].astype(np.float32)

        t1 = time.time()
        if FLAGS.framework == 'tf':
            pred_bbox = model.predict(image_data)
        elif FLAGS.framework == 'tflite':
            interpreter.set_tensor(input_details[0]['index'], image_data)
            interpreter.invoke()
            pred_bbox = [
                interpreter.get_tensor(output_details[i]['index'])
                for i in range(len(output_details))
            ]
        elif FLAGS.framework == 'trt':
            batched_input = tf.constant(image_data)
            pred_bbox = []
            result = infer(batched_input)
            for _, value in result.items():
                value = value.numpy()
                pred_bbox.append(value)

        t2 = time.time()
        times.append(t2 - t1)
        times = times[-20:]
        ms = sum(times) / len(times) * 1000
        fps = 1000 / ms

        if FLAGS.model == 'yolov4':
            pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES,
                                                XYSCALE)
        else:
            pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES)
        bboxes = utils.postprocess_boxes(pred_bbox, frame_size, input_size,
                                         0.5)  # score threshold (0.25 also tried)
        bboxes = utils.nms(bboxes, 0.213, method='nms')  # IoU threshold

        bboxes = utils.calculate_safety(bboxes)

        # FACE RECOGNITION PART
        face_locations = face_recognition.face_locations(frame)
        face_encodings = face_recognition.face_encodings(frame, face_locations)
        face_names = []
        for face_encoding in face_encodings:
            matches = face_recognition.compare_faces(known_face_encodings,
                                                     face_encoding)
            name = "Unknown"
            if True in matches:
                first_match_index = matches.index(True)
                name = known_face_names[first_match_index]
            # face_distances = face_recognition.face_distance(known_face_encodings, face_encoding)
            # best_match_index = np.argmin(face_distances)
            # if matches[best_match_index]:
            #     name = known_face_names[best_match_index]
            face_names.append(name)

        for bbox in bboxes:
            person_coor = np.array(bbox[:4], dtype=np.int32)
            for (top, right, bottom,
                 left), name in zip(face_locations, face_names):
                face_coor = np.array([left, top, right, bottom],
                                     dtype=np.int32)
                iou_score = utils.calculate_iou(person_coor, face_coor)
                if iou_score > 0.75:
                    bbox.append(name)
                    break
            if len(bbox) < 8:
                bbox.append("Unknown")  # no face matched this person box

        boxes, scores, names, safety_scores, face_ids = [], [], [], [], []
        for bbox in bboxes:
            if not Track_only or CLASSES[int(bbox[5])] in Track_only:
                boxes.append([
                    bbox[0].astype(int), bbox[1].astype(int),
                    bbox[2].astype(int) - bbox[0].astype(int),
                    bbox[3].astype(int) - bbox[1].astype(int)
                ])
                scores.append(bbox[4])
                names.append(CLASSES[int(bbox[5])])
                safety_scores.append(int(bbox[6]))
                face_ids.append(bbox[7])

        boxes = np.array(boxes)
        names = np.array(names)
        scores = np.array(scores)
        features = np.array(encoder(frame, boxes))
        safety_scores = np.array(safety_scores)
        face_ids = np.array(face_ids)
        detections = [
            Detection(bbox, score, class_name, feature, face_name_id,
                      safety_score)
            for bbox, score, class_name, feature, face_name_id, safety_score in
            zip(boxes, scores, names, features, face_ids, safety_scores)
        ]

        tracker.predict()
        tracker.update(detections)

        tracked_bboxes = []
        for track in tracker.tracks:
            # skip unconfirmed or stale tracks (age threshold 1; 5 also tried)
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()
            # tracking_id = track.track_id
            # index = key_list[val_list.index(class_name)]
            face_name_id = track.get_face_name()
            safety_score = track.get_safety_score()

            tracked_bboxes.append(bbox.tolist() + [face_name_id, safety_score])

        image = utils.draw_demo(frame, tracked_bboxes)
        # image = utils.draw_bbox(frame, tracked_bboxes,
        #                         classes=CLASSES, tracking=True)
        # image = cv2.putText(image, "Time: {:.2f} FPS".format(
        #     fps), (0, 24), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        cv2.namedWindow("Detections", cv2.WINDOW_AUTOSIZE)
        cv2.imshow("Detections", image)
        if FLAGS.output:
            out.write(image)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    vid.release()
    if FLAGS.output:
        out.release()
    cv2.destroyAllWindows()
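
The face-to-person association above hinges on utils.calculate_iou with a 0.75 gate. As a point of reference, here is a minimal sketch of a standard IoU over [x1, y1, x2, y2] pixel boxes; note the repo's helper may normalise differently (e.g. by the smaller box's area, which would make the gate easier to pass for a face inside a person box). iou_sketch is an illustrative name, not the repo's function.

def iou_sketch(box_a, box_b):
    # Boxes are [x1, y1, x2, y2], matching person_coor/face_coor above.
    x1, y1 = max(box_a[0], box_b[0]), max(box_a[1], box_b[1])
    x2, y2 = min(box_a[2], box_b[2]), min(box_a[3], box_b[3])
    inter = max(0, x2 - x1) * max(0, y2 - y1)          # intersection area
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    union = area_a + area_b - inter
    return inter / union if union > 0 else 0.0
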
def main(_argv):
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
    input_size = FLAGS.size
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)
    if FLAGS.framework == 'tf':
        input_layer = tf.keras.layers.Input([input_size, input_size, 3])
        if FLAGS.tiny:
            feature_maps = YOLOv3_tiny(input_layer, NUM_CLASS)
            bbox_tensors = []
            for i, fm in enumerate(feature_maps):
                bbox_tensor = decode(fm, NUM_CLASS, i)
                bbox_tensors.append(bbox_tensor)
            model = tf.keras.Model(input_layer, bbox_tensors)
            utils.load_weights_tiny(model, FLAGS.weights)
        else:
            if FLAGS.model == 'yolov3':
                feature_maps = YOLOv3(input_layer, NUM_CLASS)
                bbox_tensors = []
                for i, fm in enumerate(feature_maps):
                    bbox_tensor = decode(fm, NUM_CLASS, i)
                    bbox_tensors.append(bbox_tensor)
                model = tf.keras.Model(input_layer, bbox_tensors)
                utils.load_weights_v3(model, FLAGS.weights)
            elif FLAGS.model == 'yolov4':
                feature_maps = YOLOv4(input_layer, NUM_CLASS)
                bbox_tensors = []
                for i, fm in enumerate(feature_maps):
                    bbox_tensor = decode(fm, NUM_CLASS, i)
                    bbox_tensors.append(bbox_tensor)
                model = tf.keras.Model(input_layer, bbox_tensors)
                utils.load_weights(model, FLAGS.weights)
    elif FLAGS.framework == 'trt':
        saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING])
        signature_keys = list(saved_model_loaded.signatures.keys())
        print(signature_keys)
        infer = saved_model_loaded.signatures['serving_default']

    logging.info('weights loaded')

    @tf.function
    def run_model(x):
        return model(x)

    # Benchmark inference speed on a single preprocessed image.
    fps_sum = 0.0  # running sum of per-iteration FPS (avoids shadowing built-in sum)
    original_image = cv2.imread(FLAGS.image)
    original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
    original_image_size = original_image.shape[:2]
    image_data = utils.image_preporcess(np.copy(original_image), [FLAGS.size, FLAGS.size])
    image_data = image_data[np.newaxis, ...].astype(np.float32)
    img_raw = tf.image.decode_image(
        open(FLAGS.image, 'rb').read(), channels=3)
    img_raw = tf.expand_dims(img_raw, 0)
    img_raw = tf.image.resize(img_raw, (FLAGS.size, FLAGS.size))
    batched_input = tf.constant(image_data)
    for i in range(1000):
        prev_time = time.time()
        # pred_bbox = model.predict(image_data)
        if FLAGS.framework == 'tf':
            pred_bbox = run_model(image_data)
        elif FLAGS.framework == 'trt':
            pred_bbox = infer(batched_input)
        # pred_bbox = pred_bbox.numpy()
        curr_time = time.time()
        exec_time = curr_time - prev_time
        if i == 0: continue  # skip the first (warm-up) iteration
        fps_sum += 1 / exec_time
        info = (f"{i} time: {round(exec_time, 3)}"
                f" average FPS: {round(fps_sum / i, 2)}, FPS: {round(1 / exec_time, 1)}")
        print(info)
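
The loop above interleaves timing and printing; the same idea can be factored into a reusable helper with the warm-up pass made explicit. benchmark and infer_fn are illustrative names, not part of this repo.

import time

def benchmark(infer_fn, batched_input, iterations=1000, warmup=1):
    # Warm-up calls are excluded from timing (graph tracing, allocator warm-up).
    for _ in range(warmup):
        infer_fn(batched_input)
    start = time.time()
    for _ in range(iterations):
        infer_fn(batched_input)
    elapsed = time.time() - start
    avg_ms = elapsed / iterations * 1000
    print(f"avg latency: {avg_ms:.2f} ms, avg FPS: {1000 / avg_ms:.1f}")
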
Example #13
0
def main(_argv):
    INPUT_SIZE = FLAGS.size
    if FLAGS.tiny:
        STRIDES = np.array(cfg.YOLO.STRIDES_TINY)
        ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY, FLAGS.tiny)
    else:
        STRIDES = np.array(cfg.YOLO.STRIDES)
        ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS, FLAGS.tiny)
    NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
    CLASSES = utils.read_class_names(cfg.YOLO.CLASSES)
    predicted_dir_path = './mAP/predicted'
    ground_truth_dir_path = './mAP/ground-truth'
    if os.path.exists(predicted_dir_path): shutil.rmtree(predicted_dir_path)
    if os.path.exists(ground_truth_dir_path): shutil.rmtree(ground_truth_dir_path)
    if os.path.exists(cfg.TEST.DECTECTED_IMAGE_PATH): shutil.rmtree(cfg.TEST.DECTECTED_IMAGE_PATH)

    os.mkdir(predicted_dir_path)
    os.mkdir(ground_truth_dir_path)
    os.mkdir(cfg.TEST.DECTECTED_IMAGE_PATH)

    # Build Model
    if FLAGS.framework == 'tf':
        input_layer = tf.keras.layers.Input([INPUT_SIZE, INPUT_SIZE, 3])
        if FLAGS.tiny:
            feature_maps = YOLOv3_tiny(input_layer, NUM_CLASS)
            bbox_tensors = []
            for i, fm in enumerate(feature_maps):
                bbox_tensor = decode(fm, NUM_CLASS, i)
                bbox_tensors.append(bbox_tensor)
            model = tf.keras.Model(input_layer, bbox_tensors)
            utils.load_weights_tiny(model, FLAGS.weights)
        else:
            if FLAGS.model == 'yolov3':
                feature_maps = YOLOv3(input_layer, NUM_CLASS)
                bbox_tensors = []
                for i, fm in enumerate(feature_maps):
                    bbox_tensor = decode(fm, NUM_CLASS, i)
                    bbox_tensors.append(bbox_tensor)
                model = tf.keras.Model(input_layer, bbox_tensors)
                utils.load_weights_v3(model, FLAGS.weights)
            elif FLAGS.model == 'yolov4':
                feature_maps = YOLOv4(input_layer, NUM_CLASS)
                bbox_tensors = []
                for i, fm in enumerate(feature_maps):
                    bbox_tensor = decode(fm, NUM_CLASS, i)
                    bbox_tensors.append(bbox_tensor)
                model = tf.keras.Model(input_layer, bbox_tensors)
                utils.load_weights(model, FLAGS.weights)

    else:
        # Load TFLite model and allocate tensors.
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        # Get input and output tensors.
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)

    with open(FLAGS.annotation_path) as f:
        num_lines = sum(1 for _ in f)
    with open(cfg.TEST.ANNOT_PATH, 'r') as annotation_file:
        for num, line in enumerate(annotation_file):
            annotation = line.strip().split()
            image_path = annotation[0]
            image_name = image_path.split('/')[-1]
            image = cv2.imread(image_path)
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            bbox_data_gt = np.array([list(map(int, box.split(','))) for box in annotation[1:]])

            if len(bbox_data_gt) == 0:
                bboxes_gt = []
                classes_gt = []
            else:
                bboxes_gt, classes_gt = bbox_data_gt[:, :4], bbox_data_gt[:, 4]
            ground_truth_path = os.path.join(ground_truth_dir_path, str(num) + '.txt')

            print('=> ground truth of %s:' % image_name)
            num_bbox_gt = len(bboxes_gt)
            with open(ground_truth_path, 'w') as f:
                for i in range(num_bbox_gt):
                    class_name = CLASSES[classes_gt[i]]
                    xmin, ymin, xmax, ymax = list(map(str, bboxes_gt[i]))
                    bbox_mess = ' '.join([class_name, xmin, ymin, xmax, ymax]) + '\n'
                    f.write(bbox_mess)
                    print('\t' + str(bbox_mess).strip())
            print('=> predict result of %s:' % image_name)
            predict_result_path = os.path.join(predicted_dir_path, str(num) + '.txt')
            # Predict Process
            image_size = image.shape[:2]
            image_data = utils.image_preporcess(np.copy(image), [INPUT_SIZE, INPUT_SIZE])
            image_data = image_data[np.newaxis, ...].astype(np.float32)

            if FLAGS.framework == "tf":
                pred_bbox = model.predict(image_data)
            else:
                interpreter.set_tensor(input_details[0]['index'], image_data)
                interpreter.invoke()
                pred_bbox = [interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details))]
            if FLAGS.model == 'yolov3':
                pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES)
            elif FLAGS.model == 'yolov4':
                XYSCALE = cfg.YOLO.XYSCALE
                pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES, XYSCALE=XYSCALE)

            pred_bbox = tf.concat(pred_bbox, axis=0)
            bboxes = utils.postprocess_boxes(pred_bbox, image_size, INPUT_SIZE, cfg.TEST.SCORE_THRESHOLD)
            bboxes = utils.nms(bboxes, cfg.TEST.IOU_THRESHOLD, method='nms')

            if cfg.TEST.DECTECTED_IMAGE_PATH is not None:
                image = utils.draw_bbox(image, bboxes)
                cv2.imwrite(cfg.TEST.DECTECTED_IMAGE_PATH + image_name, image)

            with open(predict_result_path, 'w') as f:
                for bbox in bboxes:
                    coor = np.array(bbox[:4], dtype=np.int32)
                    score = bbox[4]
                    class_ind = int(bbox[5])
                    class_name = CLASSES[class_ind]
                    score = '%.4f' % score
                    xmin, ymin, xmax, ymax = list(map(str, coor))
                    bbox_mess = ' '.join([class_name, score, xmin, ymin, xmax, ymax]) + '\n'
                    f.write(bbox_mess)
                    print('\t' + str(bbox_mess).strip())
            print(num, num_lines)
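
The ground-truth and prediction files written above use the plain-text layout common to mAP evaluation scripts: "class xmin ymin xmax ymax" for ground truth and "class score xmin ymin xmax ymax" for predictions. A minimal reader sketch for that layout (it assumes single-token class names, since a multi-word name would break the whitespace split):

def read_ground_truth(path):
    boxes = []
    with open(path) as f:
        for line in f:
            cls, xmin, ymin, xmax, ymax = line.split()
            boxes.append((cls, tuple(map(int, (xmin, ymin, xmax, ymax)))))
    return boxes

def read_predictions(path):
    boxes = []
    with open(path) as f:
        for line in f:
            cls, score, xmin, ymin, xmax, ymax = line.split()
            boxes.append((cls, float(score),
                          tuple(map(int, (xmin, ymin, xmax, ymax)))))
    return boxes
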
Example #14
0
def main(_argv):
    #     physical_devices = tf.config.experimental.list_physical_devices('GPU')
    #     if len(physical_devices) > 0:
    #         tf.config.experimental.set_memory_growth(physical_devices[0], True)
    trainset = Dataset(is_training=True)
    testset = Dataset(is_training=False)
    logdir = "./data/log"
    isfreeze = False
    steps_per_epoch = len(trainset)
    first_stage_epochs = cfg.TRAIN.FISRT_STAGE_EPOCHS
    second_stage_epochs = cfg.TRAIN.SECOND_STAGE_EPOCHS
    global_steps = tf.Variable(1, trainable=False, dtype=tf.int64)
    warmup_steps = cfg.TRAIN.WARMUP_EPOCHS * steps_per_epoch
    total_steps = (first_stage_epochs + second_stage_epochs) * steps_per_epoch
    input_layer = tf.keras.layers.Input(
        [cfg.TRAIN.INPUT_SIZE, cfg.TRAIN.INPUT_SIZE, 3])
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config()
    IOU_LOSS_THRESH = cfg.YOLO.IOU_LOSS_THRESH

    freeze_layers = utils.load_freeze_layer()

    feature_maps = YOLOv4(input_layer, NUM_CLASS)

    bbox_tensors = []
    for i, fm in enumerate(feature_maps):
        if i == 0:
            bbox_tensor = decode_train(fm, cfg.TRAIN.INPUT_SIZE // 8, NUM_CLASS,
                                       STRIDES, ANCHORS, i, XYSCALE)
        elif i == 1:
            bbox_tensor = decode_train(fm, cfg.TRAIN.INPUT_SIZE // 16, NUM_CLASS,
                                       STRIDES, ANCHORS, i, XYSCALE)
        else:
            bbox_tensor = decode_train(fm, cfg.TRAIN.INPUT_SIZE // 32, NUM_CLASS,
                                       STRIDES, ANCHORS, i, XYSCALE)
        bbox_tensors.append(fm)           # raw conv output (consumed by the loss)
        bbox_tensors.append(bbox_tensor)  # decoded predictions

    model = tf.keras.Model(input_layer, bbox_tensors)
    model.summary()

    optimizer = tf.keras.optimizers.Adam()
    if os.path.exists(logdir): shutil.rmtree(logdir)
    writer = tf.summary.create_file_writer(logdir)

    # define training step function
    def train_step(image_data, target):
        with tf.GradientTape() as tape:
            pred_result = model(image_data, training=True)
            giou_loss = conf_loss = prob_loss = 0

            # accumulate losses over the detection scales
            for i in range(len(freeze_layers)):
                conv, pred = pred_result[i * 2], pred_result[i * 2 + 1]
                loss_items = compute_loss(pred,
                                          conv,
                                          target[i][0],
                                          target[i][1],
                                          STRIDES=STRIDES,
                                          NUM_CLASS=NUM_CLASS,
                                          IOU_LOSS_THRESH=IOU_LOSS_THRESH,
                                          i=i)
                giou_loss += loss_items[0]
                conf_loss += loss_items[1]
                prob_loss += loss_items[2]

            total_loss = giou_loss + conf_loss + prob_loss

            gradients = tape.gradient(total_loss, model.trainable_variables)
            optimizer.apply_gradients(zip(gradients,
                                          model.trainable_variables))
            tf.print(
                "=> STEP %4d/%4d   lr: %.6f   giou_loss: %4.2f   conf_loss: %4.2f   "
                "prob_loss: %4.2f   total_loss: %4.2f" %
                (global_steps, total_steps, optimizer.lr.numpy(), giou_loss,
                 conf_loss, prob_loss, total_loss))
            # update learning rate
            global_steps.assign_add(1)
            if global_steps < warmup_steps:
                lr = global_steps / warmup_steps * cfg.TRAIN.LR_INIT
            else:
                lr = cfg.TRAIN.LR_END + 0.5 * (
                    cfg.TRAIN.LR_INIT - cfg.TRAIN.LR_END) * ((1 + tf.cos(
                        (global_steps - warmup_steps) /
                        (total_steps - warmup_steps) * np.pi)))
            optimizer.lr.assign(lr.numpy())

            # writing summary data
            with writer.as_default():
                tf.summary.scalar("lr", optimizer.lr, step=global_steps)
                tf.summary.scalar("loss/total_loss",
                                  total_loss,
                                  step=global_steps)
                tf.summary.scalar("loss/giou_loss",
                                  giou_loss,
                                  step=global_steps)
                tf.summary.scalar("loss/conf_loss",
                                  conf_loss,
                                  step=global_steps)
                tf.summary.scalar("loss/prob_loss",
                                  prob_loss,
                                  step=global_steps)
            writer.flush()

    def test_step(image_data, target):
        # evaluation pass only; no gradient tape is needed here
        pred_result = model(image_data, training=True)
        giou_loss = conf_loss = prob_loss = 0

        for i in range(len(freeze_layers)):
            conv, pred = pred_result[i * 2], pred_result[i * 2 + 1]
            loss_items = compute_loss(pred,
                                      conv,
                                      target[i][0],
                                      target[i][1],
                                      STRIDES=STRIDES,
                                      NUM_CLASS=NUM_CLASS,
                                      IOU_LOSS_THRESH=IOU_LOSS_THRESH,
                                      i=i)
            giou_loss += loss_items[0]
            conf_loss += loss_items[1]
            prob_loss += loss_items[2]

        total_loss = giou_loss + conf_loss + prob_loss

        tf.print(
            "=> TEST STEP %4d   giou_loss: %4.2f   conf_loss: %4.2f   "
            "prob_loss: %4.2f   total_loss: %4.2f" %
            (global_steps, giou_loss, conf_loss, prob_loss, total_loss))

    for epoch in range(first_stage_epochs + second_stage_epochs):
        if epoch < first_stage_epochs:
            if not isfreeze:
                isfreeze = True
                for name in freeze_layers:
                    freeze = model.get_layer(name)
                    freeze_all(freeze)
        elif epoch >= first_stage_epochs:
            if isfreeze:
                isfreeze = False
                for name in freeze_layers:
                    freeze = model.get_layer(name)
                    unfreeze_all(freeze)
        for image_data, target in trainset:
            train_step(image_data, target)
        for image_data, target in testset:
            test_step(image_data, target)
        model.save_weights("./checkpoints/yolov4")
def main(_argv):
    if FLAGS.tiny:
        STRIDES = np.array(cfg.YOLO.STRIDES_TINY)
        ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY, FLAGS.tiny)
    else:
        STRIDES = np.array(cfg.YOLO.STRIDES)
        if FLAGS.model == 'yolov4':
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS, FLAGS.tiny)
        else:
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_V3, FLAGS.tiny)
    NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
    XYSCALE = cfg.YOLO.XYSCALE
    input_size = FLAGS.size
    image_path = FLAGS.image

    if FLAGS.framework == 'tf':
        input_layer = tf.keras.layers.Input([input_size, input_size, 3])
        if FLAGS.tiny:
            feature_maps = YOLOv3_tiny(input_layer, NUM_CLASS)
            bbox_tensors = []
            for i, fm in enumerate(feature_maps):
                bbox_tensor = decode(fm, NUM_CLASS, i)
                bbox_tensors.append(bbox_tensor)
            model = tf.keras.Model(input_layer, bbox_tensors)
            utils.load_weights_tiny(model, FLAGS.weights)
        else:
            if FLAGS.model == 'yolov3':
                feature_maps = YOLOv3(input_layer, NUM_CLASS)
                bbox_tensors = []
                for i, fm in enumerate(feature_maps):
                    bbox_tensor = decode(fm, NUM_CLASS, i)
                    bbox_tensors.append(bbox_tensor)
                model = tf.keras.Model(input_layer, bbox_tensors)
                utils.load_weights_v3(model, FLAGS.weights)
            elif FLAGS.model == 'yolov4':
                feature_maps = YOLOv4(input_layer, NUM_CLASS)
                bbox_tensors = []
                for i, fm in enumerate(feature_maps):
                    bbox_tensor = decode(fm, NUM_CLASS, i)
                    bbox_tensors.append(bbox_tensor)
                model = tf.keras.Model(input_layer, bbox_tensors)
                utils.load_weights(model, FLAGS.weights)
    else:
        # Load TFLite model and allocate tensors.
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        # Get input and output tensors.
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)

    host = ''  # Symbolic name meaning all available interfaces
    port = 45678  # Arbitrary non-privileged port
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.bind((host, port))

    s.listen(1)
    print('Initializing connection')
    conn, addr = s.accept()
    print('Connected by', addr)
    data = conn.recv(1024)
    data = json.loads(data.decode())

    # conn.sendall(json.dumps(data).encode())
    # print('Received initialization data')
    shape = data.get("shape")
    shared_memory_name = data.get("name")
    type_data = data.get("type")
    object_to_find = data.get("object")

    # Attach to the existing shared memory block
    existing_shm = shared_memory.SharedMemory(name=shared_memory_name)
    # Note that a.shape is (6,) and a.dtype is np.int64 in this example
    previous = None
    frame = None
    # print('objeto a encontrar ' + object_to_find)
    print('Esperando imágenes')

    coords = np.empty(4, dtype=np.int32)
    while True:
        start_time = time.time()
        # print('Waiting frame')
        data = conn.recv(1024)
        if data == b'1':
            previous = frame
            frame = np.ndarray(shape, dtype=type_data, buffer=existing_shm.buf)
            frame, object_found, coords = process(frame, input_size, model,
                                                  object_to_find, FLAGS,
                                                  ANCHORS, STRIDES, XYSCALE)
            cv2.imshow('frame', frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

            if object_found:
                data = json.dumps({
                    "was_found": object_found,
                    # an ndarray is not JSON serializable; send a plain list
                    "coords": np.asarray(coords).tolist()
                })
            else:
                data = json.dumps({"was_found": object_found})

            conn.sendall(data.encode())
            #print("FPS: ", 1.0 / (time.time() - start_time))
    existing_shm.close()
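
For context, here is a hypothetical producer matching the protocol this server implies: create the shared-memory block, send a JSON header, then write a frame and signal with b'1'. All names, sizes, and field values below are illustrative assumptions, not part of the repo.

import json
import socket
import time
import numpy as np
from multiprocessing import shared_memory

frame = np.zeros((480, 640, 3), dtype=np.uint8)  # placeholder frame
shm = shared_memory.SharedMemory(create=True, size=frame.nbytes)

s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.connect(('localhost', 45678))
s.sendall(json.dumps({"shape": frame.shape, "name": shm.name,
                      "type": "uint8", "object": "person"}).encode())
time.sleep(0.1)  # crude framing so header and signal land in separate recv() calls

buf = np.ndarray(frame.shape, dtype=frame.dtype, buffer=shm.buf)
buf[:] = frame        # publish the frame into shared memory
s.sendall(b'1')       # tell the consumer a new frame is ready
print(json.loads(s.recv(1024).decode()))

shm.close()
shm.unlink()
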