Code example #1
    def demo(self, pt):

        img_ori = cv2.imread(pt)
        if self.resize:
            img, resize_ratio, dw, dh = letterbox_resize(
                img_ori, self.new_size[0], self.new_size[1])
        else:
            height_ori, width_ori = img_ori.shape[:2]
            img = cv2.resize(img_ori, tuple(self.new_size))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = np.asarray(img, np.float32)
        img = img[np.newaxis, :] - 127.5

        boxes_, scores_, labels_ = self.sess.run(
            [self.boxes, self.scores, self.labels],
            feed_dict={self.input_data: img})

        # rescale the coordinates to the original image
        if self.resize:  # was `if letterbox_resize:`, which is always truthy (the function object)
            boxes_[:, [0, 2]] = (boxes_[:, [0, 2]] - dw) / resize_ratio
            boxes_[:, [1, 3]] = (boxes_[:, [1, 3]] - dh) / resize_ratio
        else:
            boxes_[:, [0, 2]] *= (width_ori / float(self.new_size[0]))
            boxes_[:, [1, 3]] *= (height_ori / float(self.new_size[1]))

        for i in range(len(boxes_)):
            x0, y0, x1, y1 = boxes_[i].astype(int)  # np.int was removed in NumPy 1.24+
            cv2.rectangle(img_ori, (x0, y0), (x1, y1), (0, 200, 255), 4)
            res, con = HyperLPR_plate_recognition(img_ori, (x0, y0, x1, y1))
            label = 'Confidence: {:.2f}%\n'.format(
                scores_[i] * 100) + self.get_time(res)
            img_ori = self.drawTest(img_ori, label, 10, 10)

        cv2.imshow('result', img_ori)
        cv2.waitKey(0)
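
Every example on this page calls letterbox_resize from the repository's utils/data_aug.py, but its listing is not shown here. The sketch below is a minimal reconstruction inferred from the call signature and the inverse mapping (coord - dw) / resize_ratio used in the rescaling code above; the interpolation mode and gray padding value are assumptions.

import cv2
import numpy as np

def letterbox_resize(img, new_width, new_height, interp=cv2.INTER_LINEAR):
    """Resize keeping the aspect ratio and pad the remainder with gray.

    Returns (padded_img, resize_ratio, dw, dh) so callers can map box
    coordinates back via x_ori = (x - dw) / resize_ratio.
    """
    ori_height, ori_width = img.shape[:2]
    resize_ratio = min(new_width / float(ori_width), new_height / float(ori_height))
    resize_w = int(resize_ratio * ori_width)
    resize_h = int(resize_ratio * ori_height)
    img = cv2.resize(img, (resize_w, resize_h), interpolation=interp)

    # Center the resized image on a gray canvas (padding value assumed)
    canvas = np.full((new_height, new_width, 3), 128, np.uint8)
    dw = (new_width - resize_w) // 2
    dh = (new_height - resize_h) // 2
    canvas[dh:dh + resize_h, dw:dw + resize_w, :] = img
    return canvas, resize_ratio, dw, dh
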
Code example #2
def demo(input_image):

    img_ori = cv2.imread(input_image)
    if resize:
        img, resize_ratio, dw, dh = letterbox_resize(
            img_ori, new_size[0], new_size[1])
    else:
        height_ori, width_ori = img_ori.shape[:2]
        img = cv2.resize(img_ori, tuple(new_size))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = np.asarray(img, np.float32)
    img = img[np.newaxis, :] - 127.5

    boxes_, scores_, labels_ = sess.run(
        [boxes, scores, labels], feed_dict={input_data: img})

    # rescale the coordinates to the original image
    if resize:  # was `if letterbox_resize:`, which is always truthy (the function object)
        boxes_[:, [0, 2]] = (boxes_[:, [0, 2]] - dw) / resize_ratio
        boxes_[:, [1, 3]] = (boxes_[:, [1, 3]] - dh) / resize_ratio
    else:
        boxes_[:, [0, 2]] *= (width_ori/float(new_size[0]))
        boxes_[:, [1, 3]] *= (height_ori/float(new_size[1]))

    print("box coords:")
    print(boxes_)
    print('*' * 30)
    print("scores:")
    print(scores_)
    print('*' * 30)
    print("labels:")
    print(labels_)

    for i in range(len(boxes_)):
        x0, y0, x1, y1 = boxes_[i]
        plot_one_box(img_ori, [x0, y0, x1, y1],
                     label=classes[labels_[i]] + ', {:.2f}%'.format(scores_[i] * 100),
                     color=color_table[labels_[i]])

    cv2.imshow('result', img_ori)
    cv2.waitKey(0)
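
The coordinate-rescaling block recurs verbatim in every example below. As a usage illustration only, it can be factored into a small helper (the name rescale_boxes is hypothetical, not part of the repository):

import numpy as np

def rescale_boxes(boxes, letterbox, resize_ratio=None, dw=None, dh=None,
                  ori_size=None, new_size=None):
    # Map [x0, y0, x1, y1] rows from network-input space back to the
    # original image; mirrors the inline code above (hypothetical helper).
    boxes = np.asarray(boxes, np.float32)
    if letterbox:
        boxes[:, [0, 2]] = (boxes[:, [0, 2]] - dw) / resize_ratio
        boxes[:, [1, 3]] = (boxes[:, [1, 3]] - dh) / resize_ratio
    else:
        width_ori, height_ori = ori_size
        boxes[:, [0, 2]] *= width_ori / float(new_size[0])
        boxes[:, [1, 3]] *= height_ori / float(new_size[1])
    return boxes
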
Code example #3
    def GetBoundingBox(self, img_ori, half):
        from utils.data_aug import letterbox_resize

        if img_ori.shape[2] == 4:
            img_ori = img_ori[:, :, :3]
        if half:  # Grab left half of the image
            height, width = img_ori.shape[:2]
            start_row, start_col = int(0), int(0)
            end_row, end_col = int(height), int(width // 2)
            img_ori = img_ori[start_row:end_row, start_col:end_col]

        self.height_ori, self.width_ori = img_ori.shape[:2]
        # print('start resize')
        if self.letterbox_resize:
            img, resize_ratio, dw, dh = letterbox_resize(
                img_ori, self.new_size[0], self.new_size[1])
        else:
            height_ori, width_ori = img_ori.shape[:2]
            img = cv2.resize(img_ori, tuple(self.new_size))
        # print('resize end')
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = np.asarray(img, np.float32)
        img = img[np.newaxis, :] / 255.

        # print('run TF')
        boxes_, scores_, labels_, map4_ = self.sess.run(
            [self.boxes, self.scores, self.labels, self.map4],
            feed_dict={self.input_data: img})
        # print('after TF')

        # rescale the coordinates to the original image
        if self.letterbox_resize:
            boxes_[:, [0, 2]] = (boxes_[:, [0, 2]] - dw) / resize_ratio
            boxes_[:, [1, 3]] = (boxes_[:, [1, 3]] - dh) / resize_ratio
        else:
            boxes_[:, [0, 2]] *= (width_ori / float(self.new_size[0]))
            boxes_[:, [1, 3]] *= (height_ori / float(self.new_size[1]))

        return boxes_, scores_, map4_[0]
Code example #4
    "--restore_path",
    type=str,
    default="/media/ubutnu/fc1a3be7-9b03-427e-9cc9-c4b242cefbff/YOLOv3_TensorFlow/checkpoint/model-epoch_90_step_175083_loss_0.4213_lr_1e-05",
    help="The path of the weights to restore.")
args = parser.parse_args()

args.anchors = parse_anchors(args.anchor_path)
args.classes = read_class_names(args.class_name_path)
args.num_class = len(args.classes)

color_table = get_color_table(args.num_class)

img_ori = cv2.imread(args.input_image)
if args.letterbox_resize:
    img, resize_ratio, dw, dh = letterbox_resize(img_ori, args.new_size[0],
                                                 args.new_size[1])
else:
    height_ori, width_ori = img_ori.shape[:2]
    img = cv2.resize(img_ori, tuple(args.new_size))
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = np.asarray(img, np.float32)
img = img[np.newaxis, :] / 255.

with tf.Session() as sess:
    input_data = tf.placeholder(tf.float32,
                                [1, args.new_size[1], args.new_size[0], 3],
                                name='input_data')
    yolo_model = yolov3(args.num_class, args.anchors)
    with tf.variable_scope('yolov3'):
        pred_feature_maps = yolo_model.forward(input_data, False)
    pred_boxes, pred_confs, pred_probs = yolo_model.predict(pred_feature_maps)
Code example #5
def yolodet(image_path,
            anchor_path=rootpath + "/yolo/data/yolo_anchors.txt",
            new_size=[416, 416],
            letterbox=True,
            class_name_path=rootpath + "/yolo/data/coco.names",
            restore_path=rootpath + "/yolo/data/best_model"):

    anchors = parse_anchors(anchor_path)
    classes = read_class_names(class_name_path)
    num_class = len(classes)
    color_table = get_color_table(num_class)

    img_ori = cv2.imread(image_path)
    if letterbox:
        img, resize_ratio, dw, dh = letterbox_resize(img_ori, new_size[0],
                                                     new_size[1])
    else:
        height_ori, width_ori = img_ori.shape[:2]
        img = cv2.resize(img_ori, tuple(new_size))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = np.asarray(img, np.float32)
    img = img[np.newaxis, :] / 255.

    with tf.Session() as sess:
        input_data = tf.placeholder(tf.float32,
                                    [1, new_size[1], new_size[0], 3],
                                    name='input_data')
        yolo_model = yolov3(num_class, anchors)
        with tf.variable_scope('yolov3'):
            pred_feature_maps = yolo_model.forward(input_data, False)
        pred_boxes, pred_confs, pred_probs = yolo_model.predict(
            pred_feature_maps)

        pred_scores = pred_confs * pred_probs

        boxes, scores, labels = gpu_nms(pred_boxes,
                                        pred_scores,
                                        num_class,
                                        max_boxes=200,
                                        score_thresh=0.3,
                                        nms_thresh=0.45)

        saver = tf.train.Saver()
        saver.restore(sess, restore_path)

        boxes_, scores_, labels_ = sess.run([boxes, scores, labels],
                                            feed_dict={input_data: img})

        # rescale the coordinates to the original image
        if letterbox:
            boxes_[:, [0, 2]] = (boxes_[:, [0, 2]] - dw) / resize_ratio
            boxes_[:, [1, 3]] = (boxes_[:, [1, 3]] - dh) / resize_ratio
        else:
            boxes_[:, [0, 2]] *= (width_ori / float(new_size[0]))
            boxes_[:, [1, 3]] *= (height_ori / float(new_size[1]))

    tf.reset_default_graph()

    # transform detections into one line (#1class, #1conf, #1xmin, #1ymin, #1xmax, #1ymax, #2class, #2conf, ...)
    boxes = []
    for i in range(np.shape(boxes_)[0]):
        boxes.append(labels_[i])
        boxes.append(scores_[i])
        boxes.extend(boxes_[i, :])

    return boxes
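
Since this variant of yolodet returns one flat list, a caller has to regroup it in steps of six. A short usage sketch (the image path is a placeholder):

# Regroup the flat output (class, conf, x0, y0, x1, y1, class, conf, ...)
flat = yolodet('demo.jpg')  # placeholder path
detections = [flat[i:i + 6] for i in range(0, len(flat), 6)]
for cls_id, conf, x0, y0, x1, y1 in detections:
    print(int(cls_id), float(conf), x0, y0, x1, y1)
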
Code example #6
def yolodet(anchor_path, image_path, new_size, letterbox, class_name_path, restore_path):

    anchors = parse_anchors(anchor_path)
    classes = read_class_names(class_name_path)
    num_class = len(classes)
    color_table = get_color_table(num_class)
    
    img_ori = cv2.imread(image_path)
    
    if letterbox:
        img, resize_ratio, dw, dh = letterbox_resize(img_ori, new_size[0], new_size[1])
    else:
        height_ori, width_ori = img_ori.shape[:2]
        img = cv2.resize(img_ori, tuple(new_size))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = np.asarray(img, np.float32)
    img = img[np.newaxis, :] / 255.
    
    with tf.Session() as sess:
        input_data = tf.placeholder(tf.float32, [1, new_size[1], new_size[0], 3], name='input_data')
        yolo_model = yolov3(num_class, anchors)
        with tf.variable_scope('yolov3'):
            pred_feature_maps = yolo_model.forward(input_data, False)
        pred_boxes, pred_confs, pred_probs = yolo_model.predict(pred_feature_maps)
    
        pred_scores = pred_confs * pred_probs
    
        boxes, scores, labels = gpu_nms(pred_boxes, pred_scores, num_class, max_boxes=200, score_thresh=0.3, nms_thresh=0.45)
    
        saver = tf.train.Saver()
        saver.restore(sess, restore_path)
    
        boxes_, scores_, labels_ = sess.run([boxes, scores, labels], feed_dict={input_data: img})
    
        # rescale the coordinates to the original image
        if letterbox:
            boxes_[:, [0, 2]] = (boxes_[:, [0, 2]] - dw) / resize_ratio
            boxes_[:, [1, 3]] = (boxes_[:, [1, 3]] - dh) / resize_ratio
        else:
            boxes_[:, [0, 2]] *= (width_ori/float(new_size[0]))
            boxes_[:, [1, 3]] *= (height_ori/float(new_size[1]))
    
#        print("box coords:")
#        print(boxes_)
#        print('*' * 30)
#        print("scores:")
#        print(scores_)
#        print('*' * 30)
#        print("labels:")
#        print(labels_)
#    
#        for i in range(len(boxes_)):
#            x0, y0, x1, y1 = boxes_[i]
#            plot_one_box(img_ori, [x0, y0, x1, y1], label=classes[labels_[i]] + ', {:.2f}%'.format(scores_[i] * 100), color=color_table[labels_[i]])
#        cv2.imshow('Detection result', img_ori)
#        cv2.imwrite('detection_result.jpg', img_ori)
#        cv2.waitKey(0)
    tf.reset_default_graph()
    return boxes_, scores_, labels_
Code example #7
feature_map_1 = sess.graph.get_tensor_by_name(
    'yolov4tiny/head/feature_map_1:0')
feature_map_2 = sess.graph.get_tensor_by_name(
    'yolov4tiny/head/feature_map_2:0')
pred_boxes, pred_confs, pred_probs = yolo_model.predict(
    [feature_map_1, feature_map_2])
pred_scores = pred_confs * pred_probs
boxes, scores, labels = gpu_nms(pred_boxes,
                                pred_scores,
                                class_num,
                                max_boxes=200,
                                score_thresh=0.3,
                                nms_thresh=0.45)

img_ori = cv2.imread("./data/demo_data/messi.jpg")
img, resize_ratio, dw, dh = letterbox_resize(img_ori, img_size[0], img_size[1])
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = np.asarray(img, np.float32)
img = img[np.newaxis, :] / 255.

boxes_, scores_, labels_ = sess.run([boxes, scores, labels],
                                    feed_dict={input: img})
boxes_[:, [0, 2]] = (boxes_[:, [0, 2]] - dw) / resize_ratio
boxes_[:, [1, 3]] = (boxes_[:, [1, 3]] - dh) / resize_ratio

for i in range(len(boxes_)):
    x0, y0, x1, y1 = boxes_[i]
    plot_one_box(img_ori, [x0, y0, x1, y1],
                 label=classes[labels_[i]] +
                 ', {:.2f}%'.format(scores_[i] * 100),
                 color=color_table[labels_[i]])
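
This fragment presupposes a session whose graph already contains the yolov4tiny tensors and an input tensor bound to the name input. A plausible reconstruction of the missing setup, assuming a frozen GraphDef (the .pb path and the tensor name input_data:0 are guesses), would be:

import tensorflow as tf

# Hypothetical setup for the snippet above: import a frozen graph and
# recover the input placeholder by name (path and tensor name assumed).
sess = tf.Session()
with tf.gfile.GFile('./yolov4tiny_frozen.pb', 'rb') as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())
tf.import_graph_def(graph_def, name='')
input = sess.graph.get_tensor_by_name('input_data:0')  # shadows the builtin, as in the original
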
Code example #8
def test_display_one_img(img_path):
    print(img_path)
    img_ori = cv2.imread(img_path)
    print(img_ori.shape)
    if args.letterbox_resize:
        img, resize_ratio, dw, dh = letterbox_resize(img_ori, args.new_size[0],
                                                     args.new_size[1])
    else:
        height_ori, width_ori = img_ori.shape[:2]
        img = cv2.resize(img_ori, tuple(args.new_size))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = np.asarray(img, np.float32)
    img = img[np.newaxis, :] / 255.
    with sess_yolo.as_default():
        with graph_yolo.as_default():
            boxes_, scores_, labels_ = sess_yolo.run(
                [boxes, scores, labels], feed_dict={input_data: img})

    # rescale the coordinates to the original image
    if args.letterbox_resize:
        boxes_[:, [0, 2]] = (boxes_[:, [0, 2]] - dw) / resize_ratio
        boxes_[:, [1, 3]] = (boxes_[:, [1, 3]] - dh) / resize_ratio
    else:
        boxes_[:, [0, 2]] *= (width_ori / float(args.new_size[0]))
        boxes_[:, [1, 3]] *= (height_ori / float(args.new_size[1]))

    for j in range(len(boxes_)):
        x0, y0, x1, y1 = boxes_[j]
        x0 = np.maximum(x0, 0)
        y0 = np.maximum(y0, 0)
        x1 = np.maximum(x1, 0)
        y1 = np.maximum(y1, 0)

        label_index = labels_[j]
        # Crop the detected traffic signs

        if x1 - x0 > 10 and y1 - y0 > 10 and labels_[j] == 0:
            # img_ori_ = cv2.cvtColor(img_ori, cv2.COLOR_BGR2RGB).astype(np.float32)
            img_cropped = img_ori[int(y0):int(y1), int(x0):int(x1)]

            if img_cropped.shape[0] < 10 or img_cropped.shape[1] < 10:
                continue

            # cv2.imwrite('D:/Data/TrafficSigns/test/test_{}.png'.format(j), img_cropped)
            img_cropped = cv2.resize(img_cropped,
                                     (params.image_size, params.image_size))
            img_cropped = cv2.cvtColor(img_cropped, cv2.COLOR_BGR2RGB)
            img_cropped = img_cropped / 255.0
            # print(img_cropped)
            # np.savetxt('D:/Data/test_result/img.txt', img_cropped, fmt='%f', delimiter=',')

            if img_cropped.any():
                # tf.reset_default_graph()
                # new_graph = tf.Graph()
                with graph_triplet.as_default():
                    with sess_triplet.as_default():
                        image_input = test_input_fn(img_cropped, params)
                        image_input = sess_triplet.run(image_input)
                        label_index = sess_triplet.run(
                            predict_labels, feed_dict={inputs: image_input})
                        label_index = label_index[0] + 3
                        print(label_index)
                        # with open('D:/Data/test_result/outputs.txt', 'w') as ff:
                        #     ff.writelines(ff)
                # np.savetxt('D:/Data/test_result/outputs.txt', out, fmt='%f', delimiter=',')

        plot_one_box(img_ori, [x0, y0, x1, y1],
                     label_index=label_index,
                     label=args.classes_all[label_index] +
                     ', {:.2f}%'.format(scores_[j] * 100),
                     color=color_table[labels_[j]])

    cv2.namedWindow('Detection result', 0)
    cv2.resizeWindow('Detection result', 2400, 1800)
    cv2.imshow('Detection result', img_ori)
    cv2.imwrite('detection_result.jpg', img_ori)
    cv2.waitKey(0)
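
test_display_one_img relies on module-level graph_yolo/sess_yolo and graph_triplet/sess_triplet pairs that the excerpt does not show. A common TF1 pattern for running two independent models side by side, sketched here as an assumption rather than the author's actual setup, gives each model its own Graph and Session:

import tensorflow as tf

# Hypothetical setup: one graph/session per model so their variables
# and checkpoints never collide.
graph_yolo = tf.Graph()
with graph_yolo.as_default():
    # ... build the YOLO network and its boxes/scores/labels ops here ...
    sess_yolo = tf.Session(graph=graph_yolo)
    # tf.train.Saver().restore(sess_yolo, yolo_ckpt)  # checkpoint path assumed

graph_triplet = tf.Graph()
with graph_triplet.as_default():
    # ... build the triplet/embedding network and predict_labels here ...
    sess_triplet = tf.Session(graph=graph_triplet)
    # tf.train.Saver().restore(sess_triplet, triplet_ckpt)  # checkpoint path assumed
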
Code example #9
def test_one_img(img_path):
    img_ori = cv2.imread(img_path)

    img_name = img_path.strip().split('\\')[-1]
    img_name = img_name.split('.')[0]

    if args.letterbox_resize:
        img, resize_ratio, dw, dh = letterbox_resize(img_ori, args.new_size[0],
                                                     args.new_size[1])
    else:
        height_ori, width_ori = img_ori.shape[:2]
        img = cv2.resize(img_ori, tuple(args.new_size))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = np.asarray(img, np.float32)
    img = img[np.newaxis, :] / 255.
    with sess_yolo.as_default():
        with graph_yolo.as_default():
            boxes_, scores_, labels_ = sess_yolo.run(
                [boxes, scores, labels], feed_dict={input_data: img})

    # rescale the coordinates to the original image
    if args.letterbox_resize:
        boxes_[:, [0, 2]] = (boxes_[:, [0, 2]] - dw) / resize_ratio
        boxes_[:, [1, 3]] = (boxes_[:, [1, 3]] - dh) / resize_ratio
    else:
        boxes_[:, [0, 2]] *= (width_ori / float(args.new_size[0]))
        boxes_[:, [1, 3]] *= (height_ori / float(args.new_size[1]))

    for j in range(len(boxes_)):
        x0, y0, x1, y1 = boxes_[j]
        x0 = np.maximum(x0, 0)
        y0 = np.maximum(y0, 0)
        x1 = np.maximum(x1, 0)
        y1 = np.maximum(y1, 0)

        label_index = labels_[j]
        # Crop the detected traffic signs
        # the bbox of traffic signs must be big enough
        if x1 - x0 > 10 and y1 - y0 > 10 and labels_[j] == 0:
            # img_ori_ = cv2.cvtColor(img_ori, cv2.COLOR_BGR2RGB).astype(np.float32)
            img_cropped = img_ori[int(y0):int(y1), int(x0):int(x1)]

            if img_cropped.shape[0] < 10 or img_cropped.shape[1] < 10:
                continue

            img_cropped = cv2.resize(img_cropped,
                                     (params.image_size, params.image_size))
            img_cropped = cv2.cvtColor(img_cropped, cv2.COLOR_BGR2RGB)
            img_cropped = img_cropped / 255.0
            # cv2.imwrite('D:\\Data\\TrafficSigns\\test_images\\traffic_sign_cropped\\{}_{}.jpg'.format(img_name, j), img_cropped*255.0)
            # img_cropped_path = 'D:\\Data\\TrafficSigns\\test_images\\traffic_sign_cropped\\{}_{}.jpg'.format(img_name, j)
            # print(img_cropped_path)
            # cv2.imwrite('D:\\Data\\TrafficSigns\\test_images\\traffic_sign_cropped\\1_0.jpg', img_cropped*255.0)

            if img_cropped.any():
                # tf.reset_default_graph()
                # new_graph = tf.Graph()
                with graph_triplet.as_default():
                    with sess_triplet.as_default():
                        image_input = test_input_fn(img_cropped, params)
                        image_input = sess_triplet.run(image_input)
                        label_index = sess_triplet.run(
                            predict_labels, feed_dict={inputs: image_input})
                        label_index = label_index[0] + 3

            # with open('D:/Data/test_result/detect_result_self_collect.txt', 'a+') as f:
            #     f.write(img_path + ' ' + str(x0) + ' ' + str(y0) + ' ' + str(x1) + ' ' + str(y1) + ' ' + str(
            #         label_index[0]+2) + '\n')
        if isinstance(label_index, np.ndarray):
            label_index = label_index[0]
        with open(r'D:\Data\TrafficSigns\test_images\detect_result.txt',
                  'a+') as f:
            f.write(img_path + ' ' + str(x0) + ' ' + str(y0) + ' ' + str(x1) +
                    ' ' + str(y1) + ' ' + str(label_index) + '\n')
Code example #10
def estimatePose():
    parser = argparse.ArgumentParser(
        description="YOLO-V3 video test procedure.")
    # parser.add_argument("input_video", type=str,
    #                     help="The path of the input video.")
    parser.add_argument("--anchor_path",
                        type=str,
                        default="./data/yolo_anchors.txt",
                        help="The path of the anchor txt file.")
    parser.add_argument(
        "--new_size",
        nargs='*',
        type=int,
        default=[416, 416],
        help=
        "Resize the input image with `new_size`, size format: [width, height]")
    parser.add_argument("--letterbox_resize",
                        type=lambda x: (str(x).lower() == 'true'),
                        default=True,
                        help="Whether to use the letterbox resize.")
    parser.add_argument("--class_name_path",
                        type=str,
                        default="./data/my_data/YOLOPose.names",
                        help="The path of the class names.")
    parser.add_argument("--restore_path",
                        type=str,
                        default="./data/pose_weights/lunge_best",
                        help="The path of the weights to restore.")
    parser.add_argument("--save_video",
                        type=lambda x: (str(x).lower() == 'true'),
                        default=True,
                        help="Whether to save the video detection results.")
    args = parser.parse_args()

    args.anchors = parse_anchors(args.anchor_path)
    args.classes = read_class_names(args.class_name_path)
    args.num_class = len(args.classes)

    color_table = get_color_table(args.num_class)

    # vid = cv2.VideoCapture(args.input_video)
    vid = cv2.VideoCapture('./data/demo/lunge_03.mp4')
    # vid = cv2.VideoCapture(r'C:\Users\soma\SMART_Referee\SMART_Referee_DL\data\lunge\video\lunge_03.mp4')
    video_frame_cnt = int(vid.get(7))  # cv2.CAP_PROP_FRAME_COUNT
    video_width = int(vid.get(3))  # cv2.CAP_PROP_FRAME_WIDTH
    video_height = int(vid.get(4))  # cv2.CAP_PROP_FRAME_HEIGHT
    video_fps = int(vid.get(5))  # cv2.CAP_PROP_FPS

    trainer_pose = pd.read_csv('./data/ground_truth/output_right.csv',
                               header=None)
    trainer_pose = trainer_pose.loc[:, [
        0, 1, 2, 3, 4, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28
    ]]
    pca_df = trainer_pose.loc[:, [
        1, 2, 3, 4, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28
    ]]
    odd_cols = [c for c in pca_df.columns if c % 2 == 1]
    even_cols = [c for c in pca_df.columns if c % 2 == 0]
    pca_df.loc[:, odd_cols] = pca_df.loc[:, odd_cols] * video_width / 416
    pca_df.loc[:, even_cols] = pca_df.loc[:, even_cols] * video_height / 416
    pca_df = pca_df.astype(int)
    pca_df = pca_df.replace(0, np.nan)
    pca_df = pca_df.dropna()
    pca_df.describe()
    pca = PCA(n_components=1)
    pca.fit(pca_df)

    size = [video_width, video_height]
    list_p = []
    waist_err = 0
    critical_point = 0
    past_idx = 0
    startTrig = 0
    cntdown = 90
    t = 0
    TRLEN = len(trainer_pose)
    modify_ankle = pca_df.iloc[0, :].values
    base_rect = [(int(video_width / 4), int(video_height / 10)),
                 (int(video_width * 3 / 4), int(video_height * 19 / 20))]
    c_knee = c_waist = c_speed = 0

    if args.save_video:
        fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
        videoWriter = cv2.VideoWriter('video_result.mp4', fourcc, video_fps,
                                      (video_width, video_height))

    with tf.Session() as sess:
        input_data = tf.placeholder(tf.float32,
                                    [1, args.new_size[1], args.new_size[0], 3],
                                    name='input_data')
        yolo_model = yolov3(args.num_class, args.anchors)
        with tf.variable_scope('yolov3'):
            pred_feature_maps = yolo_model.forward(input_data, False)
        pred_boxes, pred_confs, pred_probs = yolo_model.predict(
            pred_feature_maps)

        pred_scores = pred_confs * pred_probs

        boxes, scores, labels = gpu_nms(pred_boxes,
                                        pred_scores,
                                        args.num_class,
                                        max_boxes=200,
                                        score_thresh=0.3,
                                        nms_thresh=0.45)

        saver = tf.train.Saver()
        saver.restore(sess, args.restore_path)

        for i in range(video_frame_cnt):
            ret, img_ori = vid.read()
            if args.letterbox_resize:
                img, resize_ratio, dw, dh = letterbox_resize(
                    img_ori, args.new_size[0], args.new_size[1])
            else:
                height_ori, width_ori = img_ori.shape[:2]
                img = cv2.resize(img_ori, tuple(args.new_size))
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = np.asarray(img, np.float32)
            img = img[np.newaxis, :] / 255.

            start_time = time.time()
            boxes_, scores_, labels_ = sess.run([boxes, scores, labels],
                                                feed_dict={input_data: img})

            # rescale the coordinates to the original image
            if args.letterbox_resize:
                boxes_[:, [0, 2]] = (boxes_[:, [0, 2]] - dw) / resize_ratio
                boxes_[:, [1, 3]] = (boxes_[:, [1, 3]] - dh) / resize_ratio
            else:
                boxes_[:, [0, 2]] *= (width_ori / float(args.new_size[0]))
                boxes_[:, [1, 3]] *= (height_ori / float(args.new_size[1]))

            people_pose = get_people_pose(boxes_, labels_,
                                          base_rect)  # list-dict
            people_pose = np.array([p[1] for p in people_pose[0]
                                    ]).flatten()  # dict-tuple -> list
            people_pose = people_pose[[
                0, 1, 2, 3, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27
            ]]

            # Start Trigger
            if startTrig == 2:
                pass
            elif startTrig == 0:  # start
                # reference box
                cv2.rectangle(img_ori, base_rect[0], base_rect[1], (0, 0, 255),
                              2)
                if isInBox(people_pose, base_rect[0], base_rect[1]):
                    # t_resize_pose = resize_pose(people_pose, trainer_pose.iloc[0, 1:].values)
                    t_resize_pose = resize_pose(people_pose,
                                                pca_df.iloc[0, :].values)
                    img_ori = draw_ground_truth(img_ori, t_resize_pose)
                    # img_ori = draw_ground_truth(img_ori, pca_df.iloc[0, :].values)
                    startTrig = isStart(people_pose,
                                        trainer_pose.iloc[0, 1:].values, size)

                    cv2.imshow('image', img_ori)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break
                    continue
                else:
                    print("박스안에 들어와주세요!!")
                    continue

            elif startTrig == 1:
                img_ori = draw_ground_truth(img_ori, pca_df.iloc[0, :].values)
                cv2.putText(img_ori, str(int(cntdown / 30)), (100, 300),
                            cv2.FONT_HERSHEY_SIMPLEX, 10, (255, 0, 0), 10)
                cv2.imshow('image', img_ori)
                cntdown -= 1
                if cntdown == 0:
                    startTrig = 2
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
                continue
            '''check ankle: if the deviation exceeds 40, fall back to the previous value'''
            people_pose = check_ankle(list_p, people_pose, modify_ankle, size)

            # f = open('user.csv', 'a', encoding='utf-8', newline='')
            # wr = csv.writer(f)
            # wr.writerow(people_pose)
            # draw the ground truth

            list_p.append(people_pose)

            img_ori = draw_ground_truth(img_ori, pca_df.iloc[t, :].values)

            if check_waist(people_pose):
                waist_err += 1

            if waist_err == 60:  # give feedback after every 60 waist errors (`is` compared identity, not value)
                feedback_waist()
                c_waist += 1
                waist_err = 0

            if trainer_pose.iloc[t, 0] == 1:  # t is a specific time point + i frames
                critical_point += 1
                if critical_point % 2 == 0:
                    my_pose = makeMypose_df(list_p)
                    c_speed = check_speed(
                        my_pose, trainer_pose.iloc[past_idx:t + 1, 1:], pca,
                        c_speed)
                    c_knee = check_knee(people_pose, c_knee)
                    modify_ankle = list_p[-1]
                    list_p = []
                    past_idx = t
            t += 1
            if t == TRLEN:
                break

            # img_ori = draw_body(img_ori, boxes_, labels_)
            # for i in range(len(boxes_)):
            #     x0, y0, x1, y1 = boxes_[i]
            #     plot_one_box(img_ori, [x0, y0, x1, y1], label=args.classes[labels_[i]] + ', {:.2f}%'.format(scores_[i] * 100), color=color_table[labels_[i]])

            # draw the user's pose
            # img_ori = draw_truth(img_ori, people_pose)

            end_time = time.time()
            cv2.putText(img_ori,
                        '{:.2f}ms'.format((end_time - start_time) * 1000),
                        (40, 40),
                        0,
                        fontScale=1,
                        color=(0, 255, 0),
                        thickness=2)

            cv2.imshow('image', img_ori)
            if args.save_video:
                videoWriter.write(img_ori)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                cv2.destroyAllWindows()
                break

        vid.release()
        cv2.destroyAllWindows()
        if args.save_video:
            videoWriter.release()

    f = open('./data/score/result.csv', 'a', encoding='utf-8', newline='')
    wr = csv.writer(f)
    d = datetime.today().strftime("%Y/%m/%d")
    t = datetime.today().strftime("%H:%M:%S")
    wr.writerow([d, t, c_knee, c_waist, c_speed])
Code example #11
def test_display_one_img(img_path):
    img_ori = cv2.imread(img_path)
    if args.letterbox_resize:
        img, resize_ratio, dw, dh = letterbox_resize(img_ori, args.new_size[0],
                                                     args.new_size[1])
    else:
        height_ori, width_ori = img_ori.shape[:2]
        img = cv2.resize(img_ori, tuple(args.new_size))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = np.asarray(img, np.float32)
    img = img[np.newaxis, :] / 255.
    with sess_yolo.as_default():
        with graph_yolo.as_default():
            boxes_, scores_, labels_ = sess_yolo.run(
                [boxes, scores, labels], feed_dict={input_data: img})

    # rescale the coordinates to the original image
    if args.letterbox_resize:
        boxes_[:, [0, 2]] = (boxes_[:, [0, 2]] - dw) / resize_ratio
        boxes_[:, [1, 3]] = (boxes_[:, [1, 3]] - dh) / resize_ratio
    else:
        boxes_[:, [0, 2]] *= (width_ori / float(args.new_size[0]))
        boxes_[:, [1, 3]] *= (height_ori / float(args.new_size[1]))

    for j in range(len(boxes_)):
        x0, y0, x1, y1 = boxes_[j]
        x0 = np.maximum(x0, 0)
        y0 = np.maximum(y0, 0)
        x1 = np.maximum(x1, 0)
        y1 = np.maximum(y1, 0)

        label_index = labels_[j]
        # Crop the detected traffic signs

        if x1 - x0 > 10 and y1 - y0 > 10 and labels_[j] == 0:
            img_ori_ = cv2.cvtColor(img_ori,
                                    cv2.COLOR_BGR2RGB).astype(np.float32)
            img_cropped = img_ori_[int(y0):int(y1), int(x0):int(x1)]

            if img_cropped.any():
                tf.reset_default_graph()
                new_graph = tf.Graph()
                with new_graph.as_default():
                    with tf.Session(graph=new_graph) as new_sess:
                        siamese_model = SiameseNet()
                        siamese_model.load_weights(
                            '/home/tracy/PycharmProjects/SiameseNet/checkpoint/RGBscaled/best/my_model'
                        )
                        img1, img2 = dataloader(img_cropped)
                        label_pred, label_score, _ = siamese_model.prediction(
                            img1, img2)
                        label_pred_, label_score_ = new_sess.run(
                            [label_pred, label_score])

                # with sess_siam.as_default():
                #     with sess_siam.graph.as_default():
                #         img1, img2 = dataloader(img_cropped)
                #         label_pred, label_score, _ = siamese_model.prediction(img1, img2)
                #         label_pred_, label_score_ = sess_siam.run([label_pred, label_score])

                # cv2.imwrite('/home/tracy/YOLOv3_TensorFlow/temp/' + str(i) + '_' + str(j) + '.jpg', img_cropped)

    #     print("Writting %s"%img)
    #     test_one_img('/home/tracy/data/TrafficSign_test/Images1/' + img)
    #     print('Done writing %s'%img)
    # Choose the one label with highest score
                pred_labels = np.nonzero(label_pred_)
                pred_scores = label_score_[pred_labels]
                # print("pred_scores: ", pred_scores)
                if len(pred_scores) > 0:
                    label_index = np.argmax(pred_scores)
                    label_index = pred_labels[0][label_index] + 2
                # labels_[j] = label_index

        plot_one_box(img_ori, [x0, y0, x1, y1],
                     label_index=label_index,
                     label=args.classes_all[label_index] +
                     ', {:.2f}%'.format(scores_[j] * 100),
                     color=color_table[labels_[j]])

    cv2.namedWindow('Detection result', 0)
    cv2.resizeWindow('Detection result', 2400, 1800)
    cv2.imshow('Detection result', img_ori)
    cv2.imwrite('detection_result.jpg', img_ori)
    cv2.waitKey(0)
Code example #12
    async def detection(self, img_ori, mode, detection_marker):
        if self.letterbox_resizes:
            img, resize_ratio, dw, dh = letterbox_resize(
                img_ori, self.new_size[0], self.new_size[1])
        else:
            height_ori, width_ori = img_ori.shape[:2]
            img = cv2.resize(img_ori, tuple(self.new_size))

        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = np.asarray(img, np.float32)
        img = img[np.newaxis, :] / 255.
        start = time.time()
        boxes_, scores_, labels_ = self.sess.run(
            [self.boxes, self.scores, self.labels],
            feed_dict={self.input_data: img})
        end = time.time()
        print(end - start)
        # rescale the coordinates to the original image
        if self.letterbox_resizes:
            boxes_[:, [0, 2]] = (boxes_[:, [0, 2]] - dw) / resize_ratio
            boxes_[:, [1, 3]] = (boxes_[:, [1, 3]] - dh) / resize_ratio
        else:
            boxes_[:, [0, 2]] *= (width_ori / float(self.new_size[0]))
            boxes_[:, [1, 3]] *= (height_ori / float(self.new_size[1]))

        # sort -- tracker for each person
        dets = []
        if len(boxes_) > 0:

            for i in range(len(boxes_)):
                # boxes_ rows are corner coordinates [x0, y0, x1, y1],
                # which is already the format the SORT tracker expects
                x0, y0, x1, y1 = boxes_[i]
                dets.append([x0, y0, x1, y1, scores_[i]])

        # np.set_printoptions(formatter={'float': lambda x: "{0:0.3f}".format(x)})
        dets = np.asarray(dets)
        tracks = self.tracker.update(dets)

        new_boxes = []
        indexIDs = []

        previous = self.memory.copy()
        self.memory = {}

        for track in tracks:
            new_boxes.append([track[0], track[1], track[2], track[3]])
            indexIDs.append(int(track[4]))
            self.memory[indexIDs[-1]] = new_boxes[-1]

        if len(new_boxes) > 0:
            i = 0

            for box in new_boxes:
                x = int(box[0])
                y = int(box[1])
                w = int(box[2])
                h = int(box[3])

                color = [
                    int(c) for c in self.COLORS[indexIDs[i] % len(self.COLORS)]
                ]

                if indexIDs[i] in previous:
                    previous_box = previous[indexIDs[i]]
                    (x2, y2) = (int(previous_box[0]), int(previous_box[1]))
                    (w2, h2) = (int(previous_box[2]), int(previous_box[3]))
                    p0 = (int(x + 10), int(y + 100))
                    p1 = (int(x2 + 10), int(y2 + 100))
                    cv2.line(img_ori, p0, p1, color, 3)  # tracker line

                    if intersect(p0, p1,
                                 (detection_marker.X1, detection_marker.Y1),
                                 (detection_marker.X2, detection_marker.Y2)):
                        self.counter += 1
                        if mode == 'PH':
                            if self.classes[labels_[i]] not in ('PHV', 'PH'):
                                self.violation += 1
                        elif mode == 'PV':
                            if self.classes[labels_[i]] not in ('PHV', 'PV'):
                                self.violation += 1
                        elif self.classes[labels_[i]] != mode:
                            self.violation += 1

                i += 1

        cv2.line(img_ori, (detection_marker.X1, detection_marker.Y1),
                 (detection_marker.X2, detection_marker.Y2), (0, 255, 255), 3)

        for i in range(len(boxes_)):
            x0, y0, x1, y1 = boxes_[i]
            plot_one_box(img_ori, [x0, y0, x1, y1],
                         label=self.classes[labels_[i]] +
                         ', {:.2f}%'.format(scores_[i] * 100),
                         color=self.color_table[labels_[i]])

            if mode == 'PH':
                if self.classes[labels_[i]] not in ('PHV', 'PH'):
                    cv2.putText(img_ori, 'Please wear: a helmet', (550, 40), 0,
                                1, (0, 0, 255), 2)
            elif mode == 'PV':
                if self.classes[labels_[i]] not in ('PHV', 'PV'):
                    cv2.putText(img_ori, 'Please wear: a safety vest',
                                (550, 40), 0, 1, (0, 0, 255), 2)
            elif mode == 'PLC' and self.classes[labels_[i]] != 'PLC':
                cv2.putText(img_ori, 'Please wear: a lab coat', (550, 40), 0,
                            1, (0, 0, 255), 2)
            elif mode == 'PHV' and self.classes[labels_[i]] != 'PHV':
                cv2.putText(img_ori, 'Please wear: a helmet and a safety vest',
                            (550, 40), 0, 1, (0, 0, 255), 2)
            elif self.classes[labels_[i]] != mode:
                cv2.putText(img_ori, 'Please wear: ' + str(mode), (550, 40), 0,
                            1, (0, 0, 255), 2)

        # print({'TotalViolation': self.violation,'TotalPeople':self.counter})

        # cv2.putText(img_ori, mode+'  Mode', (300, 40), 0,
        #             fontScale=1, color=(0, 255, 0), thickness=2)
        # cv2.putText(img_ori, 'People Count: '+ str(self.counter), (40, 620), 0,
        #             1, (255,255,255), 2)
        # cv2.putText(img_ori, 'Violation Count: '+ str(self.violation), (40, 660), 0,
        #             1, (255,255,255), 2)

        return {
            'TotalViolation': self.violation,
            'TotalPeople': self.counter
        }, img_ori
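
intersect is not defined in this excerpt. Line-crossing counters like this one usually test whether the segment between a track's previous and current points crosses the marker line; a standard orientation-based sketch (an assumption, not necessarily the author's helper) is:

def ccw(a, b, c):
    # True if points a, b, c are ordered counter-clockwise
    return (c[1] - a[1]) * (b[0] - a[0]) > (b[1] - a[1]) * (c[0] - a[0])

def intersect(p0, p1, q0, q1):
    # Segments p0-p1 and q0-q1 cross iff each segment's endpoints
    # straddle the line through the other segment
    return ccw(p0, q0, q1) != ccw(p1, q0, q1) and \
           ccw(p0, p1, q0) != ccw(p0, p1, q1)
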
Code example #13
File: test.py  Project: wfx1024/tensorflow_es
def video_detect(input_args):
    vid = cv2.VideoCapture(input_args.input_video)
    video_frame_cnt = int(vid.get(7))  # cv2.CAP_PROP_FRAME_COUNT
    video_width = int(vid.get(3))  # cv2.CAP_PROP_FRAME_WIDTH
    video_height = int(vid.get(4))  # cv2.CAP_PROP_FRAME_HEIGHT
    video_fps = int(vid.get(5))  # cv2.CAP_PROP_FPS

    fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
    video_writer = cv2.VideoWriter(pred_args.output_video, fourcc, video_fps, (video_width, video_height))

    with tf.Session() as sess:
        input_data = tf.placeholder(tf.float32, [1, pred_args.new_size[1], pred_args.new_size[0], 3], name='input_data')
        yolo_model = yolov3(pred_args.num_class, pred_args.anchors)
        with tf.variable_scope('yolov3'):
            pred_feature_maps = yolo_model.forward(input_data, False)

        pred_boxes, pred_confs, pred_probs = yolo_model.predict(pred_feature_maps)
        pred_scores = pred_confs * pred_probs
        boxes, scores, labels = gpu_nms(
            pred_boxes, pred_scores, pred_args.num_class,
            max_boxes=200, score_thresh=0.3, nms_thresh=0.45
        )
        saver = tf.train.Saver()
        saver.restore(sess, pred_args.weight_path)

        for i in range(video_frame_cnt):
            ret, img_ori = vid.read()
            if input_args.use_letterbox_resize:
                img, resize_ratio, dw, dh = letterbox_resize(img_ori, pred_args.new_size[0], pred_args.new_size[1])
            else:
                height_ori, width_ori = img_ori.shape[:2]
                img = cv2.resize(img_ori, tuple(pred_args.new_size))
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = np.asarray(img, np.float32)
            img = img[np.newaxis, :] / 255.

            start_time = time.time()
            boxes_, scores_, labels_ = sess.run([boxes, scores, labels], feed_dict={input_data: img})
            end_time = time.time()

            if input_args.use_letterbox_resize:
                boxes_[:, [0, 2]] = (boxes_[:, [0, 2]] - dw) / resize_ratio
                boxes_[:, [1, 3]] = (boxes_[:, [1, 3]] - dh) / resize_ratio
            else:
                boxes_[:, [0, 2]] *= (width_ori / float(pred_args.new_size[0]))
                boxes_[:, [1, 3]] *= (height_ori / float(pred_args.new_size[1]))

            for i in range(len(boxes_)):
                x0, y0, x1, y1 = boxes_[i]
                plot_one_box(img_ori, [x0, y0, x1, y1],
                             label=pred_args.classes[labels_[i]] + ', {:.2f}%'.format(scores_[i] * 100),
                             color=pred_args.color_table[labels_[i]])
            cv2.putText(
                img_ori, '{:.2f}ms'.format((end_time - start_time) * 1000),
                (40, 40), 0, fontScale=1, color=(0, 255, 0), thickness=2
            )
            cv2.imshow('Detection result', img_ori)
            video_writer.write(img_ori)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        vid.release()
        video_writer.release()
Code example #14
File: test.py  Project: wfx1024/tensorflow_es
def img_detect(input_args):
    """
    Image detection
    :param input_args:
    :return:
    """
    img_ori = cv2.imread(input_args.input_image)  # open with OpenCV
    if input_args.use_letterbox_resize:
        img, resize_ratio, dw, dh = letterbox_resize(img_ori, pred_args.new_size[0], pred_args.new_size[1])
    else:
        height_ori, width_ori = img_ori.shape[:2]
        img = cv2.resize(img_ori, tuple(pred_args.new_size))

    # convert img to RGB, cast to float, normalize
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = np.asarray(img, np.float32)
    img = img[np.newaxis, :] / 255.

    sess = tf.Session()

    input_data = tf.placeholder(
        tf.float32, [1, pred_args.new_size[1], pred_args.new_size[0], 3], name='input_data'
    )
    with tf.variable_scope('yolov3'):
        yolo_model = yolov3(pred_args.num_class, pred_args.anchors)
        pred_feature_maps = yolo_model.forward(input_data, False)

    pred_boxes, pred_confs, pred_probs = yolo_model.predict(pred_feature_maps)
    pred_scores = pred_confs * pred_probs
    boxes, scores, labels = gpu_nms(
        pred_boxes, pred_scores, pred_args.num_class,
        max_boxes=200, score_thresh=0.3, nms_thresh=0.45)

    saver = tf.train.Saver()
    saver.restore(sess, pred_args.weight_path)

    boxes_, scores_, labels_ = sess.run([boxes, scores, labels], feed_dict={input_data: img})

    # rescale the coordinates back to the original image
    if input_args.use_letterbox_resize:
        boxes_[:, [0, 2]] = (boxes_[:, [0, 2]] - dw) / resize_ratio
        boxes_[:, [1, 3]] = (boxes_[:, [1, 3]] - dh) / resize_ratio
    else:
        boxes_[:, [0, 2]] *= (width_ori / float(pred_args.new_size[0]))
        boxes_[:, [1, 3]] *= (height_ori / float(pred_args.new_size[1]))

    print('box coords:', boxes_, '\n' + '*' * 30)
    print('scores:', scores_, '\n' + '*' * 30)
    print('labels:', labels_)

    for i in range(len(boxes_)):
        x0, y0, x1, y1 = boxes_[i]
        plot_one_box(
            img_ori, [x0, y0, x1, y1],
            label=pred_args.classes[labels_[i]] + ', {:.2f}%'.format(scores_[i] * 100),
            color=pred_args.color_table[labels_[i]]
        )
    cv2.imshow('Detection result', img_ori)
    cv2.imwrite(pred_args.output_image, img_ori)
    cv2.waitKey(0)
    sess.close()
Code example #15
def single_image_test(imgname):
    parser = argparse.ArgumentParser(
        description="YOLO-V3 test single image test procedure.")
    parser.add_argument("--input_image",
                        type=str,
                        default="./static/uploads/beforeimg/" + imgname,
                        help="The path of the input image.")
    parser.add_argument("--anchor_path",
                        type=str,
                        default="./data/yolo_anchors.txt",
                        help="The path of the anchor txt file.")
    parser.add_argument(
        "--new_size",
        nargs='*',
        type=int,
        default=[416, 416],
        help=
        "Resize the input image with `new_size`, size format: [width, height]")
    parser.add_argument("--letterbox_resize",
                        type=lambda x: (str(x).lower() == 'true'),
                        default=True,
                        help="Whether to use the letterbox resize.")
    parser.add_argument("--class_name_path",
                        type=str,
                        default="./data/coco.names",
                        help="The path of the class names.")
    parser.add_argument("--restore_path",
                        type=str,
                        default="./data/darknet_weights/yolov3.ckpt",
                        help="The path of the weights to restore.")
    args = parser.parse_args()

    args.anchors = parse_anchors(args.anchor_path)
    args.classes = read_class_names(args.class_name_path)
    args.num_class = len(args.classes)

    color_table = get_color_table(args.num_class)

    img_ori = cv2.imread(args.input_image)
    if args.letterbox_resize:
        img, resize_ratio, dw, dh = letterbox_resize(img_ori, args.new_size[0],
                                                     args.new_size[1])
    else:
        height_ori, width_ori = img_ori.shape[:2]
        img = cv2.resize(img_ori, tuple(args.new_size))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = np.asarray(img, np.float32)
    img = img[np.newaxis, :] / 255.

    with tf.Session() as sess:
        input_data = tf.placeholder(tf.float32,
                                    [1, args.new_size[1], args.new_size[0], 3],
                                    name='input_data')
        yolo_model = yolov3(args.num_class, args.anchors)
        with tf.variable_scope('yolov3'):
            pred_feature_maps = yolo_model.forward(input_data, False)
        pred_boxes, pred_confs, pred_probs = yolo_model.predict(
            pred_feature_maps)

        pred_scores = pred_confs * pred_probs

        boxes, scores, labels = gpu_nms(pred_boxes,
                                        pred_scores,
                                        args.num_class,
                                        max_boxes=200,
                                        score_thresh=0.3,
                                        nms_thresh=0.45)

        saver = tf.train.Saver()
        saver.restore(sess, args.restore_path)

        boxes_, scores_, labels_ = sess.run([boxes, scores, labels],
                                            feed_dict={input_data: img})

        # rescale the coordinates to the original image
        if args.letterbox_resize:
            boxes_[:, [0, 2]] = (boxes_[:, [0, 2]] - dw) / resize_ratio
            boxes_[:, [1, 3]] = (boxes_[:, [1, 3]] - dh) / resize_ratio
        else:
            boxes_[:, [0, 2]] *= (width_ori / float(args.new_size[0]))
            boxes_[:, [1, 3]] *= (height_ori / float(args.new_size[1]))

        print("box coords:")
        print(boxes_)
        print('*' * 30)
        print("scores:")
        print(scores_)
        print('*' * 30)
        print("labels:")
        print(labels_)

        for i in range(len(boxes_)):
            x0, y0, x1, y1 = boxes_[i]
            plot_one_box(img_ori, [x0, y0, x1, y1],
                         label=args.classes[labels_[i]] +
                         ', {:.2f}%'.format(scores_[i] * 100),
                         color=color_table[labels_[i]])
        #cv2.imshow('Detection result', img_ori)
        cv2.imwrite('static/uploads/afterimg/' + imgname, img_ori)
        #cv2.waitKey(0)

        doc = []
        doc.append("Found:")
        item = ["helmet", "person without a helmet"]
        if len(labels_) == 0:
            doc.append("Nothing was detected.")
        else:
            for i in range(len(labels_)):
                doc.append(item[labels_[i]] + ", box: " + str(boxes_[i]) +
                           ", confidence: " + str(scores_[i]))
        return doc
Code example #16
def detect_in_video(video_path):
    # VideoWriter is the responsible of creating a copy of the video
    # used for the detections but with the detections overlays. Keep in
    # mind the frame size has to be the same as original video.
    # out = cv2.VideoWriter('../temp/' + 'WIN_20191218_11_03_57_Pro.mp4', cv2.VideoWriter_fourcc(
    #    'M', 'J', 'P', 'G'), 10, (1280, 720))

    if is_yolo:
        print('yolo!')
        configuration = tf.ConfigProto(device_count={"GPU": 0})
        sess = tf.Session(config=configuration)
        input_data = tf.placeholder(tf.float32,
                                    [1, new_size[1], new_size[0], 3],
                                    name='input_data')
        yolo_model = yolov3(num_class, anchors)
        with tf.variable_scope('yolov3'):
            pred_feature_maps = yolo_model.forward(input_data, False)
        pred_boxes, pred_confs, pred_probs = yolo_model.predict(
            pred_feature_maps)

        pred_scores = pred_confs * pred_probs

        boxes, scores, labels = gpu_nms(pred_boxes,
                                        pred_scores,
                                        num_class,
                                        max_boxes=1,
                                        score_thresh=0.2,
                                        nms_thresh=0.45)

        saver = tf.train.Saver()
        saver.restore(sess, restore_path)
    else:
        detection_graph = tf.Graph()
        with detection_graph.as_default():
            od_graph_def = tf.GraphDef()
            with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
                serialized_graph = fid.read()
                od_graph_def.ParseFromString(serialized_graph)
                tf.import_graph_def(od_graph_def, name='')
            configuration = tf.ConfigProto(device_count={"GPU": 0})
            sess = tf.Session(config=configuration, graph=detection_graph)

            # Definite input and output Tensors for detection_graph
            image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
            # Each box represents a part of the image where a particular object
            # was detected.
            detection_boxes = detection_graph.get_tensor_by_name(
                'detection_boxes:0')
            # Each score represent how level of confidence for each of the objects.
            # Score is shown on the result image, together with the class
            # label.
            detection_scores = detection_graph.get_tensor_by_name(
                'detection_scores:0')
            detection_classes = detection_graph.get_tensor_by_name(
                'detection_classes:0')
            num_detections = detection_graph.get_tensor_by_name(
                'num_detections:0')

        label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
        categories = label_map_util.convert_label_map_to_categories(
            label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
        category_index = label_map_util.create_category_index(categories)

    frame_statistics = []
    frame_id = 1
    is_skip_frame = True
    frame_skip_count = 0

    # Create a directory of frames for the given video
    video_base_name = os.path.basename(video_path)
    video_name = os.path.splitext(video_base_name)[0]
    video_dir = join(os.path.dirname(video_path), video_name)
    images_dir = "images"
    video_images_dir = join(video_dir, images_dir)

    if not os.path.exists(video_images_dir):
        os.makedirs(video_images_dir)
    else:
        # Remove all frames from the target directory
        remove_files_in_dir(video_images_dir)

    video_images_dir_rat = join(video_images_dir, 'rat')
    video_images_dir_mouse = join(video_images_dir, 'mouse')
    os.makedirs(video_images_dir_rat, exist_ok=True)
    os.makedirs(video_images_dir_mouse, exist_ok=True)
    remove_files_in_dir(video_images_dir_rat)
    remove_files_in_dir(video_images_dir_mouse)

    # Load the video
    cap = cv2.VideoCapture(video_path)
    video_frame_cnt = int(cap.get(7))  # cv2.CAP_PROP_FRAME_COUNT
    video_width = int(cap.get(3))  # cv2.CAP_PROP_FRAME_WIDTH
    video_height = int(cap.get(4))  # cv2.CAP_PROP_FRAME_HEIGHT
    video_fps = int(cap.get(5))  # cv2.CAP_PROP_FPS

    # Get the video resolution (re-read via the named constants)
    video_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    video_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))

    # Set the image resolution

    cur_dir = os.getcwd()
    os.chdir(video_images_dir)
    while cap.isOpened():
        # Read the frame
        ret, frame = cap.read()
        if not ret:  # end of stream; avoid looping forever on None frames
            break
        if frame is not None:
            # Recolor the frame. By default, OpenCV uses BGR color space.
            # This short blog post explains this better:
            # https://www.learnopencv.com/why-does-opencv-use-bgr-color-format/
            # color_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            if not is_skip_frame:
                if is_yolo:
                    print('yoloo!!')
                    if is_letterbox_resize:
                        img, resize_ratio, dw, dh = letterbox_resize(
                            frame, new_size[0], new_size[1])
                    else:
                        height_ori, width_ori = frame.shape[:2]
                        img = cv2.resize(frame, tuple(new_size))
                    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                    img = np.asarray(img, np.float32)
                    img = img[np.newaxis, :] / 255.

                    start_time = time.time()
                    boxes_, scores_, labels_ = sess.run(
                        [boxes, scores, labels], feed_dict={input_data: img})
                    end_time = time.time()

                    # rescale the coordinates to the original image
                    if is_letterbox_resize:
                        boxes_[:, [0, 2]] = (boxes_[:, [0, 2]] -
                                             dw) / resize_ratio
                        boxes_[:, [1, 3]] = (boxes_[:, [1, 3]] -
                                             dh) / resize_ratio
                    else:
                        boxes_[:, [0, 2]] *= (width_ori / float(new_size[0]))
                        boxes_[:, [1, 3]] *= (height_ori / float(new_size[1]))

                    for i in range(len(boxes_)):
                        if scores_[i] == max(scores_):
                            x0, y0, x1, y1 = boxes_[i]
                            plot_one_box(frame, [x0, y0, x1, y1],
                                         label=classes_yolo[labels_[i]] +
                                         ', {:.2f}%'.format(scores_[i] * 100),
                                         color=color_table[labels_[i]])

                            rodent_confidence = scores_[i]
                            rodent_class_id = labels_[i] + 1
                            rodent_class_name = classes_yolo[labels_[i]]
                            if rodent_confidence >= .20:
                                frame_statistics.append({
                                    'frame_id': frame_id,
                                    'confidence': rodent_confidence,
                                    'rodent_class_id': rodent_class_id,
                                    'rodent_class_name': rodent_class_name,
                                })

                                # Save the frame
                                frame_name = rodent_class_name + '/image' + str(
                                    frame_id) + '.jpg'
                                cv2.imwrite(frame_name, frame)

                                # Save the XML file
                                #scores = np.squeeze(scores[0])

                                #bbox_coords = boxes[0]
                                #writer = Writer('.', video_width, video_height)
                                #writer.addObject(rodent_class_name, bbox_coords[1] * video_width,
                                #bbox_coords[0] * video_height, bbox_coords[3] * video_width,
                                #bbox_coords[2] * video_height)
                                #writer.save('image' + str(frame_id) + '.xml')

                            #else:
                            # Save the frame
                            #frame_name = 'image' + str(frame_id) + '.jpg'
                            #cv2.imwrite(frame_name, frame)

                    cv2.putText(frame,
                                '{:.2f}ms'.format(
                                    (end_time - start_time) * 1000), (40, 40),
                                0,
                                fontScale=1,
                                color=(0, 255, 0),
                                thickness=2)

                else:
                    image_np_expanded = np.expand_dims(frame, axis=0)

                    # Actual detection.
                    (boxes, scores, classes, num) = sess.run(
                        [
                            detection_boxes, detection_scores,
                            detection_classes, num_detections
                        ],
                        feed_dict={image_tensor: image_np_expanded})

                    # Visualization of the results of a detection.
                    # note: perform the detections using a higher threshold
                    vis_util.visualize_boxes_and_labels_on_image_array(
                        frame,
                        np.squeeze(boxes[0]),
                        np.squeeze(classes[0]).astype(np.int32),
                        np.squeeze(scores[0]),
                        category_index,
                        use_normalized_coordinates=True,
                        line_thickness=8,
                        max_boxes_to_draw=1,
                        min_score_thresh=.20)

                # rodent_confidence = np.squeeze(scores[0])[0]
                # rodent_class_id = np.squeeze(classes[0]).astype(np.int32)[0]
                # rodent_class_name = category_index[rodent_class_id]['name']
                # if rodent_confidence > .20:
                #     frame_statistics.append({'frame_id': frame_id,
                #                              'confidence': rodent_confidence,
                #                              'rodent_class_id': rodent_class_id,
                #                              'rodent_class_name': rodent_class_name,
                #                              })
                #
                #     # Сохранить кадр
                #     frame_name = rodent_class_name + '/image' + str(frame_id) + '.jpg'
                #     cv2.imwrite(frame_name, frame)
                #
                #     # Сохранить xml-файл
                #     scores = np.squeeze(scores[0])
                #     for i in range(min(1, np.squeeze(boxes[0]).shape[0])):
                #         if scores is None or scores[i] > .20:
                #             boxes = tuple(boxes[i].tolist())
                #
                #     bbox_coords = boxes[0]
                #     writer = Writer('.', video_width, video_height)
                #     writer.addObject(rodent_class_name, bbox_coords[1] * video_width,
                #                      bbox_coords[0] * video_height, bbox_coords[3] * video_width,
                #                      bbox_coords[2] * video_height)
                #     writer.save('image' + str(frame_id) + '.xml')
                # else:
                #     # Сохранить кадр
                #     frame_name = 'image' + str(frame_id) + '.jpg'
                #     cv2.imwrite(frame_name, frame)

            cv2.imshow('frame', cv2.resize(frame, (800, 600)))
            output_rgb = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            # out.write(output_rgb)

            # Skip the frame if necessary
            if is_skip_frame:
                while 1:
                    key = cv2.waitKey(1)
                    if key == 32:  # the "space" key was pressed
                        frame_skip_count += 1
                        print("You have skipped " + str(frame_skip_count) +
                              " frames")
                        break
                    elif key == 113 or key == 233:  # 'q' (or Cyrillic 'й') was pressed
                        is_skip_frame = False
                        break

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

            frame_id += 1

    # out.release()
    os.chdir(cur_dir)
    cap.release()
    cv2.destroyAllWindows()

    statistics = {
        'frame_count': frame_id,  # number of frames
        'frame_skip_count': frame_skip_count,  # number of skipped frames
        'frame_rodent_count': 0,  # number of frames with a rodent
        'frame_rat_count': 0,  # number of frames with a rat
        'frame_mouse_count': 0,  # number of frames with a mouse
        'sum_confidence_rat': 0,  # sum of rat confidences over the video
        'sum_confidence_mouse': 0,  # sum of mouse confidences over the video
        'mean_confidence_rat': 0,  # mean rat confidence over the video
        'mean_confidence_mouse': 0  # mean mouse confidence over the video
    }

    for frame_statistic in frame_statistics:
        if frame_statistic['rodent_class_name'] == 'rat':
            statistics['frame_rodent_count'] += 1
            statistics['frame_rat_count'] += 1
            statistics['sum_confidence_rat'] += frame_statistic['confidence']
            statistics['mean_confidence_rat'] = statistics[
                'sum_confidence_rat'] / statistics['frame_rat_count']
        elif frame_statistic['rodent_class_name'] == 'mouse':
            statistics['frame_rodent_count'] += 1
            statistics['frame_mouse_count'] += 1
            statistics['sum_confidence_mouse'] += frame_statistic['confidence']
            statistics['mean_confidence_mouse'] = statistics[
                'sum_confidence_mouse'] / statistics['frame_mouse_count']

    print('----->>> Detection results <<<-----')
    print('Number of frames: ' + str(statistics['frame_count']))
    print('Number of skipped frames: ' +
          str(statistics['frame_skip_count']))
    print('Number of frames with a rodent: ' +
          str(statistics['frame_rodent_count']))
    print('Number of frames with a rat: ' + str(statistics['frame_rat_count']))
    print('Number of frames with a mouse: ' +
          str(statistics['frame_mouse_count']))
    print('Mean rat confidence over the video: ' +
          str(statistics['mean_confidence_rat']))
    print('Mean mouse confidence over the video: ' +
          str(statistics['mean_confidence_mouse']))