Example #1
def predict_image_from_bytes(img_bytes):
    #load the raw byte data into an in-memory stream
    img_file = io.BytesIO(img_bytes)
    #encoding the image in base64 to serve in HTML
    img_pil = Image.open(img_file)
    img_pil.save("img.jpg", format="JPEG")
    img_uri = base64.b64encode(open("img.jpg", 'rb').read()).decode('utf-8')
    #run inference on the image and return an HTML response
    img = cv2.imread("img.jpg", cv2.IMREAD_COLOR)
    pts = model.predict(img)
    import numpy as np
    from misc.visualization import draw_points_and_skeleton, joints_dict
    #img = cv2.imread('image.jpg',0) # reads image 'opencv-logo.png' as grayscale
    person_ids = np.arange(len(pts), dtype=np.int32)
    for i, (pt, pid) in enumerate(zip(pts, person_ids)):
        img = draw_points_and_skeleton(img,
                                       pt,
                                       joints_dict()['coco']['skeleton'],
                                       person_index=pid,
                                       points_color_palette='gist_rainbow',
                                       skeleton_color_palette='jet',
                                       points_palette_samples=10)
    saved_image = cv2.imwrite('pointed.jpg', img)
    saved_image_uri = base64.b64encode(open("pointed.jpg",
                                            'rb').read()).decode('utf-8')
    return HTMLResponse("""
                <html>
                <figure class = "figure">
                <img src="data:image/png;base64, %s" class = "figure-img">
                </figure>
                </html>
                """ % (saved_image_uri))
Example #2
def get_keypoint(camera_id=0, filename=None, hrnet_c=48, hrnet_j=17,
                 hrnet_weights="./weights/pose_hrnet_w48_384x288.pth",
                 hrnet_joints_set="coco", image_resolution='(384, 288)',
                 single_person=True, max_batch_size=16,
                 disable_vidgear=False, device=None):
    if torch.cuda.is_available():
        torch.backends.cudnn.deterministic = True
        device = torch.device('cuda:0')
    else:
        device = torch.device('cpu')
    image_resolution = ast.literal_eval(image_resolution)
    has_display = 'DISPLAY' in os.environ.keys() or sys.platform == 'win32'
	
    if filename is not None:
        #video = cv2.VideoCapture(filename)
        image = cv2.imread(filename, cv2.IMREAD_COLOR)
        #assert video.isOpened()
    else:
        if disable_vidgear:
            video = cv2.VideoCapture(camera_id)
            assert video.isOpened()
            _, image = video.read()  #grab a single frame to run inference on
        else:
            video = CamGear(camera_id).start()
            image = video.read()  #grab a single frame to run inference on
    model = SimpleHRNet(
        hrnet_c,
        hrnet_j,
        hrnet_weights,
        resolution=image_resolution,
        multiperson=not single_person,
        max_batch_size=max_batch_size,
        device=device
    )
    pts = model.predict(image)
    resolution = image.shape
    x_len = resolution[0]  #image height; keypoints come back as (y, x, conf)
    y_len = resolution[1]  #image width
    vector = []
    keypoints = pts[0]
    for pt in keypoints:
        pt = list(pt)
        temp = []
        temp.append((pt[0]/x_len))
        temp.append((pt[1]/y_len))
        vector.extend(temp)

    for i, pt in enumerate(pts):
            frame = draw_points_and_skeleton(image, pt, joints_dict()[hrnet_joints_set]['skeleton'], person_index=i,
                                             points_color_palette='gist_rainbow', skeleton_color_palette='jet',
                                             points_palette_samples=10)

    if has_display:
        output_name = filename.split("\\")
        output_name = output_name[-2] + "_" + output_name[-1]			
        cv2.imwrite('tested2\\'+output_name+'.png', frame)
        cv2.imwrite("keypoints_"+filename+".png", frame)
        cv2.imshow('frame.png', frame)
        k = cv2.waitKey(1)
    return vector
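For reference, a hypothetical call: with the default 17-joint COCO weights the function returns a flat list of 34 values, one normalized (y/height, x/width) pair per joint of the first detected person.

# Hypothetical usage; "person.jpg" is a placeholder path.
vec = get_keypoint(filename="person.jpg")
assert len(vec) == 2 * 17  # one normalized (y, x) pair per COCO joint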
Example #3
def main(filename, hrnet_m, hrnet_c, hrnet_j, hrnet_weights, hrnet_joints_set,
         image_resolution, max_batch_size, device):

    if device is not None:
        device = torch.device(device)
    else:
        if torch.cuda.is_available():
            torch.backends.cudnn.deterministic = True
            device = torch.device('cuda')
        else:
            device = torch.device('cpu')

    # print(device)

    image_resolution = ast.literal_eval(image_resolution)
    has_display = 'DISPLAY' in os.environ.keys() or sys.platform == 'win32'
    video_writer = None

    # filename = os.path.join(os.getcwd(), filename)
    # print(filename)
    image = cv2.imread(filename)

    model = SimpleHRNet(hrnet_c,
                        hrnet_j,
                        hrnet_weights,
                        model_name=hrnet_m,
                        resolution=image_resolution,
                        multiperson=False,
                        return_bounding_boxes=False,
                        max_batch_size=max_batch_size,
                        device=device)

    pts = model.predict(image)

    person_ids = np.arange(len(pts), dtype=np.int32)

    for i, (pt, pid) in enumerate(zip(pts, person_ids)):
        frame = draw_points_and_skeleton(
            image,
            pt,
            joints_dict()[hrnet_joints_set]['skeleton'],
            person_index=pid,
            points_color_palette='gist_rainbow',
            skeleton_color_palette='jet',
            points_palette_samples=10)
    if has_display:
        cv2.imshow('frame.png', frame)
        cv2.waitKey(0)
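For reference, a hypothetical direct invocation of this entry point; the argument values are assumptions mirroring the defaults used in the surrounding examples:

main(filename="person.jpg", hrnet_m="HRNet", hrnet_c=48, hrnet_j=17,
     hrnet_weights="./weights/pose_hrnet_w48_384x288.pth",
     hrnet_joints_set="coco", image_resolution="(384, 288)",
     max_batch_size=16, device=None)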
Example #4
def main(camera_id, filename, hrnet_m, hrnet_c, hrnet_j, hrnet_weights,
         hrnet_joints_set, image_resolution, single_person, use_tiny_yolo,
         disable_tracking, max_batch_size, disable_vidgear, save_video,
         video_format, video_framerate, device, exercise_type):
    if device is not None:
        device = torch.device(device)
    else:
        if torch.cuda.is_available():
            torch.backends.cudnn.deterministic = True
            device = torch.device('cuda')
        else:
            device = torch.device('cpu')

    # print(device)
    image_resolution = ast.literal_eval(image_resolution)
    has_display = 'DISPLAY' in os.environ.keys() or sys.platform == 'win32'
    video_writer = None

    if filename is not None:
        video = cv2.VideoCapture(filename)
        assert video.isOpened()
    else:
        if disable_vidgear:
            video = cv2.VideoCapture(camera_id)
            assert video.isOpened()
        else:
            video = CamGear(camera_id).start()
    if use_tiny_yolo:
        yolo_model_def = "./models/detectors/yolo/config/yolov3-tiny.cfg"
        yolo_class_path = "./models/detectors/yolo/data/coco.names"
        yolo_weights_path = "./models/detectors/yolo/weights/yolov3-tiny.weights"
    else:
        yolo_model_def = "./models/detectors/yolo/config/yolov3.cfg"
        yolo_class_path = "./models/detectors/yolo/data/coco.names"
        yolo_weights_path = "./models/detectors/yolo/weights/yolov3.weights"

    model = SimpleHRNet(hrnet_c,
                        hrnet_j,
                        hrnet_weights,
                        model_name=hrnet_m,
                        resolution=image_resolution,
                        multiperson=not single_person,
                        return_bounding_boxes=not disable_tracking,
                        max_batch_size=max_batch_size,
                        yolo_model_def=yolo_model_def,
                        yolo_class_path=yolo_class_path,
                        yolo_weights_path=yolo_weights_path,
                        device=device)

    if not disable_tracking:
        prev_boxes = None
        prev_pts = None
        prev_person_ids = None
        next_person_id = 0

    flag = 0
    prev_flag = flag
    counter = 0
    angle = 0
    dist = 0
    prev_dist = dist

    while True:
        t = time.time()

        if filename is not None or disable_vidgear:
            ret, frame = video.read()
            if not ret:
                break

        else:
            frame = video.read()
            if frame is None:
                break

        pts = model.predict(frame)

        if not disable_tracking:
            boxes, pts = pts

        if not disable_tracking:
            if len(pts) > 0:
                if prev_pts is None and prev_person_ids is None:
                    person_ids = np.arange(next_person_id,
                                           len(pts) + next_person_id,
                                           dtype=np.int32)
                    next_person_id = len(pts) + 1
                else:
                    boxes, pts, person_ids = find_person_id_associations(
                        boxes=boxes,
                        pts=pts,
                        prev_boxes=prev_boxes,
                        prev_pts=prev_pts,
                        prev_person_ids=prev_person_ids,
                        next_person_id=next_person_id,
                        pose_alpha=0.2,
                        similarity_threshold=0.4,
                        smoothing_alpha=0.1,
                    )
                    next_person_id = max(next_person_id,
                                         np.max(person_ids) + 1)

            else:
                person_ids = np.array((), dtype=np.int32)

            prev_boxes = boxes.copy()
            prev_pts = pts.copy()
            prev_person_ids = person_ids

        else:
            person_ids = np.arange(len(pts), dtype=np.int32)

        start_point = (45, 5)
        end_point = (1300, 250)
        colorr = (0, 0, 0)
        thicknessr = -1
        frame = cv2.rectangle(frame, start_point, end_point, colorr,
                              thicknessr)

        if exercise_type == 1:  #for pushUps

            for i, (pt, pid) in enumerate(zip(pts, person_ids)):
                frame, angle = draw_points_and_skeleton(
                    frame,
                    pt,
                    joints_dict()[hrnet_joints_set]['skeleton'],
                    person_index=pid,
                    points_color_palette='gist_rainbow',
                    skeleton_color_palette='jet',
                    points_palette_samples=10,
                    exercise_type=1)

            fps = 1. / (time.time() - t)
            print('\rframerate: %f fps' % fps, end='')

            #angle=findangle(frame, pts, joints_dict()[hrnet_joints_set]['skeleton'])
            font = cv2.FONT_HERSHEY_SIMPLEX
            x, y, l = frame.shape
            org = (50, 80)
            fontScale = 1
            color = (255, 255, 255)
            thickness = 2
            frame = cv2.putText(frame, str(angle), org, font, fontScale, color,
                                thickness, cv2.LINE_AA)
            if (len(pts) > 0):
                if (angle > 150):
                    flag = 0
                if (angle < 90):
                    flag = 1
                if (prev_flag == 1 and flag == 0):
                    counter = counter + 1

            prev_flag = flag

            font = cv2.FONT_HERSHEY_SIMPLEX
            x, y, l = frame.shape
            org = (50, 180)
            fontScale = 4
            color = (255, 255, 255)
            thickness = 8
            text = "PushUps Count=" + str(counter)
            frame = cv2.putText(frame, text, org, font, fontScale, color,
                                thickness, cv2.LINE_AA)

        elif exercise_type == 2:  #for squats

            for i, (pt, pid) in enumerate(zip(pts, person_ids)):
                frame, angle = draw_points_and_skeleton(
                    frame,
                    pt,
                    joints_dict()[hrnet_joints_set]['skeleton'],
                    person_index=pid,
                    points_color_palette='gist_rainbow',
                    skeleton_color_palette='jet',
                    points_palette_samples=10,
                    exercise_type=2)

            fps = 1. / (time.time() - t)
            print('\rframerate: %f fps' % fps, end='')

            #angle=findangle(frame, pts, joints_dict()[hrnet_joints_set]['skeleton'])
            font = cv2.FONT_HERSHEY_SIMPLEX
            x, y, l = frame.shape
            org = (50, 80)
            fontScale = 1
            color = (255, 255, 255)
            thickness = 2
            frame = cv2.putText(frame, str(angle), org, font, fontScale, color,
                                thickness, cv2.LINE_AA)
            if (len(pts) > 0):
                if (angle > 168):
                    flag = 0
                if (angle < 48):
                    flag = 1
                if (prev_flag == 1 and flag == 0):
                    counter = counter + 1

            prev_flag = flag

            font = cv2.FONT_HERSHEY_SIMPLEX
            x, y, l = frame.shape
            org = (50, 180)
            fontScale = 4
            color = (255, 255, 255)
            thickness = 8
            text = "Squats Count=" + str(counter)
            frame = cv2.putText(frame, text, org, font, fontScale, color,
                                thickness, cv2.LINE_AA)

        elif exercise_type == 3:  #for ChinUps

            for i, (pt, pid) in enumerate(zip(pts, person_ids)):
                frame, dist = draw_points_and_skeleton(
                    frame,
                    pt,
                    joints_dict()[hrnet_joints_set]['skeleton'],
                    person_index=pid,
                    points_color_palette='gist_rainbow',
                    skeleton_color_palette='jet',
                    points_palette_samples=10,
                    exercise_type=3)

            fps = 1. / (time.time() - t)
            print('\rframerate: %f fps' % fps, end='')

            #angle=findangle(frame, pts, joints_dict()[hrnet_joints_set]['skeleton'])
            font = cv2.FONT_HERSHEY_SIMPLEX
            x, y, l = frame.shape
            org = (50, 80)
            fontScale = 1
            color = (255, 255, 255)
            thickness = 2
            frame = cv2.putText(frame, str(dist), org, font, fontScale, color,
                                thickness, cv2.LINE_AA)
            if (len(pts) > 0):
                if (dist == -1 and prev_dist == 1):
                    counter = counter + 1

            prev_dist = dist

            font = cv2.FONT_HERSHEY_SIMPLEX
            x, y, l = frame.shape
            org = (50, 180)
            fontScale = 4
            color = (255, 255, 255)
            thickness = 8
            text = "ChinUps Count=" + str(counter)
            frame = cv2.putText(frame, text, org, font, fontScale, color,
                                thickness, cv2.LINE_AA)

        if has_display:
            cv2.imshow('frame.png', frame)
            k = cv2.waitKey(1)
            if k == 27:  # Esc button
                if disable_vidgear:
                    video.release()
                else:
                    video.stop()
                break
        else:
            cv2.imwrite('frame.png', frame)

        if save_video:
            if video_writer is None:
                fourcc = cv2.VideoWriter_fourcc(*video_format)  # video format
                video_writer = cv2.VideoWriter(
                    'arnleft.avi', fourcc, video_framerate,
                    (frame.shape[1], frame.shape[0]))
            video_writer.write(frame)

    if save_video:
        video_writer.release()
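The push-up, squat and chin-up branches above share one rep-counting idea: a hysteresis flag that is set below a "down" threshold, cleared above an "up" threshold, and counted on every 1 -> 0 transition. A stand-alone sketch of that logic, with the push-up thresholds as defaults:

def update_rep_counter(angle, flag, prev_flag, counter, up=150, down=90):
    if angle > up:    # fully extended: clear the flag
        flag = 0
    if angle < down:  # fully flexed: set the flag
        flag = 1
    if prev_flag == 1 and flag == 0:
        counter += 1  # one flex-then-extend cycle counts as one rep
    return flag, counter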
Example #5
args = parser.parse_args()

DIR = args.file

# ROOT_DIR = os.path.abspath("./")
ROOT_DIR = args.imagedir
IMAGE_DIR = os.path.join(ROOT_DIR, DIR + '_mask')
OUT_DIR = os.path.join(ROOT_DIR, DIR + '_hrnet')

os.makedirs(OUT_DIR, exist_ok=True)

model = SimpleHRNet(48,
                    17,
                    './weights/pose_hrnet_w48_384x288.pth',
                    multiperson=False)
joints_dict = visualization.joints_dict()

file_names = next(os.walk(IMAGE_DIR))[2]

path_p = os.path.join(ROOT_DIR, 'hrnet_2d_pos.csv')
path_w = os.path.join(ROOT_DIR, DIR + '_pelvis.csv')

pos_text = ''
pelvis_text = ''

for file_name in sorted(file_names):
    image = cv2.imread(os.path.join(IMAGE_DIR, file_name), cv2.IMREAD_COLOR)
    joints = model.predict(image)

    pos2d = joints[0]
Example #6
    def main(self, args):
        if args.device is not None:
            device = torch.device(args.device)
        else:
            if torch.cuda.is_available():
                torch.backends.cudnn.deterministic = True
                device = torch.device('cuda:0')
            else:
                device = torch.device('cpu')

        print(device)

        image_resolution = ast.literal_eval(args.image_resolution)
        has_display = 'DISPLAY' in os.environ.keys() or sys.platform == 'win32'
        has_display = False  # force headless mode
        if args.filename is not None:
            video = cv2.VideoCapture(args.filename)
            assert video.isOpened()
        else:
            if args.disable_vidgear:
                video = cv2.VideoCapture(args.camera_id)
                assert video.isOpened()
            else:
                video = CamGear(args.camera_id).start()
        model = SimpleHRNet(
            args.hrnet_c,
            args.hrnet_j,
            args.hrnet_weights,
            resolution=image_resolution,
            multiperson=not args.single_person,
            #multiperson= False,
            max_batch_size=args.max_batch_size,
            device=device)
        num_of_frame = 0
        self.num_of_std = 0
        self.error_box_text = ' '
        start = False
        flag = False
        root = os.path.join(args.save_root, 'sit_ups_v4_test')

        if not os.path.exists(root):
            os.mkdir(root)

        while True:
            if args.filename is not None or args.disable_vidgear:
                ret, self.frame = video.read()
                if not ret:
                    break
            else:
                self.frame = video.read()
                if self.frame is None:
                    break
            pts = model.predict(self.frame)

            for i, pt in enumerate(pts):
                self.frame = draw_points_and_skeleton(
                    self.frame,
                    pt,
                    joints_dict()[args.hrnet_joints_set]['skeleton'],
                    person_index=i,
                    points_color_palette='gist_rainbow',
                    skeleton_color_palette='jet',
                    points_palette_samples=10)
            print('pts', pts)
            if not start:
                self.text_ready = '请双肩着地,双手抱头'  # "Shoulders on the floor, hands behind your head"
                angle_stg, angle_sew, angle_hma_start = self.cal_angle(
                    pts, 'start')
                if angle_stg <= 5 and angle_sew <= 90 and angle_hma_start <= 10:
                    start = True
                else:
                    start = False
                self.state_box_text = self.text_ready

            elif start:
                self.text_elbow_touch_knee = '请双手抱头坐起肘部触膝'  # "Sit up with hands behind your head, elbows to knees"
                self.state_box_text = self.text_elbow_touch_knee

            if has_display:
                cv2.imshow('frame.png', self.frame)
                k = cv2.waitKey(1)
                if k == 27:  # Esc button
                    if args.disable_vidgear:
                        video.release()
                    else:
                        video.stop()
                    break
            else:
                ratio_between_distance, angle_hks, angle_hma_standard, x_diff_elbow_knee, avg_conf = self.cal_angle(
                    pts, 'stardard')
                print('avg_conf', avg_conf)
                if avg_conf < 0.2:
                    start = False
                    self.text = "count_{}".format(self.num_of_std)
                    self.count(self.frame, self.text, num_of_frame, root,
                               video)
                    num_of_frame += 1
                    continue

                raise_feet = np.absolute(
                    angle_hma_start - angle_hma_standard) > 5
                if angle_hks <= 70 and start and (
                        ratio_between_distance
                        or x_diff_elbow_knee < 0) and not raise_feet:
                    self.text = "count_{}".format(self.num_of_std)
                    self.count(self.frame, self.text, num_of_frame, root,
                               video)
                    self.num_of_std += 1
                    start = False
                    flag = True
                elif angle_hks <= 70 and (
                        ratio_between_distance or x_diff_elbow_knee < 0
                ) and not raise_feet and not start and not flag:
                    self.text_error = '犯规,手部动作不规范'  # "Foul: improper hand movement"
                    self.error_box_text = self.text_error
                    self.text = "count_{}".format(self.num_of_std)
                    self.count(self.frame, self.text, num_of_frame, root,
                               video)
                else:
                    self.text = "count_{}".format(self.num_of_std)
                    self.count(self.frame, self.text, num_of_frame, root,
                               video)

            #yield (self.state_box_text, self.error_box_text, self.frame, self.num_of_std)
            #print('time', time.time() - start_time)
            self.error_box_text = ' '
            num_of_frame += 1
Example #7
def live(camera_id, filename, hrnet_m, hrnet_c, hrnet_j, hrnet_weights,
         hrnet_joints_set, image_resolution, disable_tracking, max_batch_size,
         disable_vidgear, save_video, video_format, video_framerate, device):
    if device is not None:
        device = torch.device(device)
    else:
        if torch.cuda.is_available():
            torch.backends.cudnn.deterministic = True
            device = torch.device('cuda')
        else:
            device = torch.device('cpu')

    # print(device)
    if save_video: print('save video.')
    image_resolution = ast.literal_eval(image_resolution)
    has_display = 'DISPLAY' in os.environ.keys() or sys.platform == 'win32'
    video_writer = None

    if filename is not None:
        rotation_code = check_video_rotation(filename)
        video = cv2.VideoCapture(filename)
        assert video.isOpened()
    else:
        rotation_code = None
        if disable_vidgear:
            video = cv2.VideoCapture(camera_id)
            assert video.isOpened()
        else:
            video = CamGear(camera_id).start()

    model = OnlySimpleHRNet(hrnet_c,
                            hrnet_j,
                            hrnet_weights,
                            model_name=hrnet_m,
                            resolution=image_resolution,
                            max_batch_size=max_batch_size,
                            return_bounding_boxes=True,
                            device=device)
    if filename is not None or disable_vidgear:
        nof_frames = video.get(cv2.CAP_PROP_FRAME_COUNT)
    else:
        nof_frames = -1  # frame count is not available from a CamGear stream
    if not disable_tracking:
        prev_boxes = None
        prev_pts = None
        prev_person_ids = None
        next_person_id = 0
    index = 0
    while True:
        t = time.time()

        if filename is not None or disable_vidgear:
            ret, frame = video.read()
            if not ret:
                break
            if rotation_code is not None:
                frame = cv2.rotate(frame, rotation_code)
        else:
            frame = video.read()
            if frame is None:
                break

        pts = model.predict(frame)

        if not disable_tracking:
            boxes, pts = pts
        # import pdb;pdb.set_trace()
        if not disable_tracking:
            if len(pts) > 0:
                if prev_pts is None and prev_person_ids is None:
                    person_ids = np.arange(next_person_id,
                                           len(pts) + next_person_id,
                                           dtype=np.int32)
                    next_person_id = len(pts) + 1
                else:
                    boxes, pts, person_ids = find_person_id_associations(
                        boxes=boxes,
                        pts=pts,
                        prev_boxes=prev_boxes,
                        prev_pts=prev_pts,
                        prev_person_ids=prev_person_ids,
                        next_person_id=next_person_id,
                        pose_alpha=0.2,
                        similarity_threshold=0.4,
                        smoothing_alpha=0.1,
                    )
                    next_person_id = max(next_person_id,
                                         np.max(person_ids) + 1)
            else:
                person_ids = np.array((), dtype=np.int32)

            prev_boxes = boxes.copy()
            prev_pts = pts.copy()
            prev_person_ids = person_ids

        else:
            person_ids = np.arange(len(pts), dtype=np.int32)

        for i, (pt, pid) in enumerate(zip(pts, person_ids)):
            frame = draw_points_and_skeleton(
                frame,
                pt,
                joints_dict()[hrnet_joints_set]['skeleton'],
                person_index=pid,
                points_color_palette='gist_rainbow',
                skeleton_color_palette='jet',
                points_palette_samples=10)
        color_map = ['red', 'green', 'blue', 'yellow', 'purple', 'white']

        fps = 1. / (time.time() - t)
        print('\rframe: % 4d / %d - framerate: %f fps ' %
              (index, nof_frames - 1, fps),
              end='')
        index += 1
        # if has_display:
        #     cv2.imshow('frame.png', frame)
        #     k = cv2.waitKey(1)
        #     if k == 27:  # Esc button
        #         if disable_vidgear:
        #             video.release()
        #         else:
        #             video.stop()
        #         break
        # else:
        #     cv2.imwrite('frame.png', frame)

        video_full_name = os.path.basename(filename)
        output_root = '/home/mmlab/CCTV_Server/golf/output'
        output_path = os.path.join(output_root, video_full_name)

        if save_video:
            if video_writer is None:
                fourcc = cv2.VideoWriter_fourcc(*video_format)  # video format
                video_writer = cv2.VideoWriter(
                    output_path, fourcc, video_framerate,
                    (frame.shape[1], frame.shape[0]))

            video_writer.write(frame)

    if save_video:
        video_writer.release()
Example #8
    def main(self):

        if self.args.device is not None:
            device = torch.device(self.args.device)
        else:
            if torch.cuda.is_available():
                torch.backends.cudnn.deterministic = True
                device = torch.device('cuda:0')
            else:
                device = torch.device('cpu')

        print(device)

        image_resolution = ast.literal_eval(self.args.image_resolution)
        has_display = 'DISPLAY' in os.environ.keys() or sys.platform == 'win32'
        has_display = False  # force headless mode
        if self.args.filename is not None:
            video = cv2.VideoCapture(self.args.filename)
            assert video.isOpened()
        else:
            if self.args.disable_vidgear:
                video = cv2.VideoCapture(self.args.camera_id)
                assert video.isOpened()
            else:
                video = CamGear(self.args.camera_id).start()

        model = SimpleHRNet(self.args.hrnet_c,
                            self.args.hrnet_j,
                            self.args.hrnet_weights,
                            resolution=image_resolution,
                            multiperson=not self.args.single_person,
                            max_batch_size=self.args.max_batch_size,
                            device=device)
        num_of_frame = 0
        num_of_std = 0
        start = False
        flag = False
        root = os.path.join(self.args.save_root, 'test')

        if not os.path.exists(root):
            os.mkdir(root)

        while True:
            if self.args.filename is not None or self.args.disable_vidgear:
                ret, self.frame = video.read()
                if not ret:
                    break
            else:
                self.frame = video.read()
                if self.frame is None:
                    break

            pts = model.predict(self.frame)

            for i, pt in enumerate(pts):
                self.frame = draw_points_and_skeleton(
                    self.frame,
                    pt,
                    joints_dict()[self.args.hrnet_joints_set]['skeleton'],
                    person_index=i,
                    points_color_palette='gist_rainbow',
                    skeleton_color_palette='jet',
                    points_palette_samples=10)

            # if not start:
            #     #print('pts', pts)
            #     angel = cal_angle(pts, 'start')
            #     start = True if angel <= 20 else False

            if not start:
                self.text_ready = 'please ready'
                cv2.putText(self.frame, self.text_ready, (50, 50),
                            cv2.FONT_HERSHEY_PLAIN, 2.0, (0, 0, 255), 2)
                angle = self.cal_angle(pts, 'start')
                start = angle <= 5

            if start:
                self.text_elbow_touch_knee = 'please elbow touch knee'
                cv2.putText(self.frame, self.text_elbow_touch_knee, (50, 50),
                            cv2.FONT_HERSHEY_PLAIN, 2.0, (0, 0, 255), 2)

            if has_display:
                cv2.imshow('frame.png', self.frame)
                k = cv2.waitKey(1)
                if k == 27:  # Esc button
                    if self.args.disable_vidgear:
                        video.release()
                    else:
                        video.stop()
                    break
            else:
                angle = self.cal_angle(pts, 'stardard')
                if angle <= 50 and start:
                    text = "count_{}".format(num_of_std)

                    self.count(self.frame, text, num_of_frame, root, video)

                    start = False
                    num_of_std += 1
                    flag = True

                elif angle <= 50 and not start and not flag:
                    self.text_error = 'fault wrong hands action'
                    cv2.putText(self.frame, self.text_error, (330, 50),
                                cv2.FONT_HERSHEY_PLAIN, 2.0, (0, 0, 255), 2)
                    self.text = "count_{}".format(num_of_std)
                    self.count(self.frame, self.text, num_of_frame, root,
                               video)
                    #print(type(frame))
                else:
                    self.text = "count_{}".format(num_of_std)
                    self.count(self.frame, self.text, num_of_frame, root,
                               video)

            #print('num_of_frame', num_of_frame)
            #print('pts', pts)
            num_of_frame += 1
Example #9
def main(camera_id, filename, hrnet_c, hrnet_j, hrnet_weights, hrnet_joints_set, image_resolution, single_person,
         max_batch_size, disable_vidgear, device):
    if device is not None:
        device = torch.device(device)
    else:
        if torch.cuda.is_available():
            torch.backends.cudnn.deterministic = True
            device = torch.device('cuda:0')
        else:
            device = torch.device('cpu')

    print(device)

    image_resolution = ast.literal_eval(image_resolution)
    has_display = 'DISPLAY' in os.environ.keys() or sys.platform == 'win32'

    if filename is not None:
        video = cv2.VideoCapture(filename)
        assert video.isOpened()
    else:
        if disable_vidgear:
            video = cv2.VideoCapture(camera_id)
            assert video.isOpened()
        else:
            video = CamGear(camera_id).start()

    model = THRNet(
        hrnet_c,
        hrnet_j,
        hrnet_weights,
        resolution=image_resolution,
        multiperson=not single_person,
        max_batch_size=max_batch_size,
        device=device
    )

    while True:
        if filename is not None or disable_vidgear:
            ret, frame = video.read()
            if not ret:
                break
        else:
            frame = video.read()
            if frame is None:
                break

        pts = model.predict(frame)

        for i, pt in enumerate(pts):
            frame = draw_points_and_skeleton(frame, pt, joints_dict()[hrnet_joints_set]['skeleton'], person_index=i,
                                             points_color_palette='gist_rainbow', skeleton_color_palette='jet',
                                             points_palette_samples=10)

        if has_display:
            cv2.imshow('frame.png', frame)
            k = cv2.waitKey(1)
            if k == 27:  # Esc button
                if disable_vidgear:
                    video.release()
                else:
                    video.stop()
                break
        else:
            cv2.imwrite('frame.png', frame)
Example #10
def main(camera_id, filename, hrnet_c, hrnet_j, hrnet_weights, hrnet_joints_set, image_resolution, single_person,
         max_batch_size, disable_vidgear, save_video, video_format, video_framerate, device):
    if device is not None:
        device = torch.device(device)
    else:
        if torch.cuda.is_available():
            torch.backends.cudnn.deterministic = True
            device = torch.device('cuda:0')
        else:
            device = torch.device('cpu')

    print(device)

    image_resolution = ast.literal_eval(image_resolution)
    has_display = 'DISPLAY' in os.environ.keys() or sys.platform == 'win32'
    video_writer = None

    if filename is not None:
        video = cv2.VideoCapture(filename)
        assert video.isOpened()
    else:
        if disable_vidgear:
            video = cv2.VideoCapture(camera_id)
            assert video.isOpened()
        else:
            video = CamGear(camera_id).start()

    model = SimpleHRNet(
        hrnet_c,
        hrnet_j,
        hrnet_weights,
        resolution=image_resolution,
        multiperson=not single_person,
        max_batch_size=max_batch_size,
        device=device
    )

    while True:
        t = time.time()

        if filename is not None or disable_vidgear:
            ret, frame = video.read()
            if not ret:
                break
        else:
            frame = video.read()
            if frame is None:
                break

        pts = model.predict(frame)

        for i, pt in enumerate(pts):
            frame = draw_points_and_skeleton(frame, pt, joints_dict()[hrnet_joints_set]['skeleton'], person_index=i,
                                             points_color_palette='gist_rainbow', skeleton_color_palette='jet',
                                             points_palette_samples=10)

        fps = 1. / (time.time() - t)
        print('\rframerate: %f fps' % fps, end='')

        if has_display:
            cv2.imshow('frame.png', frame)
            k = cv2.waitKey(1)
            if k == 27:  # Esc button
                if disable_vidgear:
                    video.release()
                else:
                    video.stop()
                break
        else:
            cv2.imwrite('frame.png', frame)

        if save_video:
            if video_writer is None:
                fourcc = cv2.VideoWriter_fourcc(*video_format)  # video format
                video_writer = cv2.VideoWriter('output.avi', fourcc, video_framerate, (frame.shape[1], frame.shape[0]))
            video_writer.write(frame)

    if save_video:
        video_writer.release()
Example #11
def open_app(camera_id=0,
             filename=None,
             hrnet_c=48,
             hrnet_j=17,
             hrnet_weights="./weights/pose_hrnet_w48_384x288.pth",
             hrnet_joints_set="coco",
             image_resolution='(384, 288)',
             single_person=True,
             max_batch_size=16,
             disable_vidgear=False,
             device=None):
    if device is not None:
        device = torch.device(device)
    else:
        if torch.cuda.is_available():
            torch.backends.cudnn.deterministic = True
            device = torch.device('cuda:0')
        else:
            device = torch.device('cpu')
    image_resolution = ast.literal_eval(image_resolution)
    has_display = 'DISPLAY' in os.environ.keys() or sys.platform == 'win32'
    if filename is not None:
        video = cv2.VideoCapture(filename)
        assert video.isOpened()
    else:
        if disable_vidgear:
            video = cv2.VideoCapture(camera_id)
            assert video.isOpened()
        else:
            video = CamGear(camera_id).start()

    model = SimpleHRNet(hrnet_c,
                        hrnet_j,
                        hrnet_weights,
                        resolution=image_resolution,
                        multiperson=not single_person,
                        max_batch_size=max_batch_size,
                        device=device)
    loaded_model = pickle.load(open("mlp_model_best.sav", 'rb'))
    no_to_label = {
        0: "tree",
        1: "warrior1",
        2: "warrior2",
        3: "childs",
        4: "downwarddog",
        5: "plank",
        6: "mountain",
        7: "trianglepose"
    }
    image_to_blob = {}
    for id, path in no_to_label.items():
        images = [
            cv2.imread(file)
            for file in glob.glob('sampleposes\\' + path + '.jpg')
        ]
        image_to_blob[id] = images
    while True:
        if filename is not None or disable_vidgear:
            ret, frame = video.read()
            if not ret:
                break
        else:
            frame = video.read()
            if frame is None:
                break
        pts = model.predict(frame)
        resolution = frame.shape
        x_len = resolution[0]
        y_len = resolution[1]
        vector = []
        if len(pts) == 0:
            continue
        keypoints = pts[0]

        for pt in keypoints:
            pt = list(pt)
            temp = []
            temp.append((pt[0] / x_len))
            temp.append((pt[1] / y_len))
            vector.extend(temp)

        predicted_pose = loaded_model.predict([vector])
        text = no_to_label[predicted_pose[0]] + " pose"
        # the font settings below are assumed to be module-level constants
        cv2.putText(image_to_blob[predicted_pose[0]][0], text,
                    bottomLeftCornerOfText, font, fontScale, fontColor,
                    lineType)
        cv2.imshow("Suggestion", image_to_blob[predicted_pose[0]][0])
        k = cv2.waitKey(1)
        for i, pt in enumerate(pts):
            frame = draw_points_and_skeleton(
                frame,
                pt,
                joints_dict()[hrnet_joints_set]['skeleton'],
                person_index=i,
                points_color_palette='gist_rainbow',
                skeleton_color_palette='jet',
                points_palette_samples=10)

        if has_display:
            cv2.imshow('frame.png', frame)
            k = cv2.waitKey(1)
            if k == 27:  # Esc button
                if disable_vidgear:
                    video.release()
                else:
                    video.stop()
                break
        else:
            cv2.imwrite('frame.png', frame)
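The "mlp_model_best.sav" classifier loaded above is not produced in this snippet. A hedged sketch of how such a model could be trained on the same 34-value keypoint vectors; the random arrays stand in for a real labelled dataset:

# Hypothetical training script for a model like "mlp_model_best.sav".
import pickle
import numpy as np
from sklearn.neural_network import MLPClassifier

X = np.random.rand(100, 34)            # placeholder keypoint vectors
y = np.random.randint(0, 8, size=100)  # placeholder labels for the 8 poses
clf = MLPClassifier(hidden_layer_sizes=(64, 32), max_iter=500)
clf.fit(X, y)
pickle.dump(clf, open("mlp_model_best.sav", "wb"))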
Example #12
def generate_output(
    input_filename="test.mp4",
    output_filename="output.mp4",
    exercise_type=1,
    email='*****@*****.**',
    camera_id=0,
    hrnet_weights="./weights/w32_256×192.pth",
    image_resolution="(256,192)",
    hrnet_j=17,
    hrnet_m="HRNet",
    hrnet_c=32,
    hrnet_joints_set="coco",
    single_person=True,
    use_tiny_yolo=False,
    disable_tracking=False,
    max_batch_size=16,
    disable_vidgear=False,
    save_video=True,
    video_format="MJPG",
    video_framerate=30,
    device=None,
):
    if device is not None:
        device = torch.device(device)
    else:
        if torch.cuda.is_available():
            torch.backends.cudnn.deterministic = True
            device = torch.device("cuda")
        else:
            device = torch.device("cpu")

    image_resolution = ast.literal_eval(image_resolution)
    video_writer = None
    if input_filename is not None:
        video = cv2.VideoCapture(input_filename)
        assert video.isOpened()
    else:
        if disable_vidgear:
            video = cv2.VideoCapture(camera_id)
            assert video.isOpened()
        else:
            video = CamGear(camera_id).start()

    if use_tiny_yolo:
        yolo_model_def = "./models/detectors/yolo/config/yolov3-tiny.cfg"
        yolo_class_path = "./models/detectors/yolo/data/coco.names"
        yolo_weights_path = "./models/detectors/yolo/weights/yolov3-tiny.weights"
    else:
        yolo_model_def = "./models/detectors/yolo/config/yolov3.cfg"
        yolo_class_path = "./models/detectors/yolo/data/coco.names"
        yolo_weights_path = "./models/detectors/yolo/weights/yolov3.weights"

    model = SimpleHRNet(
        hrnet_c,
        hrnet_j,
        hrnet_weights,
        model_name=hrnet_m,
        resolution=image_resolution,
        multiperson=not single_person,
        return_heatmaps=False,
        return_bounding_boxes=not disable_tracking,
        max_batch_size=max_batch_size,
        yolo_model_def=yolo_model_def,
        yolo_class_path=yolo_class_path,
        yolo_weights_path=yolo_weights_path,
        device=device,
    )

    if not disable_tracking:
        prev_boxes = None
        prev_pts = None
        prev_person_ids = None
        next_person_id = 0

    flag = 0
    prev_flag = flag
    counter = 0
    data = 0
    prev_data = data

    while True:
        t = time.time()

        if input_filename is not None or disable_vidgear:
            ret, frame = video.read()
            if not ret:
                break
        else:
            frame = video.read()
            if frame is None:
                break

        pts = model.predict(frame)
        if not disable_tracking:
            boxes, pts = pts
            if len(pts) > 0:
                if prev_pts is None and prev_person_ids is None:
                    person_ids = np.arange(next_person_id,
                                           len(pts) + next_person_id,
                                           dtype=np.int32)
                    next_person_id = len(pts) + 1
                else:
                    boxes, pts, person_ids = find_person_id_associations(
                        boxes=boxes,
                        pts=pts,
                        prev_boxes=prev_boxes,
                        prev_pts=prev_pts,
                        prev_person_ids=prev_person_ids,
                        next_person_id=next_person_id,
                        pose_alpha=0.2,
                        similarity_threshold=0.4,
                        smoothing_alpha=0.1,
                    )
                    next_person_id = max(next_person_id,
                                         np.max(person_ids) + 1)

            else:
                person_ids = np.array((), dtype=np.int32)

            prev_boxes = boxes.copy()
            prev_pts = pts.copy()
            prev_person_ids = person_ids
        else:
            person_ids = np.arange(len(pts), dtype=np.int32)

        for i, (pt, pid) in enumerate(zip(pts, person_ids)):
            frame, data = draw_points_and_skeleton(
                frame,
                pt,
                joints_dict()[hrnet_joints_set]["skeleton"],
                person_index=pid,
                exercise_type=exercise_type,
            )

        frame = cv2.rectangle(
            frame,
            (0, 0),
            (int(frame.shape[1] * 0.7), int(frame.shape[0] * 0.1)),
            (0, 0, 0),
            -1,
        )

        fps = 1.0 / (time.time() - t)
        font = cv2.FONT_HERSHEY_SIMPLEX
        org = (int(frame.shape[1] * 0.01), int(frame.shape[0] * 0.035))
        fontScale = frame.shape[0] * 0.0014
        color = (255, 255, 255)
        thickness = 1
        frame = cv2.putText(
            frame,
            "FPS: {:.3f}".format(fps),
            org,
            font,
            fontScale * 0.35,
            color,
            thickness,
            cv2.LINE_AA,
        )

        if exercise_type == 1:  # for pushUps

            if len(pts) > 0:
                if data > 160:
                    flag = 0
                if data < 90:
                    flag = 1
                if prev_flag == 1 and flag == 0:
                    counter = counter + 1

            prev_flag = flag

            org = (int(frame.shape[1] * 0.01), int(frame.shape[0] * 0.08))
            text = "PushUps Count=" + str(counter)
            frame = cv2.putText(frame, text, org, font, fontScale, color,
                                thickness * 2, cv2.LINE_AA)

        elif exercise_type == 2:  # for sit-ups

            if len(pts) > 0:
                if data > 150:
                    flag = 0
                if data < 90:
                    flag = 1
                if prev_flag == 1 and flag == 0:
                    counter = counter + 1

            prev_flag = flag

            org = (int(frame.shape[1] * 0.01), int(frame.shape[0] * 0.08))
            text = "Situps Count=" + str(counter)
            frame = cv2.putText(frame, text, org, font, fontScale, color,
                                thickness * 2, cv2.LINE_AA)

        elif exercise_type == 3:  # for PullUps

            if len(pts) > 0:
                if data == -1 and prev_data == 1:
                    counter = counter + 1

            prev_data = data

            org = (int(frame.shape[1] * 0.01), int(frame.shape[0] * 0.08))
            text = "PullUps Count=" + str(counter)
            frame = cv2.putText(frame, text, org, font, fontScale, color,
                                thickness * 2, cv2.LINE_AA)

        elif exercise_type == 4:  # for dumbell curl

            if len(pts) > 0:
                if data > 110:
                    flag = 0
                if data < 65:
                    flag = 1
                if prev_flag == 1 and flag == 0:
                    counter = counter + 1

            prev_flag = flag

            org = (int(frame.shape[1] * 0.01), int(frame.shape[0] * 0.08))
            text = "Dumbell Curl Count=" + str(counter)
            frame = cv2.putText(frame, text, org, font, fontScale, color,
                                thickness * 2, cv2.LINE_AA)

        elif exercise_type == 5:  # for dumbell side lateral

            if len(pts) > 0:
                if data == -1 and prev_data == 1:
                    counter = counter + 1

            prev_data = data

            org = (int(frame.shape[1] * 0.01), int(frame.shape[0] * 0.08))
            text = "Dumbell Side Count=" + str(counter)
            frame = cv2.putText(frame, text, org, font, fontScale, color,
                                thickness * 2, cv2.LINE_AA)

        if save_video:
            if video_writer is None:
                fourcc = cv2.VideoWriter_fourcc(*video_format)  # video format
                video_writer = cv2.VideoWriter(
                    output_filename,
                    fourcc,
                    video_framerate,
                    (frame.shape[1], frame.shape[0]),
                )
            video_writer.write(frame)

    if save_video:
        video_writer.release()
    print("Video processing complete")

    mail_content = f'''Hey,
    Your video has finished processing. You can view your video here : 
    {SITE_URL}{output_filename}
    Thank You
    '''

    message = MIMEMultipart()
    message['From'] = SENDER_ADDRESS
    message['To'] = email
    message['Subject'] = 'Exercise Counter Processing Finished'

    message.attach(MIMEText(mail_content, 'plain'))
    text = message.as_string()
    session.sendmail(SENDER_ADDRESS, email, text)

    print("email sent")
Example #13
def main(camera_id, filename, hrnet_c, hrnet_j, hrnet_weights, hrnet_joints_set, image_resolution, single_person,
         max_batch_size, disable_vidgear, device, save_root, save_dir):
    if device is not None:
        device = torch.device(device)
    else:
        if torch.cuda.is_available():
            torch.backends.cudnn.deterministic = True
            device = torch.device('cuda:0')
        else:
            device = torch.device('cpu')

    print(device)

    image_resolution = ast.literal_eval(image_resolution)
    has_display = 'DISPLAY' in os.environ.keys() or sys.platform == 'win32'
    has_display = False  # force headless mode

    if filename is not None:
        video = cv2.VideoCapture(filename)
        assert video.isOpened()
    else:
        if disable_vidgear:
            video = cv2.VideoCapture(camera_id)
            assert video.isOpened()
        else:
            video = CamGear(camera_id).start()

    model = SimpleHRNet(
        hrnet_c,
        hrnet_j,
        hrnet_weights,
        resolution=image_resolution,
        multiperson=not single_person,
        max_batch_size=max_batch_size,
        device=device
    )

    num_of_std = 0
    num_of_frame = 0
    start = False
    root = os.path.join(save_root, 'test_v1')

    if not os.path.exists(root):
        os.mkdir(root)

    while True:

        if filename is not None or disable_vidgear:
            ret, frame = video.read()
            if not ret:
                break
        else:
            frame = video.read()
            if frame is None:
                break

        pts = model.predict(frame)
        if len(pts) == 0:
            continue

        for i, pt in enumerate(pts):
            frame = draw_points_and_skeleton(frame, pt, joints_dict()[hrnet_joints_set]['skeleton'], person_index=i,
                                             points_color_palette='gist_rainbow', skeleton_color_palette='jet',
                                             points_palette_samples=10)


        print('num of frame', num_of_frame)

        if not start:
            print('pts', pts)
            angle = cal_angle(pts)
            start = angle >= 150
        if has_display:
            cv2.imshow('frame.png', frame)
            k = cv2.waitKey(1)
            if k == 27:  # Esc button
                if disable_vidgear:
                    video.release()
                else:
                    video.stop()
                break
        else:
            left_ear_height = pts[0][3][0]  # pts -> (y, x, conf)
            right_ear_height = pts[0][4][0]
            avg_ear_height = (left_ear_height + right_ear_height) / 2

            left_wrist_height = pts[0][9][0]
            right_wrist_height = pts[0][10][0]
            avg_wrist_height = (left_wrist_height + right_wrist_height) / 2

            left_shoulder_height = pts[0][5][0]
            right_shoulder_height = pts[0][6][0]
            avg_shoulder_height = (left_shoulder_height + right_shoulder_height) / 2

            if avg_ear_height < avg_wrist_height:
                ear_wrist_diff = avg_wrist_height - avg_ear_height
                wrist_shoulder_diff = avg_shoulder_height - avg_wrist_height

                ratio = ear_wrist_diff / wrist_shoulder_diff

                if 0.5 <= ratio <= 2 and start:
                    text = "count:{}".format(num_of_std)
                    num_of_std += 1
                    count(frame, text, num_of_frame, root, video)
                    start = False
                else:
                    text = "count:{}".format(num_of_std)
                    count(frame, text, num_of_frame, root, video)
            else:
                text = "count:{}".format(num_of_std)
                count(frame, text, num_of_frame, root, video)



        num_of_frame += 1
Example #14
def main(camera_id, filename, hrnet_c, hrnet_j, hrnet_weights,
         hrnet_joints_set, image_resolution, single_person, max_batch_size,
         disable_vidgear, device, save_root):

    #def main():
    if device is not None:
        device = torch.device(device)
    else:
        if torch.cuda.is_available():
            torch.backends.cudnn.deterministic = True
            device = torch.device('cuda:0')
        else:
            device = torch.device('cpu')

    print(device)
    print('max_batch_size', max_batch_size)
    image_resolution = ast.literal_eval(image_resolution)
    has_display = 'DISPLAY' in os.environ.keys() or sys.platform == 'win32'
    has_display = False  # force headless mode
    if filename is not None:
        video = cv2.VideoCapture(filename)
        assert video.isOpened()
    else:
        if disable_vidgear:
            video = cv2.VideoCapture(camera_id)
            assert video.isOpened()
        else:
            video = CamGear(camera_id).start()

    model = SimpleHRNet(hrnet_c,
                        hrnet_j,
                        hrnet_weights,
                        resolution=image_resolution,
                        multiperson=not single_person,
                        max_batch_size=max_batch_size,
                        device=device)
    num_of_frame = 0
    num_of_std = 0
    start = False
    flag = False
    root = os.path.join(save_root, 'sit_ups_v2_add_angle_hks')

    if not os.path.exists(root):
        os.mkdir(root)

    while True:
        if filename is not None or disable_vidgear:
            ret, frame = video.read()
            if not ret:
                break
        else:
            frame = video.read()
            if frame is None:
                break

        pts = model.predict(frame)

        for i, pt in enumerate(pts):
            frame = draw_points_and_skeleton(
                frame,
                pt,
                joints_dict()[hrnet_joints_set]['skeleton'],
                person_index=i,
                points_color_palette='gist_rainbow',
                skeleton_color_palette='jet',
                points_palette_samples=10)

        # if not start:
        #     #print('pts', pts)
        #     angel = cal_angle(pts, 'start')
        #     start = True if angel <= 20 else False

        if not start:
            #text_ready = 'please ready'
            #cv2.putText(frame, text_ready, (50,50), cv2.FONT_HERSHEY_PLAIN, 2.0, (0, 0, 255), 2)
            angle_stg, angle_sew, angle_ewe = cal_angle(pts, 'start')
            print('angle_ewe ', angle_ewe)
            if angle_stg <= 5 and angle_sew <= 90 and angle_ewe >= 120:
                start = True
            else:
                start = False
            #start = True if angel <= 5 else False

        # if start:
        #     #text_elbow_touch_knee = 'please elbow touch knee'
        #     cv2.putText(frame, text_elbow_touch_knee, (50, 50), cv2.FONT_HERSHEY_PLAIN, 2.0, (0, 0, 255), 2)

        if has_display:
            cv2.imshow('frame.png', frame)
            k = cv2.waitKey(1)
            if k == 27:  # Esc button
                if disable_vidgear:
                    video.release()
                else:
                    video.stop()
                break
        else:
            angle_mke, angle_hks = cal_angle(pts, 'stardard')
            print('angle_mke', angle_mke)
            print('angle_hks', angle_hks)
            # if angle_hks <= 50 and start and flag_elblow_over_knee:
            if start and angle_mke <= 90 and angle_hks <= 50:

                num_of_std += 1
                text = "count_{}".format(num_of_std)
                count(frame, text, num_of_frame, root, video)
                start = False
                flag = True

            elif angle_hks <= 60 and not start and not flag:
                print('True')
                text_error = 'fault wrong hands action'
                cv2.putText(frame, text_error, (330, 50),
                            cv2.FONT_HERSHEY_PLAIN, 2.0, (0, 0, 255), 2)
                text = "count_{}".format(num_of_std)
                count(frame, text, num_of_frame, root, video)
                #print(type(frame))
            else:
                text = "count_{}".format(num_of_std)
                count(frame, text, num_of_frame, root, video)

        print('num_of_frame', num_of_frame)
        #print('pts', pts)
        num_of_frame += 1
Example #15
    def match_by_tag_torch(self, data):
        joint_order = visualization.joints_dict()[self.joint_set]['order']

        tag_k, loc_k, val_k = data
        device = tag_k.device
        default_ = torch.zeros((self.num_joints, 3 + tag_k.shape[2]),
                               device=device)

        loc_k = loc_k.float()
        joint_k = torch.cat((loc_k, val_k[..., None], tag_k),
                            dim=2)  # nx30x2, nx30x1, nx30x1

        joint_dict = defaultdict(lambda: default_.clone().detach())
        tag_dict = {}
        for i in range(self.num_joints):
            idx = joint_order[i]

            tags = tag_k[idx]
            joints = joint_k[idx]
            mask = joints[:, 2] > self.detection_threshold
            tags = tags[mask]
            joints = joints[mask]

            if joints.shape[0] == 0:
                continue

            if i == 0 or len(joint_dict) == 0:
                for tag, joint in zip(tags, joints):
                    key = tag[0]
                    joint_dict[key.item()][idx] = joint
                    tag_dict[key.item()] = [tag]
            else:
                grouped_keys = list(joint_dict.keys())[:self.max_num_people]
                grouped_tags = [
                    torch.mean(torch.as_tensor(tag_dict[i]),
                               dim=0,
                               keepdim=True) for i in grouped_keys
                ]

                if self.ignore_too_much and len(
                        grouped_keys) == self.max_num_people:
                    continue

                grouped_tags = torch.as_tensor(grouped_tags, device=device)
                if len(grouped_tags.shape) < 2:
                    grouped_tags = grouped_tags.unsqueeze(0)

                diff = joints[:, None, 3:] - grouped_tags[None, :, :]
                diff_normed = torch.norm(diff, p=2, dim=2)
                diff_saved = diff_normed.clone().detach()

                if self.use_detection_val:
                    diff_normed = (torch.round(diff_normed) * 100
                                   - joints[:, 2:3])

                num_added = diff.shape[0]
                num_grouped = diff.shape[1]

                if num_added > num_grouped:
                    diff_normed = torch.cat(
                        (diff_normed,
                         torch.zeros((num_added, num_added - num_grouped),
                                     device=device) + 1e10),
                        dim=1)

                pairs = py_max_match(diff_normed.detach().cpu().numpy())
                for row, col in pairs:
                    if (row < num_added and col < num_grouped
                            and diff_saved[row][col] < self.tag_threshold):
                        key = grouped_keys[col]
                        joint_dict[key][idx] = joints[row]
                        tag_dict[key].append(tags[row])
                    else:
                        key = tags[row][0].item()
                        joint_dict[key][idx] = joints[row]
                        tag_dict[key] = [tags[row]]

        # # added to correctly limit the overall number of people
        # # this shouldn't be needed if self.ignore_too_much is True
        # if len(joint_dict.keys()) > self.max_num_people:
        #     # create a dictionary with {confidence: joint_dict key}
        #     joint_confidence = {torch.mean(v[:, 2]).item(): k for k, v in joint_dict.items()}
        #     # filter joint_dict to keep the first self.max_num_people elements with higher joint confidence
        #     joint_dict = {joint_confidence[k]: joint_dict[joint_confidence[k]]
        #                   for k in sorted(joint_confidence.keys(), reverse=True)[:self.max_num_people]}

        # ret = torch.tensor([joint_dict[i] for i in joint_dict], dtype=torch.float32, device=device)
        if len(joint_dict) > 0:
            ret = torch.stack([joint_dict[i] for i in joint_dict])
        else:
            # if no people are detected, return a tensor with size 0
            size = list(default_.size())
            size.insert(0, 0)
            ret = torch.zeros(size)
        return ret
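
# py_max_match above solves a minimum-cost assignment between candidate
# joints and the existing person groups. If the original wrapper is
# unavailable, an equivalent sketch based on scipy's Hungarian solver
# (an assumption, not the library's own code):
import numpy as np
from scipy.optimize import linear_sum_assignment

def py_max_match(cost_matrix):
    """Return (row, col) pairs of a minimum-cost assignment."""
    rows, cols = linear_sum_assignment(np.asarray(cost_matrix))
    return np.stack([rows, cols], axis=1)

# example: joint 0 is cheapest with group 1, joint 1 with group 0
print(py_max_match([[2.0, 0.1],
                    [0.2, 3.0]]))  # [[0 1] [1 0]]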
Ejemplo n.º 16
0
def main(camera_id, filename, hrnet_m, hrnet_c, hrnet_j, hrnet_weights,
         hrnet_joints_set, image_resolution, single_person, use_tiny_yolo,
         disable_tracking, max_batch_size, disable_vidgear, save_video,
         video_format, video_framerate, device, foldername=None):
    if device is not None:
        device = torch.device(device)
    else:
        if torch.cuda.is_available():
            torch.backends.cudnn.deterministic = True
            device = torch.device('cuda:0')
        else:
            device = torch.device('cpu')

    image_resolution = ast.literal_eval(image_resolution)
    has_display = 'DISPLAY' in os.environ.keys() or sys.platform == 'win32'
    video_writer = None
    falldown = FallDown()

    if foldername is not None:
        images_path = foldername
        images_name = os.listdir(images_path)
        images_path = [os.path.join(images_path, name) for name in images_name]
        images_path.sort()

    else:
        if filename is not None:
            rotation_code = check_video_rotation(filename)
            video = cv2.VideoCapture(filename)
            assert video.isOpened()
        else:
            rotation_code = None
            if disable_vidgear:
                video = cv2.VideoCapture(camera_id)
                assert video.isOpened()
            else:
                video = CamGear(camera_id).start()

    if use_tiny_yolo:
        yolo_model_def = "./models/detectors/yolo/config/yolov3-tiny.cfg"
        yolo_class_path = "./models/detectors/yolo/data/coco.names"
        yolo_weights_path = "./models/detectors/yolo/weights/yolov3-tiny.weights"
    else:
        yolo_model_def = "./models/detectors/yolo/config/yolov3.cfg"
        yolo_class_path = "./models/detectors/yolo/data/coco.names"
        yolo_weights_path = "./models/detectors/yolo/weights/yolov3.weights"

    model = SimpleHRNet(hrnet_c,
                        hrnet_j,
                        hrnet_weights,
                        model_name=hrnet_m,
                        resolution=image_resolution,
                        multiperson=not single_person,
                        return_bounding_boxes=not disable_tracking,
                        max_batch_size=max_batch_size,
                        yolo_model_def=yolo_model_def,
                        yolo_class_path=yolo_class_path,
                        yolo_weights_path=yolo_weights_path,
                        device=device)

    if not disable_tracking:
        prev_boxes = None
        prev_pts = None
        prev_person_ids = None
        next_person_id = 0

    step = 0
    while True:
        t = time.time()

        if foldername is None:
            if filename is not None or disable_vidgear:
                ret, frame = video.read()
                if not ret:
                    break
                if rotation_code is not None:
                    frame = cv2.rotate(frame, rotation_code)
            else:
                frame = video.read()
                if frame is None:
                    break

        else:
            if step >= len(images_path):
                break

            # Pre-process: load the next image from the folder
            path = images_path[step]
            step += 1  # advance even when the current image fails to load

            frame = cv2.imread(path, cv2.IMREAD_COLOR)
            if frame is None:
                logging.error("read image error: {}. skip it.".format(path))
                continue

        pts = model.predict(frame)

        if not disable_tracking:
            boxes, pts = pts

            if len(pts) > 0:
                if prev_pts is None and prev_person_ids is None:
                    person_ids = np.arange(next_person_id,
                                           len(pts) + next_person_id,
                                           dtype=np.int32)
                    next_person_id = len(pts) + 1
                else:
                    boxes, pts, person_ids = find_person_id_associations(
                        boxes=boxes,
                        pts=pts,
                        prev_boxes=prev_boxes,
                        prev_pts=prev_pts,
                        prev_person_ids=prev_person_ids,
                        next_person_id=next_person_id,
                        pose_alpha=0.2,
                        similarity_threshold=0.4,
                        smoothing_alpha=0.1,
                    )
                    next_person_id = max(next_person_id,
                                         np.max(person_ids) + 1)
            else:
                person_ids = np.array((), dtype=np.int32)

            prev_boxes = boxes.copy()
            prev_pts = pts.copy()
            prev_person_ids = person_ids

        else:
            person_ids = np.arange(len(pts), dtype=np.int32)

        for i, (pt, pid) in enumerate(zip(pts, person_ids)):
            frame = draw_points_and_skeleton(
                frame,
                pt,
                joints_dict()[hrnet_joints_set]['skeleton'],
                person_index=pid,
                points_color_palette='gist_rainbow',
                skeleton_color_palette='jet',
                points_palette_samples=10)

            frame = falldown.check_fall_down(
                frame, pt,
                joints_dict()[hrnet_joints_set]['skeleton'], video_framerate)

        fps = 1. / (time.time() - t)
        print('\rframerate: %f fps' % fps, end='')

        if has_display:
            cv2.imshow('frame.png', frame)
            k = cv2.waitKey(1)
            if k == 27:  # Esc button
                if disable_vidgear:
                    video.release()
                else:
                    video.stop()
                break
        else:
            cv2.imwrite('frame.png', frame)

        if save_video:
            if video_writer is None:
                fourcc = cv2.VideoWriter_fourcc(*video_format)  # video format
                video_writer = cv2.VideoWriter(
                    'output.avi', fourcc, video_framerate,
                    (frame.shape[1], frame.shape[0]))
            video_writer.write(frame)

    if save_video:
        video_writer.release()
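
# The FallDown class used above is not included in this excerpt. A minimal
# sketch of one plausible heuristic (shoulder-to-hip axis closer to
# horizontal than vertical for about half a second); the class body, COCO
# indices and threshold are assumptions, not the original implementation.
import cv2
import numpy as np

class FallDown:
    SHOULDERS, HIPS = (5, 6), (11, 12)  # COCO keypoint indices

    def __init__(self):
        self.horizontal_frames = 0

    def check_fall_down(self, frame, pt, skeleton, framerate):  # skeleton unused here
        mid_shoulder = np.mean([pt[i][:2] for i in self.SHOULDERS], axis=0)
        mid_hip = np.mean([pt[i][:2] for i in self.HIPS], axis=0)
        dy, dx = np.abs(mid_shoulder - mid_hip)  # keypoints are (y, x, conf)
        self.horizontal_frames = self.horizontal_frames + 1 if dx > dy else 0
        if self.horizontal_frames > framerate // 2:
            cv2.putText(frame, 'FALL', (30, 60), cv2.FONT_HERSHEY_SIMPLEX,
                        2.0, (0, 0, 255), 3)
        return frame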
Ejemplo n.º 17
0
def image_keypoint(
        frame,
        camera_id=0,
        filename=None,
        hrnet_m='HRNet',
        hrnet_c=48,
        hrnet_j=17,
        hrnet_weights="/media/koshiba/Data/simple-HRNet/weights/pose_hrnet_w48_384x288.pth",
        hrnet_joints_set="coco",
        image_resolution='(384, 288)',
        # the flags below were argparse "store_true" leftovers in the
        # original; plain booleans preserve the (truthy) behaviour
        single_person=True,
        use_tiny_yolo=True,
        disable_tracking=True,
        max_batch_size=16,
        disable_vidgear=True,
        save_video=True,
        video_format='MJPG',
        video_framerate=30,
        device=None):

    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    else:
        device = torch.device(device)
    torch.backends.cudnn.deterministic = True

    inputPath = '/media/koshiba/Data/simple-HRNet/inputData'
    hrnetPath = '/media/koshiba/Data/simple-HRNet'

    # print(device)

    image_resolution = ast.literal_eval(image_resolution)
    video_writer = None
    '''
    videoName = filename.split('/')[-1][:-4]
    print(videoName)

    rotation_code = check_video_rotation(filename)
    video = cv2.VideoCapture(filename)
    assert video.isOpened()

    width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))  # width
    height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))  # height
    frame_rate = int(video.get(cv2.CAP_PROP_FPS))  # FPS
    print(width, height, frame_rate)
    fourcc = cv2.VideoWriter_fourcc('m','p','4','v')  # mp4 output

    video_writer = cv2.VideoWriter('/media/koshiba/Data/simple-HRNet/outputData/' + videoName + '_output.mp4', fourcc, frame_rate, (width, height))
    '''

    if use_tiny_yolo:
        yolo_model_def = hrnetPath + "/models/detectors/yolo/config/yolov3-tiny.cfg"
        yolo_class_path = hrnetPath + "/models/detectors/yolo/data/coco.names"
        yolo_weights_path = hrnetPath + "/models/detectors/yolo/weights/yolov3-tiny.weights"
    else:
        yolo_model_def = hrnetPath + "/models/detectors/yolo/config/yolov3.cfg"
        yolo_class_path = hrnetPath + "/models/detectors/yolo/data/coco.names"
        yolo_weights_path = hrnetPath + "/models/detectors/yolo/weights/yolov3.weights"

    model = SimpleHRNet(hrnet_c,
                        hrnet_j,
                        hrnet_weights,
                        model_name=hrnet_m,
                        resolution=image_resolution,
                        multiperson=not single_person,
                        return_bounding_boxes=not disable_tracking,
                        max_batch_size=max_batch_size,
                        yolo_model_def=yolo_model_def,
                        yolo_class_path=yolo_class_path,
                        yolo_weights_path=yolo_weights_path,
                        device=device)

    if not disable_tracking:
        prev_boxes = None
        prev_pts = None
        prev_person_ids = None
        next_person_id = 0
    '''
    while True:
        t = time.time()

        if filename is not None or disable_vidgear:
            ret, frame = video.read()
            if not ret:
                break
            if rotation_code is not None:
                frame = cv2.rotate(frame, rotation_code)
        else:
            frame = video.read()
            if frame is None:
                break
    '''
    pts = model.predict(frame)

    if not disable_tracking:
        boxes, pts = pts

        if len(pts) > 0:
            if prev_pts is None and prev_person_ids is None:
                person_ids = np.arange(next_person_id,
                                       len(pts) + next_person_id,
                                       dtype=np.int32)
                next_person_id = len(pts) + 1
            else:
                boxes, pts, person_ids = find_person_id_associations(
                    boxes=boxes,
                    pts=pts,
                    prev_boxes=prev_boxes,
                    prev_pts=prev_pts,
                    prev_person_ids=prev_person_ids,
                    next_person_id=next_person_id,
                    pose_alpha=0.2,
                    similarity_threshold=0.4,
                    smoothing_alpha=0.1,
                )
                next_person_id = max(next_person_id, np.max(person_ids) + 1)
        else:
            person_ids = np.array((), dtype=np.int32)

        prev_boxes = boxes.copy()
        prev_pts = pts.copy()
        prev_person_ids = person_ids

    else:
        person_ids = np.arange(len(pts), dtype=np.int32)

    for i, (pt, pid) in enumerate(zip(pts, person_ids)):
        frame = draw_points_and_skeleton(
            frame,
            pt,
            joints_dict()[hrnet_joints_set]['skeleton'],
            person_index=pid,
            points_color_palette='gist_rainbow',
            skeleton_color_palette='jet',
            points_palette_samples=10)
    '''
    if video_writer is None:
        fourcc = cv2.VideoWriter_fourcc(*video_format)  # video format
        video_writer = cv2.VideoWriter('/media/koshiba/Data/simple-HRNet/outputData/' + videoName + '_output.avi', fourcc, video_framerate, (frame.shape[1], frame.shape[0]))
    video_writer.write(frame)

    video_writer.release()
    print(person_ids)
    '''
    #print(pts)
    return pts, frame
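
# A hedged usage sketch for image_keypoint; the image path is an assumption.
import cv2

frame = cv2.imread('/media/koshiba/Data/simple-HRNet/inputData/sample.jpg',
                   cv2.IMREAD_COLOR)
pts, annotated = image_keypoint(frame)
cv2.imwrite('annotated.jpg', annotated)
print('detected people:', len(pts))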
Ejemplo n.º 18
0
def main(camera_id, filename, hrnet_c, hrnet_j, hrnet_weights,
         hrnet_joints_set, image_resolution, disable_tracking, max_nof_people,
         max_batch_size, disable_vidgear, save_video, video_format,
         video_framerate, device):
    if device is not None:
        device = torch.device(device)
    else:
        if torch.cuda.is_available():
            torch.backends.cudnn.deterministic = True
            device = torch.device('cuda')
        else:
            device = torch.device('cpu')

    # print(device)

    has_display = 'DISPLAY' in os.environ.keys() or sys.platform == 'win32'
    video_writer = None

    if filename is not None:
        rotation_code = check_video_rotation(filename)
        video = cv2.VideoCapture(filename)
        assert video.isOpened()
    else:
        rotation_code = None
        if disable_vidgear:
            video = cv2.VideoCapture(camera_id)
            assert video.isOpened()
        else:
            video = CamGear(camera_id).start()

    model = SimpleHigherHRNet(hrnet_c,
                              hrnet_j,
                              hrnet_weights,
                              resolution=image_resolution,
                              return_bounding_boxes=not disable_tracking,
                              max_nof_people=max_nof_people,
                              max_batch_size=max_batch_size,
                              device=device)

    if not disable_tracking:
        prev_boxes = None
        prev_pts = None
        prev_person_ids = None
        next_person_id = 0

    while True:
        t = time.time()

        if filename is not None or disable_vidgear:
            ret, frame = video.read()
            if not ret:
                break
            if rotation_code is not None:
                frame = cv2.rotate(frame, rotation_code)
        else:
            frame = video.read()
            if frame is None:
                break

        pts = model.predict(frame)

        if not disable_tracking:
            boxes, pts = pts

            if len(pts) > 0:
                if prev_pts is None and prev_person_ids is None:
                    person_ids = np.arange(next_person_id,
                                           len(pts) + next_person_id,
                                           dtype=np.int32)
                    next_person_id = len(pts) + 1
                else:
                    boxes, pts, person_ids = find_person_id_associations(
                        boxes=boxes,
                        pts=pts,
                        prev_boxes=prev_boxes,
                        prev_pts=prev_pts,
                        prev_person_ids=prev_person_ids,
                        next_person_id=next_person_id,
                        pose_alpha=0.2,
                        similarity_threshold=0.4,
                        smoothing_alpha=0.1,
                    )
                    next_person_id = max(next_person_id,
                                         np.max(person_ids) + 1)
            else:
                person_ids = np.array((), dtype=np.int32)

            prev_boxes = boxes.copy()
            prev_pts = pts.copy()
            prev_person_ids = person_ids

        else:
            person_ids = np.arange(len(pts), dtype=np.int32)

        for i, (pt, pid) in enumerate(zip(pts, person_ids)):
            frame = draw_points_and_skeleton(
                frame,
                pt,
                joints_dict()[hrnet_joints_set]['skeleton'],
                person_index=pid,
                points_color_palette='gist_rainbow',
                skeleton_color_palette='jet',
                points_palette_samples=10)

        fps = 1. / (time.time() - t)
        print('\rframerate: %f fps / detected people: %d' % (fps, len(pts)),
              end='')

        if has_display:
            cv2.imshow('frame.png', frame)
            k = cv2.waitKey(1)
            if k == 27:  # Esc button
                if disable_vidgear:
                    video.release()
                else:
                    video.stop()
                break
        else:
            cv2.imwrite('frame.png', frame)

        if save_video:
            if video_writer is None:
                fourcc = cv2.VideoWriter_fourcc(*video_format)  # video format
                video_writer = cv2.VideoWriter(
                    'output.avi', fourcc, video_framerate,
                    (frame.shape[1], frame.shape[0]))
            video_writer.write(frame)

    if save_video:
        video_writer.release()
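
# find_person_id_associations comes from simple-HRNet and is not reproduced
# here. As a rough illustration of what frame-to-frame re-identification
# involves, a greedy IoU-based sketch; every name and the threshold are
# assumptions, not the library's algorithm.
import numpy as np

def iou(box_a, box_b):
    """Intersection-over-union of two (x1, y1, x2, y2) boxes."""
    x1, y1 = np.maximum(box_a[:2], box_b[:2])
    x2, y2 = np.minimum(box_a[2:], box_b[2:])
    inter = max(0.0, x2 - x1) * max(0.0, y2 - y1)
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    return inter / (area_a + area_b - inter + 1e-9)

def associate(boxes, prev_boxes, prev_ids, next_id, threshold=0.4):
    """Each new box inherits the previous ID it overlaps most, else a new one."""
    ids, taken = [], set()
    for box in boxes:
        scores = [iou(box, pb) if i not in taken else -1.0
                  for i, pb in enumerate(prev_boxes)]
        best = int(np.argmax(scores)) if scores else -1
        if best >= 0 and scores[best] >= threshold:
            ids.append(prev_ids[best])
            taken.add(best)
        else:
            ids.append(next_id)
            next_id += 1
    return np.asarray(ids, dtype=np.int32), next_id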
def main(camera_id, filename, hrnet_m, hrnet_c, hrnet_j, hrnet_weights,
         hrnet_joints_set, image_resolution, single_person, use_tiny_yolo,
         disable_tracking, max_batch_size, disable_vidgear, save_video,
         video_format, video_framerate, device):
    if device is not None:
        device = torch.device(device)
    else:
        if torch.cuda.is_available():
            torch.backends.cudnn.deterministic = True
            device = torch.device('cuda')
        else:
            device = torch.device('cpu')

    # print(device)

    image_resolution = ast.literal_eval(image_resolution)
    has_display = 'DISPLAY' in os.environ.keys() or sys.platform == 'win32'
    video_writer = None

    if filename is not None:
        rotation_code = check_video_rotation(filename)
        video = cv2.VideoCapture(filename)
        assert video.isOpened()
        #nof_frames = video.get(cv2.CAP_PROP_FRAME_COUNT)
    else:
        rotation_code = None
        if disable_vidgear:
            video = cv2.VideoCapture(camera_id)
            assert video.isOpened()
        else:
            video = CamGear(camera_id).start()

    if use_tiny_yolo:
        yolo_model_def = "./models/detectors/yolo/config/yolov3-tiny.cfg"
        yolo_class_path = "./models/detectors/yolo/data/coco.names"
        yolo_weights_path = "./models/detectors/yolo/weights/yolov3-tiny.weights"
    else:
        yolo_model_def = "./models/detectors/yolo/config/yolov3.cfg"
        yolo_class_path = "./models/detectors/yolo/data/coco.names"
        yolo_weights_path = "./models/detectors/yolo/weights/yolov3.weights"

    model = SimpleHRNet(hrnet_c,
                        hrnet_j,
                        hrnet_weights,
                        model_name=hrnet_m,
                        resolution=image_resolution,
                        multiperson=not single_person,
                        return_bounding_boxes=not disable_tracking,
                        max_batch_size=max_batch_size,
                        yolo_model_def=yolo_model_def,
                        yolo_class_path=yolo_class_path,
                        yolo_weights_path=yolo_weights_path,
                        device=device)

    if not disable_tracking:
        prev_boxes = None
        prev_pts = None
        prev_person_ids = None
        next_person_id = 0

    steps_cnt = 1
    flag = 0
    while True:
        t = time.time()
        if filename is not None or disable_vidgear:
            ret, frame = video.read()
            nof_frames = video.get(cv2.CAP_PROP_POS_FRAMES)
            print(nof_frames)
            # #Code for bounding box and cropping of the video
            # bbox, label, conf = cv.detect_common_objects(frame)
            # frame_bounding = draw_bbox(frame, bbox, label, conf)
            # #bb.add(image, left, top, right, bottom, label, color)
            # if save_video:
            #     if video_writer is None:
            #         fourcc = cv2.VideoWriter_fourcc(*video_format)  # video format
            #         video_writer = cv2.VideoWriter('output_bounding.avi', fourcc, video_framerate, (frame.shape[1], frame.shape[0]))
            #     video_writer.write(frame_bounding)

            if not ret:
                print('\rstep_count: %d' % steps_cnt, end='\n')
                #print (steps_cnt)
                break
            if rotation_code is not None:
                frame = cv2.rotate(frame, rotation_code)
        else:
            frame = video.read()
            if frame is None:
                break

        pts = model.predict(frame)
        #print(pts[1][0][0][2])
        # pts is (boxes, keypoints) while tracking is enabled; take the first
        # person's joints and keep only the (y, x) coordinates, dropping the
        # confidence column
        left_hip = np.array(pts[1][0][11][:2])
        left_knee = np.array(pts[1][0][13][:2])
        left_ankle = np.array(pts[1][0][15][:2])
        right_hip = np.array(pts[1][0][12][:2])
        right_knee = np.array(pts[1][0][14][:2])
        right_ankle = np.array(pts[1][0][16][:2])

        ba = left_hip - left_knee
        bc = left_ankle - left_knee
        left_cosine_angle = np.dot(
            ba, bc) / (np.linalg.norm(ba) * np.linalg.norm(bc))
        left_angle = np.arccos(left_cosine_angle)
        left_angle = np.degrees(left_angle)
        position1 = (10, 50)
        position1_1 = (200, 50)

        de = right_hip - right_knee
        df = right_ankle - right_knee
        right_cosine_angle = np.dot(
            de, df) / (np.linalg.norm(de) * np.linalg.norm(df))
        right_angle = np.arccos(right_cosine_angle)
        right_angle = np.degrees(right_angle)
        position2 = (10, 100)
        position2_1 = (200, 100)

        if (left_angle >= 130 and right_angle < 110) or (right_angle >= 130
                                                         and left_angle < 110):
            gc = 'Yes'
            flag = 0
        else:
            gc = 'No'
            if (flag == 0):
                steps_cnt = steps_cnt + 1
                flag = 1

        position3 = (10, 150)
        position3_1 = (300, 150)

        # fontScale
        fontScale = 1

        # Red color in BGR
        color = (0, 0, 255)

        # Line thickness of 2 px
        thickness = 2
        #print (left_angle)
        #print (right_angle)

        if not disable_tracking:
            boxes, pts = pts

            if len(pts) > 0:
                if prev_pts is None and prev_person_ids is None:
                    person_ids = np.arange(next_person_id,
                                           len(pts) + next_person_id,
                                           dtype=np.int32)
                    next_person_id = len(pts) + 1
                else:
                    boxes, pts, person_ids = find_person_id_associations(
                        boxes=boxes,
                        pts=pts,
                        prev_boxes=prev_boxes,
                        prev_pts=prev_pts,
                        prev_person_ids=prev_person_ids,
                        next_person_id=next_person_id,
                        pose_alpha=0.2,
                        similarity_threshold=0.4,
                        smoothing_alpha=0.1,
                    )
                    next_person_id = max(next_person_id,
                                         np.max(person_ids) + 1)
            else:
                person_ids = np.array((), dtype=np.int32)

            prev_boxes = boxes.copy()
            prev_pts = pts.copy()
            prev_person_ids = person_ids

        else:
            person_ids = np.arange(len(pts), dtype=np.int32)

        for i, (pt, pid) in enumerate(zip(pts, person_ids)):
            frame = draw_points_and_skeleton(
                frame,
                pt,
                joints_dict()[hrnet_joints_set]['skeleton'],
                person_index=pid,
                points_color_palette='gist_rainbow',
                skeleton_color_palette='jet',
                points_palette_samples=10)

        fps = 1. / (time.time() - t)
        print('\rframerate: %f fps' % fps, end='\n')
        #print(steps_cnt)

        if has_display:
            cv2.imshow('frame.png', frame)
            k = cv2.waitKey(1)
            if k == 27:  # Esc button
                if disable_vidgear:
                    video.release()
                else:
                    video.stop()
                break
        else:
            frame = cv2.putText(frame, str('left_angle:'), position1,
                                cv2.FONT_HERSHEY_SIMPLEX, fontScale, color,
                                thickness, cv2.LINE_AA)
            frame = cv2.putText(frame, str(left_angle), position1_1,
                                cv2.FONT_HERSHEY_SIMPLEX, fontScale, color,
                                thickness, cv2.LINE_AA)
            frame = cv2.putText(frame, str('right_angle:'), position2,
                                cv2.FONT_HERSHEY_SIMPLEX, fontScale, color,
                                thickness, cv2.LINE_AA)
            frame = cv2.putText(frame, str(right_angle), position2_1,
                                cv2.FONT_HERSHEY_SIMPLEX, fontScale, color,
                                thickness, cv2.LINE_AA)
            frame = cv2.putText(frame, str('Ground_Contact:'), position3,
                                cv2.FONT_HERSHEY_SIMPLEX, fontScale, color,
                                thickness, cv2.LINE_AA)
            frame = cv2.putText(frame, str(gc), position3_1,
                                cv2.FONT_HERSHEY_SIMPLEX, fontScale, color,
                                thickness, cv2.LINE_AA)
            #bbox, label, conf = cv.detect_common_objects(frame)
            #frame = draw_bbox(frame, bbox, label, conf)
            #bb.add(image, left, top, right, bottom, label, color)
            cv2.imwrite('frame.png', frame)

        if save_video:
            if video_writer is None:
                fourcc = cv2.VideoWriter_fourcc(*video_format)  # video format
                video_writer = cv2.VideoWriter(
                    'output.avi', fourcc, video_framerate,
                    (frame.shape[1], frame.shape[0]))
            video_writer.write(frame)

    if save_video:
        video_writer.release()
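
# The ground-contact test above can be factored into a small helper; a
# sketch using the thresholds from the loop (130 and 110 degrees). The name
# ground_contact is an assumption.
def ground_contact(left_angle, right_angle, straight=130.0, bent=110.0):
    """One near-straight leg while the other is clearly bent suggests a
    planted foot; a step is counted on the transition out of contact."""
    return ((left_angle >= straight and right_angle < bent) or
            (right_angle >= straight and left_angle < bent))

assert ground_contact(140.0, 90.0)
assert not ground_contact(120.0, 120.0)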