Example #1
import cv2
import numpy as np

# module paths as in the lightweight 3D pose demo these examples build on
from modules.inference_engine_pytorch import InferenceEnginePyTorch
from modules.parse_poses import parse_poses
from modules.draw import draw_poses
# body_kp_name_to_id and pairs_spine are assumed to be defined elsewhere in
# this project

class PoseWrapper:
    @staticmethod
    def distance_kps(kp1, kp2):
        # kp1 and kp2: numpy array of shape (3,): [x,y,conf]
        x1, y1, c1 = kp1
        x2, y2, c2 = kp2
        if c1 > 0 and c2 > 0:
            return np.linalg.norm(kp1[:2] - kp2[:2])
        else:
            return 0

    def __init__(self, draw_render=False):
        self.draw_render = draw_render

        self.net = InferenceEnginePyTorch('human-pose-estimation-3d.pth',
                                          'GPU')

    def eval(self, frame):
        self.frame = frame

        base_height = 256
        scale = base_height / self.frame.shape[0]
        scaled_img = cv2.resize(self.frame, dsize=None, fx=scale, fy=scale)
        inference_result = self.net.infer(scaled_img)
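        # parse_poses(result, input_scale, stride, fx[, is_video]); stride=8 and
        # fx=1 here, matching the calls in the later examples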
        poses_3d, poses_2d = parse_poses(inference_result, scale, 8, 1)

        if self.draw_render:
            draw_poses(self.frame, poses_2d)

        # When no person is detected, shape = (), else (nb_persons, 25, 3)
        if poses_2d.shape[0] != 0:
            self.body_kps = np.array([
                np.array(poses_2d[pose_id][0:-1]).reshape((-1, 3))
                for pose_id in range(len(poses_2d))
            ])

            # We sort persons by an "estimation" of their size.
            # This size has little to do with the real size of a person; it is an
            # arbitrary value, calculated here as:
            # distance(Nose, Neck) + 0.33 * distance(Neck, MidHip)
            sizes = np.array([
                self.length(pairs_spine, person_idx=i, coefs=[1, 0.33])
                for i in range(self.body_kps.shape[0])
            ])

            # Sort from biggest size to smallest
            order = np.argsort(-sizes)
            sizes = sizes[order]
            self.body_kps = self.body_kps[order]

            # Keep only the biggest person
            self.body_kps = self.body_kps[0]

            self.nb_persons = 1
        else:
            self.nb_persons = 0
            self.body_kps = []

        return self.nb_persons, self.body_kps

    def get_body_kp(self, kp_name="Neck"):
        """
            Return the coordinates of a keypoint named 'kp_name' of the person of index 'person_idx' (from 0), or None if keypoint not detected
        """
        try:
            x, y, conf = self.body_kps[body_kp_name_to_id[kp_name]]
        except (KeyError, IndexError):
            print(f"get_body_kp: invalid kp_name '{kp_name}' or no person detected")
            return None
        if conf > 0:
            return (int(x), int(y))
        else:
            return None

    def length(self, pairs, person_idx=0, coefs=None):
        """
            Calculate the mean of the length of the pairs in the list 'pairs' for the person of index 'person_idx' (from 0)
            If one (or both) of the 2 points of a pair is missing, the number of pairs used to calculate the average is decremented of 1
        """
        if coefs is None:
            coefs = [1] * len(pairs)

        person = self.body_kps[person_idx]

        l_cum = 0
        n = 0
        for i, pair in enumerate(pairs):
            l = self.distance_kps(person[body_kp_name_to_id[pair.p1]],
                                  person[body_kp_name_to_id[pair.p2]])
            if l != 0:
                l_cum += l * coefs[i]
                n += 1
        if n > 0:
            return l_cum / n
        else:
            return 0
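
A minimal usage sketch for PoseWrapper (hypothetical: it assumes an OpenCV webcam and the model checkpoint in the working directory):

import cv2

pose = PoseWrapper(draw_render=True)
cap = cv2.VideoCapture(0)
while True:
    ret, frame = cap.read()
    if not ret:
        break
    nb_persons, body_kps = pose.eval(frame)   # keypoints of the biggest person only
    if nb_persons:
        neck = pose.get_body_kp("Neck")       # (x, y) in pixels, or None
        if neck is not None:
            cv2.circle(frame, neck, 5, (0, 255, 0), -1)
    cv2.imshow("pose", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()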
Example #2
def run_inference(args):
    import os
    import json

    import cv2
    import numpy as np

    # module paths as in the lightweight 3D pose demo
    from modules.draw import Plotter3d, draw_poses
    from modules.input_reader import ImageReader, VideoReader
    from modules.parse_poses import parse_poses
    from modules.inference_engine_pytorch import InferenceEnginePyTorch
    # SocketServer, JointAngleCalculator and rotate_poses are assumed to be
    # provided by the surrounding project

    socket_server = SocketServer(args.port)
    joint_angle_calculator = JointAngleCalculator()

    stride = 8

    model_path = os.path.join('models', 'human-pose-estimation-3d.pth')
    net = InferenceEnginePyTorch(model_path, "GPU")

    canvas_3d = np.zeros((720, 1280, 3), dtype=np.uint8)
    plotter = Plotter3d(canvas_3d.shape[:2])
    canvas_3d_window_name = 'Canvas 3D'
    cv2.namedWindow(canvas_3d_window_name)
    cv2.setMouseCallback(canvas_3d_window_name, Plotter3d.mouse_callback)

    file_path = os.path.join('data', 'extrinsics.json')
    with open(file_path, 'r') as f:
        extrinsics = json.load(f)
    R = np.array(extrinsics['R'], dtype=np.float32)
    t = np.array(extrinsics['t'], dtype=np.float32)

    frame_provider = ImageReader(args.images)
    is_video = False
    if args.video != '':
        frame_provider = VideoReader(args.video)
        is_video = True
    base_height = args.height_size
    fx = 1  # focal length; a negative value would trigger the estimate below

    delay = 1
    esc_code = 27
    p_code = ord('p')
    space_code = ord(' ')
    mean_time = 0

    for frame in frame_provider:
        current_time = cv2.getTickCount()
        if frame is None:
            break
        input_scale = base_height / frame.shape[0]
        scaled_img = cv2.resize(frame, dsize=None, fx=input_scale, fy=input_scale)
        scaled_img = scaled_img[:, 0:scaled_img.shape[1] - (scaled_img.shape[1] % stride)]  # crop width to a multiple of stride (padding would be better, but this keeps the demo simple)
        if fx < 0:  # Focal length is unknown
            fx = np.float32(0.8 * frame.shape[1])

        inference_result = net.infer(scaled_img)
        poses_3d, poses_2d = parse_poses(inference_result, input_scale, stride, fx, is_video)
        edges = []

        if len(poses_3d):
            poses_3d = rotate_poses(poses_3d, R, t)
            poses_3d_copy = poses_3d.copy()
            x = poses_3d_copy[:, 0::4]
            y = poses_3d_copy[:, 1::4]
            z = poses_3d_copy[:, 2::4]
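            # re-orient axes for the 3D canvas: new (x, y, z) = (-z, x, -y)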
            poses_3d[:, 0::4], poses_3d[:, 1::4], poses_3d[:, 2::4] = -z, x, -y

            poses_3d = poses_3d.reshape(poses_3d.shape[0], 19, -1)[:, :, 0:3]
            edges = (Plotter3d.SKELETON_EDGES + 19 * np.arange(poses_3d.shape[0]).reshape((-1, 1, 1))).reshape((-1, 2))

        plotter.plot(canvas_3d, poses_3d, edges)
        cv2.imshow(canvas_3d_window_name, canvas_3d)

        draw_poses(frame, poses_2d)
        current_time = (cv2.getTickCount() - current_time) / cv2.getTickFrequency()
        if mean_time == 0:
            mean_time = current_time
        else:
            mean_time = mean_time * 0.95 + current_time * 0.05
        cv2.putText(frame, 'FPS: {}'.format(int(1 / mean_time * 10) / 10),
                    (40, 80), cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 255))
        cv2.imshow('ICV 3D Human Pose Estimation', frame)

        key = cv2.waitKey(delay)
        if key == esc_code:
            break
        if key == p_code:
            if delay == 1:
                delay = 0
            else:
                delay = 1
        if delay == 0 or not is_video:  # allows rotating the 3D canvas while paused
            key = 0
            while (key != p_code
                   and key != esc_code
                   and key != space_code):
                plotter.plot(canvas_3d, poses_3d, edges)
                cv2.imshow(canvas_3d_window_name, canvas_3d)
                key = cv2.waitKey(33)
            if key == esc_code:
                break
            else:
                delay = 1
        
        joint_angles = joint_angle_calculator.calculate_angles(poses_3d)
        if joint_angles:
            socket_server.send_data(joint_angles)
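
A hedged sketch of the argument object run_inference expects (the attribute names port, images, video and height_size come from the code above; the defaults are illustrative):

import argparse

parser = argparse.ArgumentParser(description='3D pose inference streamed over a socket')
parser.add_argument('--port', type=int, default=5000, help='socket server port')
parser.add_argument('--images', nargs='+', default='', help='input image paths')
parser.add_argument('--video', type=str, default='', help='input video path or camera id')
parser.add_argument('--height-size', type=int, default=256, help='network input height')
run_inference(parser.parse_args())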
Example #3
import os
import json
import traceback

import cv2
import numpy as np

from modules.inference_engine_pytorch import InferenceEnginePyTorch
from modules.parse_poses import parse_poses
from modules.draw import Plotter3d, draw_poses
# PyPATH (the project root) and rotate_poses are assumed to be defined in the
# surrounding project

def pose3d():
    stride = 8
    net = InferenceEnginePyTorch(
        os.path.join(PyPATH, 'model', 'human-pose-estimation-3d.pth'), 'GPU')

    with open(os.path.join(PyPATH, 'parameters', 'extrinsics.json'), 'r') as f:
        extrinsics = json.load(f)
    R = np.array(extrinsics['R'], dtype=np.float32)
    t = np.array(extrinsics['t'], dtype=np.float32)

    cap = cv2.VideoCapture(1)

    if not cap.isOpened():
        print("Webcam not recognized")

    base_height = 256

    while True:
        ret, frame = cap.read()
        if not ret:
            continue
        input_scale = base_height / frame.shape[0]
        scaled_img = cv2.resize(frame,
                                dsize=None,
                                fx=input_scale,
                                fy=input_scale)
        #scaled_img = scaled_img[:, 0:scaled_img.shape[1] - (scaled_img.shape[1] % stride)]  # better to pad, but cut out for demo
        fx = np.float32(0.8 * frame.shape[1])

        inference_result = net.infer(scaled_img)
        poses_3d, poses_2d = parse_poses(inference_result, input_scale, stride,
                                         fx)
        if len(poses_2d):
            poses_2d_copy = poses_2d.copy()
            # 2D poses are flat [x, y, conf] triples per joint
            x = poses_2d_copy[:, 0::3]
            y = poses_2d_copy[:, 1::3]
            # 0 - chest
            # 1 - nose
            # 2 - nothing
            # 3 - left shoulder
            # 4 - left elbow
            # 5 - left wrist
            # 6 - left hip
            # 7 - left knee
            # 8 - left foot
            # 9 - right shoulder
            # 10 - right elbow
            # 11 - right wrist
            # 12 - right hip
            # 13 - right knee
            # 14 - right foot
            frame = cv2.circle(frame, (int(x[0][7]), int(y[0][7])), 10,
                               (255, 0, 0))
            print(f"left knee: x={x[0][7]}, y={y[0][7]}")
            cv2.imshow('frame', frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
        if len(poses_3d):
            poses_3d_copy = poses_3d.copy()
            # 3D poses are flat [x, y, z, conf] quadruples per joint
            x = poses_3d_copy[:, 0::4]
            y = poses_3d_copy[:, 1::4]
            z = poses_3d_copy[:, 2::4]
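
The stride-4 slicing above works because parse_poses returns each 3D pose as a flat array of 19 joints with 4 values (x, y, z, confidence) per joint; the other examples reshape it instead of slicing. A minimal sketch of the equivalence:

import numpy as np

poses_3d = np.random.rand(2, 19 * 4).astype(np.float32)  # e.g. 2 detected persons
joints = poses_3d.reshape(poses_3d.shape[0], 19, -1)     # -> (2, 19, 4)
xyz = joints[:, :, 0:3]                                  # drop the confidence column
assert np.array_equal(xyz[:, :, 0], poses_3d[:, 0::4])   # same as x = poses_3d[:, 0::4]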
class InferCtrl:
    net = None
    extrinsics = None

    def __init__(self,
                 model,
                 height=256,
                 device='CPU',
                 openvino=False,
                 tensorrt=False,
                 extrinsics_path="./data/extrinsics.json",
                 fx=1,
                 canvas_shape=(720, 1280, 3)) -> None:

        if openvino:
            from modules.inference_engine_openvino import InferenceEngineOpenVINO
            self.net = InferenceEngineOpenVINO(model, device)
        else:
            from modules.inference_engine_pytorch import InferenceEnginePyTorch
            self.net = InferenceEnginePyTorch(model, device, tensorrt)

        try:
            with open(extrinsics_path, 'r') as f:
                self.extrinsics = json.load(f)
        except Exception:
            with open("./data/extrinsics.json", 'r') as f:
                self.extrinsics = json.load(f)
            traceback.print_exc()

        self.base_height = height
        self.fx = fx
        self.canvas_3d = np.zeros(canvas_shape, dtype=np.uint8)
        self.plotter = Plotter3d(self.canvas_3d.shape[:2])

        # print("[INFO] plotter shape {}".format(self.plotter.shape))
        print("[INFO] canvas shape {}".format(self.canvas_3d.shape))

    def process_frame(self, frame, inference_result, merged=False):
        poses_3d = inference_result.get("pose_3d", {}).get("value", [])
        poses_2d = inference_result.get("pose_2d", {}).get("value", [])
        edges = inference_result.get("edges", {}).get("value", [])

        self.plotter.plot(self.canvas_3d, poses_3d, edges)
        draw_poses(frame, poses_2d)

        if merged:
            frame_side = np.copy(self.canvas_3d)

            new_w = min(frame.shape[1], frame_side.shape[1])

            rel_h_f = int(new_w * frame.shape[0] * 1.0 / frame.shape[1])
            rel_h_s = int(new_w * frame_side.shape[0] * 1.0 /
                          frame_side.shape[1])

            frame = cv2.resize(frame, (new_w, rel_h_f))
            frame_side = cv2.resize(frame_side, (new_w, rel_h_s))

            return np.hstack([frame, frame_side])

        return frame, np.copy(self.canvas_3d)

    def infer(self, frame, is_video=True, fx=None):
        stride = 8
        output = {}
        if fx is None:
            fx = self.fx
            output["focal_length"] = {
                "value": fx,
                "comment": "default value used because none was supplied"
            }

        R = np.array(self.extrinsics['R'], dtype=np.float32)
        t = np.array(self.extrinsics['t'], dtype=np.float32)

        input_scale = self.base_height / frame.shape[0]

        scaled_img = cv2.resize(frame,
                                dsize=None,
                                fx=input_scale,
                                fy=input_scale)

        scaled_img = scaled_img[:, 0:scaled_img.shape[1] -
                                (scaled_img.shape[1] % stride)]

        # scaled_img = pad_resize_image(scaled_img, (scaled_img.shape[0], (scaled_img.shape[1] + stride)//stride, scaled_img.shape[2]))

        output["input_size"] = {
            "value": scaled_img.shape,
            "comment": "network inpute size"
        }

        if fx < 0:  # Focal length is unknown
            fx = np.float32(0.8 * frame.shape[1])

            output["focal_length"] = {
                "value": fx,
                "comment": "Focal length is unknown, 0.8 * frame width used"
            }

        # the inference
        inference_result = self.net.infer(scaled_img)
        poses_3d, poses_2d = parse_poses(inference_result, input_scale, stride,
                                         fx, is_video)
        edges = []

        if len(poses_3d):
            poses_3d = rotate_poses(poses_3d, R, t)
            poses_3d_copy = poses_3d.copy()
            x = poses_3d_copy[:, 0::4]
            y = poses_3d_copy[:, 1::4]
            z = poses_3d_copy[:, 2::4]
            poses_3d[:, 0::4], poses_3d[:, 1::4], poses_3d[:, 2::4] = -z, x, -y

            poses_3d = poses_3d.reshape(poses_3d.shape[0], 19, -1)[:, :, 0:3]
            edges = (Plotter3d.SKELETON_EDGES +
                     19 * np.arange(poses_3d.shape[0]).reshape(
                         (-1, 1, 1))).reshape((-1, 2))

        output["pose_3d"] = {
            "value": poses_3d,
            "comment": "re-oriented 3D poses"
        }

        output["pose_2d"] = {"value": poses_2d, "comment": "2D poses"}

        output["edges"] = {"value": edges, "comment": "2D poses"}

        return output
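
A minimal usage sketch for InferCtrl (hypothetical: it assumes the checkpoint and extrinsics paths shown above and a webcam as input):

import cv2

ctrl = InferCtrl('models/human-pose-estimation-3d.pth', height=256, device='CPU')
cap = cv2.VideoCapture(0)
while True:
    ret, frame = cap.read()
    if not ret:
        break
    result = ctrl.infer(frame, is_video=True)  # dict of {"value", "comment"} entries
    merged = ctrl.process_frame(frame, result, merged=True)  # 2D view and 3D canvas side by side
    cv2.imshow('pose', merged)
    if cv2.waitKey(1) & 0xFF == 27:  # Esc quits
        break
cap.release()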