Example #1
0
    def __init__(self, env_name='walk'):
        """Build the simulated and reference DART worlds and load the BVH
        reference motion selected by *env_name*.

        # Arguments
            env_name (str): one of 'walk', 'spiral_walk', 'walk_spin',
                'jump', 'walk_fast', 'walk_left_90', 'walk_left_45',
                'walk_pick', 'walk_u_turn', 'jump_whole',
                'walk_u_turn_whole'. Any other value leaves
                ``self.ref_motion`` as None.
        """
        # simulated world at a 1/1200 s physics step; skeleton 1 is the
        # controlled character (skeleton 0 is presumably the ground — TODO confirm)
        self.world = pydart.World(1. / 1200.,
                                  "../data/woody_with_ground_v2.xml")
        self.world.control_skel = self.world.skeletons[1]
        self.skel = self.world.skeletons[1]
        self.Kp, self.Kd = 400., 40.
        # consistency fix: pass the stored PD gains instead of repeating the
        # literals, so changing Kp/Kd above changes the controller too
        self.pdc = PDController(self.skel, self.world.time_step(), self.Kp,
                                self.Kd)

        self.env_name = env_name

        self.ref_motion = None  # type: ym.Motion

        # select the reference clip; some clips are sliced to trim lead-in or
        # lead-out frames, and the u-turn clip is raised slightly off the floor
        if env_name == 'walk':
            self.ref_motion = yf.readBvhFile(
                "../data/woody_walk_normal.bvh")[40:]
        elif env_name == 'spiral_walk':
            self.ref_motion = yf.readBvhFile(
                "../data/wd2_spiral_walk_normal05.bvh")
        elif env_name == 'walk_spin':
            self.ref_motion = yf.readBvhFile(
                "../data/wd2_2foot_walk_turn2.bvh")
        elif env_name == 'jump':
            self.ref_motion = yf.readBvhFile("../data/wd2_jump0.bvh")[164:280]
        elif env_name == 'walk_fast':
            self.ref_motion = yf.readBvhFile(
                "../data/wd2_WalkForwardVFast00.bvh")

        elif env_name == 'walk_left_90':
            self.ref_motion = yf.readBvhFile("../data/walk_left_90degree.bvh")
        elif env_name == 'walk_left_45':
            self.ref_motion = yf.readBvhFile("../data/walk_left_45degree.bvh")
        elif env_name == 'walk_pick':
            self.ref_motion = yf.readBvhFile("../data/wd2_pick_walk_1.bvh")
        elif env_name == 'walk_u_turn':
            self.ref_motion = yf.readBvhFile("../data/wd2_u-turn.bvh")[25:214]
            self.ref_motion.translateByOffset([0., 0.03, 0.])

        elif env_name == 'jump_whole':
            self.ref_motion = yf.readBvhFile("../data/wd2_jump0.bvh")[315:966]
        elif env_name == 'walk_u_turn_whole':
            self.ref_motion = yf.readBvhFile("../data/wd2_u-turn.bvh")
            self.ref_motion.translateByOffset([0., 0.03, 0.])

        # kinematic-only world holding the reference (target) pose
        self.ref_world = pydart.World(1. / 1200.,
                                      "../data/woody_with_ground.xml")
        self.ref_skel = self.ref_world.skeletons[1]
        # self.step_per_frame = round((1./self.world.time_step()) / self.ref_motion.fps)
        self.step_per_frame = 40

        # reference-state initialization (start episodes at random phase)
        self.rsi = True

        # reward weights: pose, velocity, end-effector, center-of-mass
        self.w_p = 0.65
        self.w_v = 0.1
        self.w_e = 0.15
        self.w_c = 0.1

        # exponents for the exponential reward terms
        self.exp_p = 2.
        self.exp_v = 0.1
        self.exp_e = 40.
        self.exp_c = 10.

        self.body_num = self.skel.num_bodynodes()
        # end-effector body indices: feet and forearms
        self.idx_e = [
            self.skel.bodynode_index('LeftFoot'),
            self.skel.bodynode_index('RightFoot'),
            self.skel.bodynode_index('LeftForeArm'),
            self.skel.bodynode_index('RightForeArm')
        ]
        self.body_e = list(map(self.skel.body, self.idx_e))
        self.ref_body_e = list(map(self.ref_skel.body, self.idx_e))
        self.motion_len = len(self.ref_motion)
        self.motion_time = len(self.ref_motion) / self.ref_motion.fps

        self.time_offset = 0.

        # observation: 1 phase value + (pos, lin vel, ang vel, quaternion)
        # per body; action: one target offset per actuated dof (root excluded)
        state_num = 1 + (3 * 3 + 4) * self.body_num
        action_num = self.skel.num_dofs() - 6

        state_high = np.array([np.finfo(np.float32).max] * state_num)
        action_high = np.array([pi * 10. / 2.] * action_num)

        self.action_space = gym.spaces.Box(-action_high,
                                           action_high,
                                           dtype=np.float32)
        self.observation_space = gym.spaces.Box(-state_high,
                                                state_high,
                                                dtype=np.float32)

        self.viewer = None

        self.phase_frame = 0

        # max training time
        self.training_time = 3.

        self.evaluation = False

        self.visualize = False
        self.visualize_select_motion = 0
Example #2
0
class HpDartEnv(gym.Env):
    """Gym environment that tracks an RNN-generated reference motion with a
    PD-controlled DART skeleton while steering toward a sampled 2D goal.

    The reward mixes goal progress with pose / velocity / end-effector /
    COM tracking of the reference skeleton (see ``reward``).
    """

    def __init__(self, env_name='walk'):
        self.skel_file_name = '../data/cmu_with_ground.xml'
        # simulated world at a 1/1200 s physics step; skeleton 1 is the
        # controlled character
        self.world = pydart.World(1. / 1200., self.skel_file_name)
        self.world.control_skel = self.world.skeletons[1]
        self.skel = self.world.skeletons[1]
        self.Kp, self.Kd = 400., 40.
        # consistency fix: pass the stored PD gains instead of repeating the
        # literals
        self.pdc = PDController(self.skel, self.world.time_step(), self.Kp,
                                self.Kd)

        self.env_name = env_name

        # kinematic-only world holding the reference (target) pose
        self.ref_world = pydart.World(1. / 1200., self.skel_file_name)
        self.ref_skel = self.ref_world.skeletons[1]

        # physics sub-steps per control step
        self.step_per_frame = 40

        # RNN motion generator that produces the reference pose each step
        self.rnn = RNNController(env_name)
        self.RNN_MOTION_SCALE = 0.01
        # joint ordering used by the RNN output vectors
        self.rnn_joint_list = [
            "Head", "Hips", "LHipJoint", "LeftArm", "LeftFoot", "LeftForeArm",
            "LeftHand", "LeftLeg", "LeftShoulder", "LeftToeBase", "LeftUpLeg",
            "LowerBack", "Neck", "Neck1", "RHipJoint", "RightArm", "RightFoot",
            "RightForeArm", "RightHand", "RightLeg", "RightShoulder",
            "RightToeBase", "RightUpLeg", "Spine", "Spine1"
        ]
        # probability per control step of resampling the navigation target
        self.rnn_target_update_prob = 1. / 150.
        self.ik = DartIk(self.ref_skel)

        # reference-state initialization flag
        self.rsi = True

        # reward weights: goal, pose, velocity, end-effector, COM
        self.w_g = 0.3
        self.w_p = 0.65 * .7
        self.w_v = 0.1 * .7
        self.w_e = 0.15 * .7
        self.w_c = 0.1 * .7

        # exponents for the exponential reward terms
        self.exp_g = 2.5
        self.exp_p = 2.
        self.exp_v = 0.1
        self.exp_e = 40.
        self.exp_c = 10.

        self.body_num = self.skel.num_bodynodes()
        # end-effector body indices: feet and hands
        self.idx_e = [
            self.skel.bodynode_index('LeftFoot'),
            self.skel.bodynode_index('RightFoot'),
            self.skel.bodynode_index('LeftHand'),
            self.skel.bodynode_index('RightHand')
        ]
        self.body_e = list(map(self.skel.body, self.idx_e))
        self.ref_body_e = list(map(self.ref_skel.body, self.idx_e))

        self.time_offset = 0.
        self.step_num = 0

        # state_num = 2 + (3*3 + 4) * self.body_num
        # observation: 2 goal coords + body positions/velocities + joint
        # positions/velocities of skel and joint positions of ref skel
        state_num = 2 + 2 * 3 * self.body_num + 3 * (self.skel.num_dofs() - 3)
        action_num = self.skel.num_dofs() - 6

        state_high = np.array([np.finfo(np.float32).max] * state_num)
        action_high = np.array([pi * 10. / 2.] * action_num)

        self.action_space = gym.spaces.Box(-action_high,
                                           action_high,
                                           dtype=np.float32)
        self.observation_space = gym.spaces.Box(-state_high,
                                                state_high,
                                                dtype=np.float32)

        self.viewer = None

        self.phase_frame = 0
        self.goal = np.zeros(2)
        self.goal_prev = self.goal.copy()
        # bug fix: reward() reads prev_goal, which was only ever set inside
        # update_goal_in_local_frame(); initialize it here so querying the
        # reward right after reset() cannot raise AttributeError
        self.prev_goal = self.goal.copy()
        self.goal_in_world_frame = np.zeros(3)

        self.first = True

        # previous reference quantities used by reward()
        self.prev_ref_q = self.ref_skel.positions()
        self.prev_ref_dq = np.zeros(self.ref_skel.num_dofs())
        self.prev_ref_p_e = np.asarray([
            body.world_transform()[:3, 3] for body in self.ref_body_e
        ]).flatten()
        self.prev_ref_com = self.ref_skel.com()

    def state(self):
        """Return the observation vector in the pelvis local frame.

        Layout: scaled goal (2), body positions (3 per body), joint positions
        minus root translation, body linear velocities (3 per body), joint
        velocities minus root translation, reference joint positions.
        """
        pelvis = self.skel.body(0)
        p_pelvis = pelvis.world_transform()[:3, 3]
        R_pelvis = pelvis.world_transform()[:3, :3]

        # goal scaled by the 5 m sampling radius
        state = [self.goal[0] / 5., self.goal[1] / 5.]

        # body positions and linear velocities expressed in the pelvis frame,
        # with empirical scaling factors
        p = 0.8 * np.array([
            np.dot(R_pelvis.T,
                   body.to_world() - p_pelvis) for body in self.skel.bodynodes
        ]).flatten()
        v = 0.2 * np.array([
            np.dot(R_pelvis.T, body.world_linear_velocity())
            for body in self.skel.bodynodes
        ]).flatten()
        _R = 2. * self.skel.positions()
        _w = .2 * self.skel.velocities()
        # drop the root translation dofs (indices 3..5)
        R = np.append(_R[:3], _R[6:]).flatten()
        w = np.append(_w[:3], _w[6:]).flatten()

        _ref_R = 2. * self.ref_skel.positions()
        ref_R = np.append(_ref_R[:3], _ref_R[6:]).flatten()

        state.extend(p)
        state.extend(R)
        state.extend(v)
        state.extend(w)
        state.extend(ref_R)

        return np.asarray(state).flatten()

    def reward(self):
        """Return the scalar reward: weighted exponential tracking terms for
        pose, velocity, end-effectors, COM, plus a goal-distance term."""
        p_e = np.asarray(
            [body.world_transform()[:3, 3] for body in self.body_e]).flatten()

        return exp_reward_term(self.w_p, self.exp_p, self.skel.position_differences(self.prev_ref_q, self.skel.q)) \
              + exp_reward_term(self.w_v, self.exp_v, self.skel.velocity_differences(self.prev_ref_dq, self.skel.dq)) \
              + exp_reward_term(self.w_e, self.exp_e, p_e - self.prev_ref_p_e) \
              + exp_reward_term(self.w_c, self.exp_c, self.skel.com() - self.prev_ref_com) \
              + exp_reward_term(self.w_g, self.exp_g, self.prev_goal)

    def is_done(self):
        """Episode ends on a fallen/launched COM, NaN state, or 10 s elapsed."""
        if self.skel.com()[1] < 0.4 or self.skel.com()[1] > 3.0:
            return True
        elif np.isnan(np.asarray(self.skel.q)).any() or np.isnan(
                np.asarray(self.skel.dq)).any():
            return True
        elif self.world.time() > 10.:
            return True
        return False

    def step(self, _action):
        """Run one timestep of the environment's dynamics.
        Accepts an action and returns a tuple (observation, reward, done, info).

        # Arguments
            action (object): An action provided by the environment.

        # Returns
            observation (object): Agent's observation of the current environment.
            reward (float) : Amount of reward returned after previous action.
            done (boolean): Whether the episode has ended, in which case further step() calls will return undefined results.
            info (dict): Contains auxiliary diagnostic information (helpful for debugging, and sometimes learning).
        """
        # action is a scaled offset on the actuated dofs; root gets zeros
        action = np.hstack((np.zeros(6), _action / 10.))
        for i in range(self.step_per_frame):
            self.skel.set_forces(
                self.pdc.compute_flat(self.ref_skel.q + action))
            self.world.step()

        # occasionally resample the navigation target
        if random() < self.rnn_target_update_prob:
            self.sample_target()

        self.update_goal_in_local_frame(False)
        self.get_rnn_ref_pose_step(False)

        return tuple([self.state(), self.reward(), self.is_done(), dict()])

    def reset(self):
        """
        Resets the state of the environment and returns an initial observation.

        # Returns
            observation (object): The initial observation of the space. Initial reward is assumed to be 0.
        """
        self.world.reset()

        if self.first:
            self.sample_target()
        self.get_rnn_ref_pose_step(True)

        # start the simulated skeleton exactly on the reference pose
        self.skel.set_positions(self.ref_skel.positions())
        self.skel.set_velocities(self.ref_skel.velocities())

        self.first = False

        return self.state()

    def sample_target(self):
        """Sample a new goal on the ground plane, uniformly within a 5 m
        radius of the root body (y forced to 0)."""
        angle = 2. * pi * random()
        radius = 5. * random()

        self.goal_in_world_frame = self.skel.body(0).to_world() + radius * (
            cos(angle) * mm.unitX() - sin(angle) * mm.unitZ())
        self.goal_in_world_frame[1] = 0.

    def update_goal_in_local_frame(self, reset=False):
        """Recompute ``self.goal`` (2D, root local frame) from the world-frame
        goal; also maintains ``prev_goal`` for the reward term."""
        if not reset:
            self.prev_goal = self.goal.copy()
        body_transform = self.skel.body(0).world_transform()
        goal_vector_in_world_frame = self.goal_in_world_frame - body_transform[:
                                                                               3,
                                                                               3]
        goal_vector_in_world_frame[1] = 0.
        radius = mm.length(goal_vector_in_world_frame)
        unit_goal_vector_in_world_frame = mm.normalize(
            goal_vector_in_world_frame)
        # project the root x axis onto the ground plane and derive z from it
        root_x_in_world_plane = body_transform[:3, 0]
        root_x_in_world_plane[1] = 0.
        unit_root_x_in_world_plane = mm.seq2Vec3(
            mm.normalize(root_x_in_world_plane))
        unit_root_z_in_world_plane = mm.cross(unit_root_x_in_world_plane,
                                              mm.unitY())

        self.goal = radius * np.array([
            np.dot(unit_root_x_in_world_plane,
                   unit_goal_vector_in_world_frame),
            np.dot(unit_root_z_in_world_plane, unit_goal_vector_in_world_frame)
        ])
        if reset:
            self.prev_goal = self.goal.copy()

    def get_rnn_ref_pose_step(self, reset=False):
        """Advance the RNN one step toward the current goal and write the
        resulting pose into ``self.ref_skel`` (with IK touch-ups on limbs).

        When *reset* is False the previous reference quantities are snapshotted
        first so reward() compares against the pre-step reference.
        """
        if not reset:
            self.prev_ref_q = self.ref_skel.positions()
            self.prev_ref_dq = self.ref_skel.velocities()
            # bug fix: this previously wrote prev_ref_p_e_hat, while reward()
            # reads prev_ref_p_e — the end-effector term never updated
            self.prev_ref_p_e = np.asarray([
                body.world_transform()[:3, 3] for body in self.ref_body_e
            ]).flatten()
            self.prev_ref_com = self.ref_skel.com()

        p = self.goal_in_world_frame

        # goal in the RNN's 2D coordinate / scale conventions, clamped to a
        # radius of 80 units
        target = Pose2d(
            [p[0] / self.RNN_MOTION_SCALE, -p[2] / self.RNN_MOTION_SCALE])
        target = self.rnn.pose.relativePose(target)
        target = target.p
        t_len = v_len(target)
        if t_len > 80:
            ratio = 80 / t_len
            target[0] *= ratio
            target[1] *= ratio

        contacts, points, angles, orientations, root_orientation = self.rnn.step(
            target)

        # copy RNN joint angles into the reference skeleton; joint 0 is the
        # free root joint (orientation + translation)
        for j in range(len(self.ref_skel.joints)):
            if j == 0:
                joint = self.ref_skel.joints[j]  # type: pydart.FreeJoint
                joint_idx = self.rnn_joint_list.index(joint.name)
                hip_angles = mm.logSO3(
                    np.dot(root_orientation, orientations[joint_idx]))
                joint.set_position(
                    np.array([
                        hip_angles[0], hip_angles[1], hip_angles[2],
                        points[0][0], points[0][1], points[0][2]
                    ]))
                continue
            joint = self.ref_skel.joints[j]  # type: pydart.BallJoint
            joint_idx = self.rnn_joint_list.index(joint.name)
            joint.set_position(angles[joint_idx * 3:joint_idx * 3 + 3])

        # IK pass pins elbows/hands/knees/feet to the RNN point targets
        self.ik.clean_constraints()
        self.ik.add_joint_pos_const('LeftForeArm', np.asarray(points[10]))
        self.ik.add_joint_pos_const('LeftHand', np.asarray(points[2]))
        self.ik.add_joint_pos_const('LeftLeg', np.asarray(points[11]))
        self.ik.add_joint_pos_const('LeftFoot', np.asarray(points[3]))
        # deliberately disabled (`and False`): contact-based foot orientation
        if contacts[0] > 0.8 and False:
            body_transform = self.ref_skel.body('LeftFoot').transform()[:3, :3]
            angle = acos(body_transform[1, 1])
            body_ori = np.dot(body_transform, mm.rotX(-angle))
            self.ik.add_orientation_const('LeftFoot', body_ori)

        self.ik.add_joint_pos_const('RightForeArm', np.asarray(points[12]))
        self.ik.add_joint_pos_const('RightHand', np.asarray(points[5]))
        self.ik.add_joint_pos_const('RightLeg', np.asarray(points[13]))
        self.ik.add_joint_pos_const('RightFoot', np.asarray(points[6]))
        self.ik.solve()

        # fixed toe-down/outward offsets applied on top of the IK feet
        foot_joint_ori = mm.exp(self.ref_skel.joint('LeftFoot').position())
        self.ref_skel.joint('LeftFoot').set_position(
            mm.logSO3(np.dot(foot_joint_ori, np.dot(mm.rotX(-.6),
                                                    mm.rotZ(.4)))))
        foot_joint_ori = mm.exp(self.ref_skel.joint('RightFoot').position())
        self.ref_skel.joint('RightFoot').set_position(
            mm.logSO3(
                np.dot(foot_joint_ori, np.dot(mm.rotX(-.6), mm.rotZ(-.4)))))

        # finite-difference reference velocity (30 Hz frame rate assumed —
        # TODO confirm)
        if not self.first:
            dq = 30. * self.ref_skel.position_differences(
                self.ref_skel.positions(), self.prev_ref_q)
            self.ref_skel.set_velocities(dq)

        if reset:
            self.prev_ref_q = self.ref_skel.positions()
            self.prev_ref_dq = self.ref_skel.velocities()
            # bug fix: same prev_ref_p_e_hat / prev_ref_p_e mismatch as above
            self.prev_ref_p_e = np.asarray([
                body.world_transform()[:3, 3] for body in self.ref_body_e
            ]).flatten()
            self.prev_ref_com = self.ref_skel.com()

    def render(self, mode='human', close=False):
        """Renders the environment.
        The set of supported modes varies per environment. (And some
        environments do not support rendering at all.)

        # Arguments
            mode (str): The mode to render with.
            close (bool): Close all open renderings.
        """
        return

    def close(self):
        """Override in your subclass to perform any necessary cleanup.
        Environments will automatically close() themselves when
        garbage collected or when the program exits.
        """
        pass

    def seed(self, seed=None):
        """Sets the seed for this env's random number generator(s).

        # Returns
            Returns the list of seeds used in this env's random number generators
        """
        self.np_random, seed = gym.utils.seeding.np_random(seed)
        return [seed]

    # --- thin adapter methods for a multi-slave training interface ---

    def GetState(self, param):
        """Adapter: return the current observation (param is ignored)."""
        return self.state()

    def GetStates(self):
        """Adapter: single-slave list of observations."""
        return [self.state()]

    def Resets(self, rsi):
        """Adapter: set reference-state-initialization flag and reset."""
        self.rsi = rsi
        self.reset()

    def Reset(self, rsi, idx):
        """Adapter: per-slave reset (idx is ignored; single slave)."""
        self.rsi = rsi
        self.reset()

    def Steps(self, actions):
        """Adapter: step with the first slave's action."""
        return self.step(actions[0])

    def IsTerminalState(self, j):
        """Adapter: terminal check (j is ignored; single slave)."""
        return self.is_done()

    def GetReward(self, j):
        """Adapter: reward query (j is ignored; single slave)."""
        return self.reward()

    def IsTerminalStates(self):
        """Adapter: single-slave list of terminal flags."""
        return [self.is_done()]
Example #3
0
    def __init__(self, env_name='walk'):
        """Build the simulated/reference DART worlds, the RNN reference-motion
        controller, reward weights, and gym spaces for *env_name*."""
        self.skel_file_name = '../data/cmu_with_ground.xml'
        # simulated world at a 1/1200 s physics step; skeleton 1 is the
        # controlled character
        self.world = pydart.World(1. / 1200., self.skel_file_name)
        self.world.control_skel = self.world.skeletons[1]
        self.skel = self.world.skeletons[1]
        self.Kp, self.Kd = 400., 40.
        # consistency fix: pass the stored PD gains instead of repeating the
        # literals
        self.pdc = PDController(self.skel, self.world.time_step(), self.Kp,
                                self.Kd)

        self.env_name = env_name

        # kinematic-only world holding the reference (target) pose
        self.ref_world = pydart.World(1. / 1200., self.skel_file_name)
        self.ref_skel = self.ref_world.skeletons[1]

        # physics sub-steps per control step
        self.step_per_frame = 40

        # RNN motion generator and the joint ordering its outputs use
        self.rnn = RNNController(env_name)
        self.RNN_MOTION_SCALE = 0.01
        self.rnn_joint_list = [
            "Head", "Hips", "LHipJoint", "LeftArm", "LeftFoot", "LeftForeArm",
            "LeftHand", "LeftLeg", "LeftShoulder", "LeftToeBase", "LeftUpLeg",
            "LowerBack", "Neck", "Neck1", "RHipJoint", "RightArm", "RightFoot",
            "RightForeArm", "RightHand", "RightLeg", "RightShoulder",
            "RightToeBase", "RightUpLeg", "Spine", "Spine1"
        ]
        # probability per control step of resampling the navigation target
        self.rnn_target_update_prob = 1. / 150.
        self.ik = DartIk(self.ref_skel)

        self.rsi = True

        # reward weights: goal, pose, velocity, end-effector, COM
        self.w_g = 0.3
        self.w_p = 0.65 * .7
        self.w_v = 0.1 * .7
        self.w_e = 0.15 * .7
        self.w_c = 0.1 * .7

        # exponents for the exponential reward terms
        self.exp_g = 2.5
        self.exp_p = 2.
        self.exp_v = 0.1
        self.exp_e = 40.
        self.exp_c = 10.

        self.body_num = self.skel.num_bodynodes()
        # end-effector body indices: feet and hands
        self.idx_e = [
            self.skel.bodynode_index('LeftFoot'),
            self.skel.bodynode_index('RightFoot'),
            self.skel.bodynode_index('LeftHand'),
            self.skel.bodynode_index('RightHand')
        ]
        self.body_e = list(map(self.skel.body, self.idx_e))
        self.ref_body_e = list(map(self.ref_skel.body, self.idx_e))

        self.time_offset = 0.
        self.step_num = 0

        # state_num = 2 + (3*3 + 4) * self.body_num
        state_num = 2 + 2 * 3 * self.body_num + 3 * (self.skel.num_dofs() - 3)
        action_num = self.skel.num_dofs() - 6

        state_high = np.array([np.finfo(np.float32).max] * state_num)
        action_high = np.array([pi * 10. / 2.] * action_num)

        self.action_space = gym.spaces.Box(-action_high,
                                           action_high,
                                           dtype=np.float32)
        self.observation_space = gym.spaces.Box(-state_high,
                                                state_high,
                                                dtype=np.float32)

        self.viewer = None

        self.phase_frame = 0
        self.goal = np.zeros(2)
        self.goal_prev = self.goal.copy()
        # bug fix: the reward computation reads prev_goal, which is otherwise
        # only set during a goal update; initialize it so an early reward
        # query cannot raise AttributeError
        self.prev_goal = self.goal.copy()
        self.goal_in_world_frame = np.zeros(3)

        self.first = True

        # previous reference quantities used by the reward
        self.prev_ref_q = self.ref_skel.positions()
        self.prev_ref_dq = np.zeros(self.ref_skel.num_dofs())
        self.prev_ref_p_e = np.asarray([
            body.world_transform()[:3, 3] for body in self.ref_body_e
        ]).flatten()
        self.prev_ref_com = self.ref_skel.com()
Example #4
0
    def __init__(self, rnn_len):
        """Build the simulated/reference DART worlds and gym spaces, using the
        first *rnn_len* frames of the CMU T-pose clip as reference motion.

        # Arguments
            rnn_len (int): number of reference frames (and the stored
                ``ref_motion_len``).
        """
        skel_file = '../data/cmu_with_ground.xml'
        # simulated world at a 1/1200 s physics step; skeleton 1 is the
        # controlled character
        self.world = pydart.World(1./1200., skel_file)
        self.world.control_skel = self.world.skeletons[1]
        self.skel = self.world.skeletons[1]
        # NOTE(review): Kp is stored as 800 here, but the PDController below
        # is constructed with 400 — one of the two is likely stale; confirm
        # which gain is intended before relying on self.Kp.
        self.Kp, self.Kd = 800., 40.
        self.pdc = PDController(self.skel, self.world.time_step(), 400., 40.)

        self.ref_motion = yf.readBvhFile("../data/cmu_tpose.bvh")[:rnn_len]
        self.ref_motion_len = rnn_len

        # kinematic-only world holding the reference (target) pose
        self.ref_world = pydart.World(1./1200., skel_file)
        self.ref_skel = self.ref_world.skeletons[1]
        # physics sub-steps per control step
        self.step_per_frame = 40

        # reference-state initialization flag
        self.rsi = True

        # self.w_g = 0.3
        # self.w_p = 0.65 * .7
        # self.w_v = 0.1 * .7
        # self.w_e = 0.15 * .7
        # self.w_c = 0.1 * .7

        # reward weights: pose, velocity, end-effector, COM
        self.w_p = 0.65
        self.w_v = 0.1
        self.w_e = 0.15
        self.w_c = 0.1

        # exponents for the exponential reward terms
        self.exp_g = 2.5
        self.exp_p = 2.
        self.exp_v = 0.1
        self.exp_e = 40.
        self.exp_c = 10.

        self.body_num = self.skel.num_bodynodes()
        # end-effector body indices: feet and hands
        self.idx_e = [self.skel.bodynode_index('LeftFoot'), self.skel.bodynode_index('RightFoot'),
                      self.skel.bodynode_index('LeftHand'), self.skel.bodynode_index('RightHand')]
        self.body_e = list(map(self.skel.body, self.idx_e))
        self.ref_body_e = list(map(self.ref_skel.body, self.idx_e))
        self.motion_len = len(self.ref_motion)
        self.motion_time = len(self.ref_motion) / self.ref_motion.fps

        # observation: 2 goal coords + per-body kinematic features; action:
        # one target offset per actuated dof (6 root dofs excluded)
        state_num = 2 + (3*3 + 4) * self.body_num + (3 + 4) * self.body_num
        action_num = self.skel.num_dofs() - 6

        state_high = np.array([np.finfo(np.float32).max] * state_num)
        action_high = np.array([pi*10./2.] * action_num)

        self.action_space = gym.spaces.Box(-action_high, action_high, dtype=np.float32)
        self.observation_space = gym.spaces.Box(-state_high, state_high, dtype=np.float32)

        self.viewer = None

        self.phase_frame = 0

        self.goals_in_world_frame = list()
        self.goal = np.zeros(2)

        # previous reference quantities used by the reward
        self.prev_ref_q = np.zeros(self.ref_skel.num_dofs())
        self.prev_ref_dq = np.zeros(self.ref_skel.num_dofs())
        self.prev_ref_p_e_hat = np.asarray([body.world_transform()[:3, 3] for body in self.ref_body_e]).flatten()
        self.prev_ref_com = self.ref_skel.com()
        self.prev_goal = np.zeros(2)
Example #5
0
    def __init__(self):
        """Build the simulated/reference DART worlds and load the pool of BVH
        reference motions used for multi-clip tracking with a goal window."""
        # simulated world at a 1/1200 s physics step; skeleton 1 is the
        # controlled character
        self.world = pydart.World(1./1200., "../data/woody_with_ground.xml")
        self.world.control_skel = self.world.skeletons[1]
        self.skel = self.world.skeletons[1]
        # consistency fix: define the PD gains first and pass them to the
        # controller instead of repeating the literals (same values as before)
        self.Kp, self.Kd = 400., 40.
        self.pdc = PDController(self.skel, self.world.time_step(), self.Kp,
                                self.Kd)

        self.ik = DartIk(self.skel)

        # pool of reference clips; slices trim lead-in/lead-out frames
        self.ref_motions = list()  # type: list[ym.Motion]
        self.ref_motions.append(yf.readBvhFile("../data/woody_walk_normal.bvh")[40:])
        self.ref_motions.append(yf.readBvhFile("../data/wd2_WalkForwardVFast00.bvh"))
        self.ref_motions.append(yf.readBvhFile("../data/wd2_WalkForwardSlow01.bvh"))
        self.ref_motions.append(yf.readBvhFile("../data/walk_right_45degree.bvh")[21:134])
        self.ref_motions.append(yf.readBvhFile("../data/walk_left_45degree.bvh")[26:141])
        # self.ref_motions.append(yf.readBvhFile("../data/walk_left_90degree.bvh")[21:153])
        # self.ref_motions.append(yf.readBvhFile('../data/wd2_WalkSukiko00.bvh'))
        # self.ref_motions.append(yf.readBvhFile("../data/wd2_jump0.bvh")[164:280])
        # self.ref_motions.append(yf.readBvhFile("../data/wd2_u-turn.bvh")[25:214])
        # self.ref_motions[-1].translateByOffset([0., 0.03, 0.])
        self.motion_num = len(self.ref_motions)
        # normalize per-clip rewards by frame rate relative to the first clip
        self.reward_weights_by_fps = [self.ref_motions[0].fps / self.ref_motions[i].fps for i in range(self.motion_num)]

        self.ref_motion = self.ref_motions[0]

        # kinematic-only world holding the reference (target) pose
        self.ref_world = pydart.World(1./1200., "../data/woody_with_ground.xml")
        self.ref_skel = self.ref_world.skeletons[1]
        # set later, per selected clip (depends on clip fps)
        self.step_per_frame = None

        # reference-state initialization flag
        self.rsi = True

        self.specified_motion_num = 0
        self.is_motion_specified = False

        # reward weights: pose, velocity, end-effector, COM
        self.w_p = 0.65
        self.w_v = 0.1
        self.w_e = 0.15
        self.w_c = 0.1

        # exponents for the exponential reward terms
        self.exp_g = 2.5
        self.exp_p = 2.
        self.exp_v = 0.1
        self.exp_e = 40.
        self.exp_c = 10.

        self.body_num = self.skel.num_bodynodes()
        # end-effector body indices: feet and forearms
        self.idx_e = [self.skel.bodynode_index('LeftFoot'), self.skel.bodynode_index('RightFoot'),
                      self.skel.bodynode_index('LeftForeArm'), self.skel.bodynode_index('RightForeArm')]
        self.body_e = list(map(self.skel.body, self.idx_e))
        self.ref_body_e = list(map(self.ref_skel.body, self.idx_e))
        self.motion_len = len(self.ref_motion)

        self.goals_in_world_frame = list()
        self.goal = np.zeros(2)

        self.goals = list()
        # number of future goal frames exposed in the observation
        self.goal_window = 45

        # observation: 3 values per goal-window frame + per-body kinematics;
        # action: all dofs including the root here
        state_num = 3*self.goal_window + (3*3 + 4) * self.body_num
        # action_num = self.skel.num_dofs() - 6
        action_num = self.skel.num_dofs()

        state_high = np.array([np.finfo(np.float32).max] * state_num)
        action_high = np.array([pi*2.] * action_num)

        self.action_space = gym.spaces.Box(-action_high, action_high, dtype=np.float32)
        self.observation_space = gym.spaces.Box(-state_high, state_high, dtype=np.float32)

        self.viewer = None

        self.phase_frame = 0

        # previous reference quantities used by the reward
        self.prev_ref_q = np.zeros(self.ref_skel.num_dofs())
        self.prev_ref_dq = np.zeros(self.ref_skel.num_dofs())
        self.prev_ref_p_e_hat = np.asarray([body.world_transform()[:3, 3] for body in self.ref_body_e]).flatten()
        self.prev_ref_com = self.ref_skel.com()

        # joints whose pose error contributes to the tracking reward
        # (idiom fix: list literal instead of eleven append() calls)
        self.reward_joint = [
            'j_RightUpLeg', 'j_RightLeg',
            'j_LeftUpLeg', 'j_LeftLeg',
            'j_Spine', 'j_Spine1',
            'j_RightArm', 'j_RightForeArm',
            'j_LeftArm', 'j_LeftForeArm',
        ]
Example #6
0
class HpDartEnv(gym.Env):
    """Gym environment in which a PD-controlled DART character imitates a
    single reference BVH motion clip (DeepMimic-style imitation reward).

    NOTE(review): depends on project modules not visible in this file
    (pydart, yf/ym motion utilities, mm math helpers, PDController,
    exp_reward_term) and on BVH/XML assets under ../data/.
    """

    def __init__(self, env_name='walk', env_slaves=1):
        """Build the simulation world, load the reference motion selected by
        *env_name*, and set up reward weights plus gym action/observation
        spaces.

        # Arguments
            env_name (str): key selecting which BVH clip to imitate.
            env_slaves (int): accepted for interface compatibility; not used
                in this constructor.
        """
        self.world = pydart.World(1. / 1200., "../data/woody_with_ground.xml")
        self.world.control_skel = self.world.skeletons[1]
        self.skel = self.world.skeletons[1]
        # PD tracking controller with Kp=400, Kd=40.
        self.pdc = PDController(self.skel, self.world.time_step(), 400., 40.)

        self.env_name = env_name

        self.ref_motion = None  # type: ym.Motion

        # Select the reference clip; some clips are sliced to a sub-range of
        # frames and/or lifted slightly off the ground via translateByOffset.
        if env_name == 'walk':
            self.ref_motion = yf.readBvhFile(
                "../data/woody_walk_normal.bvh")[40:]
        elif env_name == 'spiral_walk':
            self.ref_motion = yf.readBvhFile(
                "../data/wd2_spiral_walk_normal05.bvh")
        elif env_name == 'walk_spin':
            self.ref_motion = yf.readBvhFile(
                "../data/wd2_2foot_walk_turn2.bvh")
        elif env_name == 'jump':
            self.ref_motion = yf.readBvhFile("../data/wd2_jump0.bvh")[164:280]
        elif env_name == 'walk_fast':
            self.ref_motion = yf.readBvhFile(
                "../data/wd2_WalkForwardVFast00.bvh")

        elif env_name == 'walk_left_90':
            self.ref_motion = yf.readBvhFile("../data/walk_left_90degree.bvh")
        elif env_name == 'walk_left_45':
            self.ref_motion = yf.readBvhFile("../data/walk_left_45degree.bvh")
        elif env_name == 'walk_pick':
            self.ref_motion = yf.readBvhFile("../data/wd2_pick_walk_1.bvh")
        elif env_name == 'walk_u_turn':
            self.ref_motion = yf.readBvhFile("../data/wd2_u-turn.bvh")[25:214]
            self.ref_motion.translateByOffset([0., 0.03, 0.])

        elif env_name == 'jump_whole':
            self.ref_motion = yf.readBvhFile("../data/wd2_jump0.bvh")
        elif env_name == 'walk_u_turn_whole':
            self.ref_motion = yf.readBvhFile("../data/wd2_u-turn.bvh")
            self.ref_motion.translateByOffset([0., 0.03, 0.])

        # A second, kinematic-only world holds the reference skeleton so the
        # simulated and reference poses can be compared side by side.
        self.ref_world = pydart.World(1. / 1200.,
                                      "../data/woody_with_ground.xml")
        self.ref_skel = self.ref_world.skeletons[1]
        # Number of physics sub-steps per reference-motion frame.
        self.step_per_frame = round(
            (1. / self.world.time_step()) / self.ref_motion.fps)

        # Reference State Initialization: start episodes at a random frame.
        self.rsi = True

        # Reward weights: pose (p), velocity (v), end-effector (e), COM (c).
        self.w_p = 0.65
        self.w_v = 0.1
        self.w_e = 0.15
        self.w_c = 0.1

        # Exponential falloff scales for the corresponding reward terms.
        self.exp_p = 2.
        self.exp_v = 0.1
        self.exp_e = 40.
        self.exp_c = 10.

        self.body_num = self.skel.num_bodynodes()
        # "End-effector" bodies used in the end-effector reward term.
        self.idx_e = [
            self.skel.bodynode_index('LeftFoot'),
            self.skel.bodynode_index('RightFoot'),
            self.skel.bodynode_index('LeftForeArm'),
            self.skel.bodynode_index('RightForeArm')
        ]
        self.body_e = list(map(self.skel.body, self.idx_e))
        self.ref_body_e = list(map(self.ref_skel.body, self.idx_e))
        # Clip duration in seconds.
        self.total_time = len(self.ref_motion) / self.ref_motion.fps

        # Seconds corresponding to the RSI start frame (set in reset()).
        self.time_offset = 0.

        # Observation: 1 phase scalar + per-body (position 3, quaternion 4,
        # linear velocity 3, angular velocity 3).
        state_num = 1 + (3 * 3 + 4) * self.body_num
        # Action: one offset per joint DOF, excluding the 6 root DOFs.
        action_num = self.skel.num_dofs() - 6

        state_high = np.array([np.finfo(np.float32).max] * state_num)
        action_high = np.array([pi * 10. / 2.] * action_num)

        self.action_space = gym.spaces.Box(-action_high,
                                           action_high,
                                           dtype=np.float32)
        self.observation_space = gym.spaces.Box(-state_high,
                                                state_high,
                                                dtype=np.float32)

        self.viewer = None

    def state(self):
        """Return the observation vector.

        Layout: [phase] + pelvis-relative body positions + pelvis-relative
        body orientation quaternions + world-frame linear velocities +
        world-frame angular velocities scaled by 1/20.

        NOTE(review): unlike HpDartMultiEnv.state(), positions and velocities
        here are NOT rotated into the pelvis frame (only translated) —
        confirm whether this asymmetry is intentional.
        """
        p_pelvis = self.skel.body(0).world_transform()[:3, 3]
        R_pelvis = self.skel.body(0).world_transform()[:3, :3]

        # Normalized clip phase in [0, ~1]; exceeds 1 only just before
        # is_done() terminates the episode.
        phase = (self.world.time() + self.time_offset) / self.total_time
        state = [phase]

        p = np.array([
            self.skel.body(i).to_world() - p_pelvis
            for i in range(self.body_num)
        ]).flatten()
        # R = [mm.logSO3(np.dot(R_pelvis.T, self.skel.body(i).world_transform()[:3, :3]))/pi for i in range(self.body_num)]
        R = np.array([
            mm.rot2quat(
                np.dot(R_pelvis.T,
                       self.skel.body(i).world_transform()[:3, :3]))
            for i in range(self.body_num)
        ]).flatten()
        v = np.array([
            self.skel.body(i).world_linear_velocity()
            for i in range(self.body_num)
        ]).flatten()
        w = np.array([
            self.skel.body(i).world_angular_velocity() / 20.
            for i in range(self.body_num)
        ]).flatten()

        state.extend(p)
        state.extend(R)
        state.extend(v)
        state.extend(w)
        # print('p', np.linalg.norm(p))
        # print('R', np.linalg.norm(R))
        # print('v', np.linalg.norm(v))
        # print('w', np.linalg.norm(w))

        return np.asarray(state).flatten()

    def reward(self):
        """Return the imitation reward: a weighted sum of exponential terms
        comparing the simulated skeleton to the reference pose at the
        current time — joint positions (w_p), joint velocities (w_v),
        end-effector world positions (w_e), and center of mass (w_c).

        Side effect: poses the reference skeleton at the current frame.
        """
        # Clamp to the last frame so time slightly past the clip end is safe.
        current_frame = min(
            len(self.ref_motion) - 1,
            int((self.world.time() + self.time_offset) * self.ref_motion.fps))
        self.ref_skel.set_positions(self.ref_motion[current_frame].get_q())
        self.ref_skel.set_velocities(self.ref_motion.get_dq(current_frame))

        p_e_hat = np.asarray([
            body.world_transform()[:3, 3] for body in self.ref_body_e
        ]).flatten()
        p_e = np.asarray(
            [body.world_transform()[:3, 3] for body in self.body_e]).flatten()

        return exp_reward_term(self.w_p, self.exp_p, self.skel.q, self.ref_skel.q) \
              + exp_reward_term(self.w_v, self.exp_v, self.skel.dq, self.ref_skel.dq) \
              + exp_reward_term(self.w_e, self.exp_e, p_e, p_e_hat) \
              + exp_reward_term(self.w_c, self.exp_c, self.skel.com(), self.ref_skel.com())

    def is_done(self):
        """Return True when the episode should terminate: the character has
        fallen (COM height < 0.4), the simulation produced NaNs, or the
        reference clip has been exhausted.
        """
        if self.skel.com()[1] < 0.4:
            return True
        # Unidiomatic NaN check (equivalent to np.isnan(...).any()); kept
        # as-is to preserve the code byte-for-byte.
        elif True in np.isnan(np.asarray(self.skel.q)) or True in np.isnan(
                np.asarray(self.skel.dq)):
            return True
        elif self.world.time() + self.time_offset > self.total_time:
            return True
        return False

    def step(self, _action):
        """Run one timestep of the environment's dynamics.
        Accepts an action and returns a tuple (observation, reward, done, info).

        # Arguments
            action (object): An action provided by the environment.

        # Returns
            observation (object): Agent's observation of the current environment.
            reward (float) : Amount of reward returned after previous action.
            done (boolean): Whether the episode has ended, in which case further step() calls will return undefined results.
            info (dict): Contains auxiliary diagnostic information (helpful for debugging, and sometimes learning).
        """
        # Prepend zero offsets for the 6 uncontrolled root DOFs and shrink
        # the policy output by 10x before adding it to the reference pose.
        action = np.hstack((np.zeros(6), _action / 10.))

        current_frame = min(
            len(self.ref_motion) - 1,
            int((self.world.time() + self.time_offset) * self.ref_motion.fps))
        self.ref_skel.set_positions(self.ref_motion[current_frame].get_q())
        self.ref_skel.set_velocities(self.ref_motion.get_dq(current_frame))
        # Hold the PD target (reference pose + action offset) fixed for one
        # motion frame's worth of physics sub-steps.
        for i in range(self.step_per_frame):
            self.skel.set_forces(
                self.pdc.compute_flat(self.ref_skel.q + action))
            self.world.step()
        return tuple([self.state(), self.reward(), self.is_done(), dict()])

    def reset(self):
        """
        Resets the state of the environment and returns an initial observation.

        # Returns
            observation (object): The initial observation of the space. Initial reward is assumed to be 0.
        """
        self.world.reset()
        # rand_frame = randrange(0, len(self.ref_motion)//2)
        # RSI: start at a random clip frame; deterministic frame 0 otherwise.
        rand_frame = randrange(0, len(self.ref_motion))
        if not self.rsi:
            rand_frame = 0
        self.time_offset = rand_frame / self.ref_motion.fps
        self.skel.set_positions(self.ref_motion[rand_frame].get_q())
        dq = self.ref_motion.get_dq(rand_frame)
        # Replace the root linear-velocity DOFs with the motion's local root
        # velocity — presumably converting to the convention the DART root
        # joint expects; TODO(review) confirm against ym.Motion.
        dq[3:6] = np.asarray(
            self.ref_motion.getDOFVelocitiesLocal(rand_frame)[0][:3])
        self.skel.set_velocities(dq)
        # self.skel.set_velocities(self.ref_motion.get_dq(rand_frame))

        return self.state()

    def render(self, mode='human', close=False):
        """Renders the environment.
        The set of supported modes varies per environment. (And some
        environments do not support rendering at all.)

        # Arguments
            mode (str): The mode to render with.
            close (bool): Close all open renderings.
        """
        # Lazy-build a 2D debug view: black circles for the simulated bodies,
        # red circles for the reference bodies (x/y projection only).
        if self.viewer is None:
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(500, 500)
            self.viewer.set_bounds(-2.2, 2.2, -2.2, 2.2)
            # rod = rendering.make_capsule(1, .2)
            # rod.set_color(.8, .3, .3)
            # rod.add_attr(self.pole_transform)
            # self.viewer.add_geom(rod)
            self.body_transform = list()
            self.ref_body_transform = list()
            for i in range(self.body_num):
                axle = rendering.make_circle(.05)
                axle.set_color(0, 0, 0)
                self.body_transform.append(rendering.Transform())
                axle.add_attr(self.body_transform[i])
                self.viewer.add_geom(axle)

            for i in range(self.body_num):
                axle = rendering.make_circle(.05)
                axle.set_color(1, 0, 0)
                self.ref_body_transform.append(rendering.Transform())
                axle.add_attr(self.ref_body_transform[i])
                self.viewer.add_geom(axle)

        for i in range(self.body_num):
            self.body_transform[i].set_translation(
                self.skel.body(i).world_transform()[:3, 3][0] - 1.,
                self.skel.body(i).world_transform()[:3, 3][1])
            self.ref_body_transform[i].set_translation(
                self.ref_skel.body(i).world_transform()[:3, 3][0] - 1.,
                self.ref_skel.body(i).world_transform()[:3, 3][1])

        return self.viewer.render(return_rgb_array=mode == 'rgb_array')

    def close(self):
        """Override in your subclass to perform any necessary cleanup.
        Environments will automatically close() themselves when
        garbage collected or when the program exits.
        """
        pass

    def seed(self, seed=None):
        """Sets the seed for this env's random number generator(s).

        # Returns
            Returns the list of seeds used in this env's random number generators
        """
        self.np_random, seed = gym.utils.seeding.np_random(seed)
        return [seed]

    # The PascalCase methods below adapt this env to a slave/multi-env style
    # interface; their extra parameters are accepted but unused.

    def GetState(self, param):
        """Return the current observation (param is ignored)."""
        return self.state()

    def GetStates(self):
        """Return the observation wrapped in a single-element list."""
        return [self.state()]

    def Resets(self, rsi):
        """Set the RSI flag, then reset."""
        self.rsi = rsi
        self.reset()

    def Reset(self, rsi, param1):
        """Set the RSI flag, then reset (param1 is ignored)."""
        self.rsi = rsi
        self.reset()

    def Steps(self, actions):
        """Step with the first action of a batch."""
        return self.step(actions[0])

    def IsTerminalState(self, j):
        """Return is_done() (j is ignored)."""
        return self.is_done()

    def GetReward(self, j):
        """Return reward() (j is ignored)."""
        return self.reward()

    def IsTerminalStates(self):
        """Return is_done() wrapped in a single-element list."""
        return [self.is_done()]
Exemple #7
0
    def __init__(self, env_slaves=1):
        """Build a multi-motion imitation environment: load the world, a
        pool of reference BVH clips, reward weights, and gym spaces.

        NOTE(review): this fragment's enclosing class is not visible here;
        the body is identical to HpDartMultiEnv.__init__.

        # Arguments
            env_slaves (int): accepted for interface compatibility; not used
                in this constructor.
        """
        self.world = pydart.World(1. / 1200., "../data/woody_with_ground.xml")
        self.world.control_skel = self.world.skeletons[1]
        self.skel = self.world.skeletons[1]
        # PD tracking controller with Kp=400, Kd=40 (gains also kept as
        # attributes for external use).
        self.pdc = PDController(self.skel, self.world.time_step(), 400., 40.)
        self.Kp, self.Kd = 400., 40.

        # Pool of reference clips; one is picked per episode.
        self.ref_motions = list()  # type: list[ym.Motion]
        self.ref_motions.append(
            yf.readBvhFile("../data/woody_walk_normal.bvh")[40:])
        # self.ref_motions.append(yf.readBvhFile("../data/wd2_jump0.bvh")[164:280])
        self.ref_motions.append(yf.readBvhFile('../data/wd2_WalkSukiko00.bvh'))
        self.ref_motions.append(
            yf.readBvhFile("../data/walk_left_90degree.bvh"))
        self.ref_motions.append(
            yf.readBvhFile("../data/wd2_u-turn.bvh")[25:214])
        self.ref_motions[-1].translateByOffset([0., 0.03, 0.])
        self.ref_motions.append(
            yf.readBvhFile("../data/wd2_WalkForwardVFast00.bvh"))
        self.motion_num = len(self.ref_motions)
        # Per-clip reward scaling relative to the first clip's frame rate.
        self.reward_weights_by_fps = [
            self.ref_motions[0].fps / self.ref_motions[i].fps
            for i in range(self.motion_num)
        ]

        self.ref_motion = self.ref_motions[0]

        # Kinematic-only world holding the reference skeleton.
        self.ref_world = pydart.World(1. / 1200.,
                                      "../data/woody_with_ground.xml")
        self.ref_skel = self.ref_world.skeletons[1]
        # Set per-episode in reset() once a clip (and its fps) is chosen.
        self.step_per_frame = None

        # Reference State Initialization flag.
        self.rsi = True

        # Which clip is active; fixed externally when is_motion_specified.
        self.specified_motion_num = 0
        self.is_motion_specified = False

        # Reward weights: pose (p), velocity (v), end-effector (e), COM (c).
        self.w_p = 0.65
        self.w_v = 0.1
        self.w_e = 0.15
        self.w_c = 0.1

        # Exponential falloff scales for the corresponding reward terms.
        self.exp_p = 2.
        self.exp_v = 0.1
        self.exp_e = 40.
        self.exp_c = 10.

        self.body_num = self.skel.num_bodynodes()
        # "End-effector" bodies used in the end-effector reward term.
        self.idx_e = [
            self.skel.bodynode_index('LeftFoot'),
            self.skel.bodynode_index('RightFoot'),
            self.skel.bodynode_index('LeftForeArm'),
            self.skel.bodynode_index('RightForeArm')
        ]
        self.body_e = list(map(self.skel.body, self.idx_e))
        self.ref_body_e = list(map(self.ref_skel.body, self.idx_e))
        # Active clip duration in seconds.
        self.motion_time = len(self.ref_motion) / self.ref_motion.fps

        self.time_offset = 0.

        # Observation: motion-id scalar + phase scalar + per-body
        # (position 3, quaternion 4, linear velocity 3, angular velocity 3).
        state_num = 2 + (3 * 3 + 4) * self.body_num
        # Action: one offset per joint DOF, excluding the 6 root DOFs.
        action_num = self.skel.num_dofs() - 6

        state_high = np.array([np.finfo(np.float32).max] * state_num)
        action_high = np.array([pi * 10. / 2.] * action_num)

        self.action_space = gym.spaces.Box(-action_high,
                                           action_high,
                                           dtype=np.float32)
        self.observation_space = gym.spaces.Box(-state_high,
                                                state_high,
                                                dtype=np.float32)

        self.viewer = None
Exemple #8
0
class HpDartMultiEnv(gym.Env):
    """Gym environment in which a PD-controlled DART character imitates one
    of several reference BVH clips; the active clip is chosen at reset
    (randomly, or fixed via specify_motion_num) and its id is exposed in
    the observation.

    NOTE(review): depends on project modules not visible in this file
    (pydart, yf/ym motion utilities, mm math helpers, PDController,
    exp_reward_term) and on BVH/XML assets under ../data/.
    """

    def __init__(self, env_slaves=1):
        """Build the simulation world, load the clip pool, and set up
        reward weights plus gym action/observation spaces.

        # Arguments
            env_slaves (int): accepted for interface compatibility; not used
                in this constructor.
        """
        self.world = pydart.World(1. / 1200., "../data/woody_with_ground.xml")
        self.world.control_skel = self.world.skeletons[1]
        self.skel = self.world.skeletons[1]
        # PD tracking controller with Kp=400, Kd=40 (gains also kept as
        # attributes for external use).
        self.pdc = PDController(self.skel, self.world.time_step(), 400., 40.)
        self.Kp, self.Kd = 400., 40.

        # Pool of reference clips; one is picked per episode in reset().
        self.ref_motions = list()  # type: list[ym.Motion]
        self.ref_motions.append(
            yf.readBvhFile("../data/woody_walk_normal.bvh")[40:])
        # self.ref_motions.append(yf.readBvhFile("../data/wd2_jump0.bvh")[164:280])
        self.ref_motions.append(yf.readBvhFile('../data/wd2_WalkSukiko00.bvh'))
        self.ref_motions.append(
            yf.readBvhFile("../data/walk_left_90degree.bvh"))
        self.ref_motions.append(
            yf.readBvhFile("../data/wd2_u-turn.bvh")[25:214])
        self.ref_motions[-1].translateByOffset([0., 0.03, 0.])
        self.ref_motions.append(
            yf.readBvhFile("../data/wd2_WalkForwardVFast00.bvh"))
        self.motion_num = len(self.ref_motions)
        # Per-clip reward scaling relative to the first clip's frame rate.
        self.reward_weights_by_fps = [
            self.ref_motions[0].fps / self.ref_motions[i].fps
            for i in range(self.motion_num)
        ]

        self.ref_motion = self.ref_motions[0]

        # Kinematic-only world holding the reference skeleton.
        self.ref_world = pydart.World(1. / 1200.,
                                      "../data/woody_with_ground.xml")
        self.ref_skel = self.ref_world.skeletons[1]
        # Set per-episode in reset() once a clip (and its fps) is chosen.
        self.step_per_frame = None

        # Reference State Initialization flag.
        self.rsi = True

        # Which clip is active; fixed externally when is_motion_specified.
        self.specified_motion_num = 0
        self.is_motion_specified = False

        # Reward weights: pose (p), velocity (v), end-effector (e), COM (c).
        self.w_p = 0.65
        self.w_v = 0.1
        self.w_e = 0.15
        self.w_c = 0.1

        # Exponential falloff scales for the corresponding reward terms.
        self.exp_p = 2.
        self.exp_v = 0.1
        self.exp_e = 40.
        self.exp_c = 10.

        self.body_num = self.skel.num_bodynodes()
        # "End-effector" bodies used in the end-effector reward term.
        self.idx_e = [
            self.skel.bodynode_index('LeftFoot'),
            self.skel.bodynode_index('RightFoot'),
            self.skel.bodynode_index('LeftForeArm'),
            self.skel.bodynode_index('RightForeArm')
        ]
        self.body_e = list(map(self.skel.body, self.idx_e))
        self.ref_body_e = list(map(self.ref_skel.body, self.idx_e))
        # Active clip duration in seconds.
        self.motion_time = len(self.ref_motion) / self.ref_motion.fps

        self.time_offset = 0.

        # Observation: motion-id scalar + phase scalar + per-body
        # (position 3, quaternion 4, linear velocity 3, angular velocity 3).
        state_num = 2 + (3 * 3 + 4) * self.body_num
        # Action: one offset per joint DOF, excluding the 6 root DOFs.
        action_num = self.skel.num_dofs() - 6

        state_high = np.array([np.finfo(np.float32).max] * state_num)
        action_high = np.array([pi * 10. / 2.] * action_num)

        self.action_space = gym.spaces.Box(-action_high,
                                           action_high,
                                           dtype=np.float32)
        self.observation_space = gym.spaces.Box(-state_high,
                                                state_high,
                                                dtype=np.float32)

        self.viewer = None

    def state(self):
        """Return the observation vector.

        Layout: [normalized motion id, phase] + pelvis-frame body positions
        + pelvis-relative orientation quaternions + pelvis-frame linear
        velocities + pelvis-frame angular velocities scaled by 1/20.
        """
        pelvis = self.skel.body(0)
        p_pelvis = pelvis.world_transform()[:3, 3]
        R_pelvis = pelvis.world_transform()[:3, :3]

        # Clip phase clamped to [0, 1].
        phase = min(1.,
                    (self.world.time() + self.time_offset) / self.motion_time)
        # Motion id normalized to [0, 1]; 0 when only one clip exists.
        state = [
            0. if len(self.ref_motions) == 1 else self.specified_motion_num /
            (len(self.ref_motions) - 1), phase
        ]

        # All body quantities are expressed in the pelvis frame (rotated by
        # R_pelvis.T), unlike HpDartEnv.state().
        p = np.array([
            np.dot(R_pelvis.T,
                   body.to_world() - p_pelvis) for body in self.skel.bodynodes
        ]).flatten()
        R = np.array([
            mm.rot2quat(np.dot(R_pelvis.T,
                               body.world_transform()[:3, :3]))
            for body in self.skel.bodynodes
        ]).flatten()
        v = np.array([
            np.dot(R_pelvis.T, body.world_linear_velocity())
            for body in self.skel.bodynodes
        ]).flatten()
        w = np.array([
            np.dot(R_pelvis.T, body.world_angular_velocity()) / 20.
            for body in self.skel.bodynodes
        ]).flatten()

        state.extend(p)
        state.extend(R)
        state.extend(v)
        state.extend(w)
        # print('p', np.linalg.norm(p))
        # print('R', np.linalg.norm(R))
        # print('v', np.linalg.norm(v))
        # print('w', np.linalg.norm(w))

        return np.asarray(state).flatten()

    def reward(self):
        """Return the imitation reward: weighted exponential terms comparing
        joint positions, joint velocities, end-effector positions, and COM
        against the reference pose at the current time, scaled by the active
        clip's fps-based weight.

        Side effect: poses the reference skeleton at the current time.
        """
        # current_frame = min(len(self.ref_motion)-1, int((self.world.time() + self.time_offset) * self.ref_motion.fps))
        current_time = self.world.time() + self.time_offset
        self.ref_skel.set_positions(
            self.ref_motion.get_q_by_time(current_time))
        self.ref_skel.set_velocities(
            self.ref_motion.get_dq_dart_by_time(current_time))

        p_e_hat = np.asarray([
            body.world_transform()[:3, 3] for body in self.ref_body_e
        ]).flatten()
        p_e = np.asarray(
            [body.world_transform()[:3, 3] for body in self.body_e]).flatten()

        reward = exp_reward_term(self.w_p, self.exp_p, self.skel.q, self.ref_skel.q) \
              + exp_reward_term(self.w_v, self.exp_v, self.skel.dq, self.ref_skel.dq) \
              + exp_reward_term(self.w_e, self.exp_e, p_e, p_e_hat) \
              + exp_reward_term(self.w_c, self.exp_c, self.skel.com(), self.ref_skel.com())

        return reward * self.reward_weights_by_fps[self.specified_motion_num]

    def is_done(self):
        """Return True when the episode should terminate: the character has
        fallen (COM height < 0.45), the simulation produced NaNs, or the
        active clip has been exhausted.
        """
        if self.skel.com()[1] < 0.45:
            return True
        # Unidiomatic NaN check (equivalent to np.isnan(...).any()); kept
        # as-is to preserve the code byte-for-byte.
        elif True in np.isnan(np.asarray(self.skel.q)) or True in np.isnan(
                np.asarray(self.skel.dq)):
            return True
        elif self.world.time() + self.time_offset > self.motion_time:
            return True
        return False

    def step(self, _action):
        """Run one timestep of the environment's dynamics.
        Accepts an action and returns a tuple (observation, reward, done, info).

        # Arguments
            action (object): An action provided by the environment.

        # Returns
            observation (object): Agent's observation of the current environment.
            reward (float) : Amount of reward returned after previous action.
            done (boolean): Whether the episode has ended, in which case further step() calls will return undefined results.
            info (dict): Contains auxiliary diagnostic information (helpful for debugging, and sometimes learning).
        """
        # Prepend zero offsets for the 6 uncontrolled root DOFs and shrink
        # the policy output by 10x before adding it to the reference pose.
        action = np.hstack((np.zeros(6), _action / 10.))

        # Track the NEXT frame's reference pose (time-based lookup), unlike
        # HpDartEnv.step() which targets the current frame.
        next_frame_time = self.world.time(
        ) + self.time_offset + self.world.time_step() * self.step_per_frame
        self.ref_skel.set_positions(
            self.ref_motion.get_q_by_time(next_frame_time))
        self.ref_skel.set_velocities(
            self.ref_motion.get_dq_dart_by_time(next_frame_time))
        for i in range(self.step_per_frame):
            # self.skel.set_forces(self.skel.get_spd(self.ref_skel.q + action, self.world.time_step(), self.Kp, self.Kd))
            self.skel.set_forces(
                self.pdc.compute_flat(self.ref_skel.q + action))
            self.world.step()
        return tuple([self.state(), self.reward(), self.is_done(), dict()])

    def reset(self):
        """
        Resets the state of the environment and returns an initial observation.

        # Returns
            observation (object): The initial observation of the space. Initial reward is assumed to be 0.
        """
        self.world.reset()

        # Pick a clip at random unless one was fixed via specify_motion_num.
        if not self.is_motion_specified:
            self.specified_motion_num = randrange(0, self.motion_num)

        self.ref_motion = self.ref_motions[self.specified_motion_num]
        # Recompute per-clip quantities (fps may differ between clips).
        self.step_per_frame = round(
            (1. / self.world.time_step()) / self.ref_motion.fps)
        self.motion_time = len(self.ref_motion) / self.ref_motion.fps

        # RSI: start at a random clip frame; deterministic frame 0 otherwise.
        rand_frame = randrange(0, len(self.ref_motion)) if self.rsi else 0
        self.time_offset = rand_frame / self.ref_motion.fps

        self.skel.set_positions(self.ref_motion.get_q_by_time(
            self.time_offset))
        dq = self.ref_motion.get_dq_dart_by_time(self.time_offset)
        self.skel.set_velocities(dq)

        self.ref_skel.set_positions(
            self.ref_motion.get_q_by_time(self.time_offset))

        return self.state()

    def render(self, mode='human', close=False):
        """Renders the environment.
        The set of supported modes varies per environment. (And some
        environments do not support rendering at all.)

        # Arguments
            mode (str): The mode to render with.
            close (bool): Close all open renderings.

        NOTE(review): unlike HpDartEnv.render(), no viewer is ever created
        here (self.viewer stays None), so calling this raises
        AttributeError — confirm whether rendering is supported.
        """
        return self.viewer.render(return_rgb_array=mode == 'rgb_array')

    def close(self):
        """Override in your subclass to perform any necessary cleanup.
        Environments will automatically close() themselves when
        garbage collected or when the program exits.
        """
        pass

    def seed(self, seed=None):
        """Sets the seed for this env's random number generator(s).

        # Returns
            Returns the list of seeds used in this env's random number generators
        """
        self.np_random, seed = gym.utils.seeding.np_random(seed)
        return [seed]

    # The PascalCase methods below adapt this env to a slave/multi-env style
    # interface; their extra parameters are accepted but unused.

    def GetState(self, param):
        """Return the current observation (param is ignored)."""
        return self.state()

    def GetStates(self):
        """Return the observation wrapped in a single-element list."""
        return [self.state()]

    def Resets(self, rsi):
        """Set the RSI flag, then reset."""
        self.rsi = rsi
        self.reset()

    def Reset(self, rsi, param1):
        """Set the RSI flag, then reset (param1 is ignored)."""
        self.rsi = rsi
        self.reset()

    def Steps(self, actions):
        """Step with the first action of a batch."""
        return self.step(actions[0])

    def IsTerminalState(self, j):
        """Return is_done() (j is ignored)."""
        return self.is_done()

    def GetReward(self, j):
        """Return reward() (j is ignored)."""
        return self.reward()

    def IsTerminalStates(self):
        """Return is_done() wrapped in a single-element list."""
        return [self.is_done()]

    def specify_motion_num(self, num=-1):
        """Pin the active reference clip to index *num* for subsequent
        resets; any out-of-range value (including the default -1) reverts
        to random clip selection.
        """
        if 0 <= num < self.motion_num:
            self.is_motion_specified = True
            self.specified_motion_num = num
        else:
            self.is_motion_specified = False