def visualize_pose_from_expert_data(data_file, camera_id):
    expert_traj, pos_data, env_name, dt = load_pose_data(data_file, camera_id)
    pos_connection = POS_CONNECTION[env_name]

    # import pdb; pdb.set_trace()
    image_size = expert_traj[camera_id]['camera_info'][camera_id]['image_size']
    image = np.zeros([image_size, image_size, 3], dtype=np.uint8)
    env, _ = make_env(env_name, 1234, {})

    for i_pos_id in range(100):
        i_pos_data = pos_data[i_pos_id]

        # render the image
        image = env.render(camera_id=camera_id,
                           qpos=expert_traj[0]['qpos'][i_pos_id])

        fig = plt.figure()
        visualize_pose(image, i_pos_data, pos_connection, show=False)
        fig.canvas.draw()
        plt_results = np.array(fig.canvas.renderer._renderer)
        print('Processing %d out of %d' % (i_pos_id, 100))
        if i_pos_id == 0:
            height, width, _ = plt_results.shape
            output_dir = \
                data_file.replace('.npy', '_' + str(camera_id) + '.mp4')
            # cv2.VideoWriter expects the frame size as (width, height)
            video = cv2.VideoWriter(
                os.path.join(init_path.get_abs_base_dir(), output_dir),
                cv2.VideoWriter_fourcc(*'mp4v'), 40, (width, height))
        plt.imshow(plt_results)
        video.write(plt_results[:, :, [2, 1, 0]])
        plt.close()

    video.release()
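A minimal usage sketch of the function above; the .npy path is a placeholder, not a file that ships with the repository:

# Hypothetical invocation: load_pose_data is expected to resolve the data
# file, and camera_id selects which camera's poses get rendered.
visualize_pose_from_expert_data('expert_data/cheetah_run_pos.npy', camera_id=0)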
Example #2
    def __init__(self, env_name):
        self._env_name = env_name

        self._env, self._env_info = env_register.make_env(self._env_name, 1234)
        self._joint_range = \
            np.array(self._env._controller_info['range']) / 180.0 * np.pi
        self._len_qpos = self._env.get_qpos_size()
        self._control_info = self._env.get_controller_info()
Example #3
    def __init__(self, env_name, *args, **kwargs):
        remove_render = re.compile(r'__render$')

        self.env_name = remove_render.sub('', env_name)
        from mbbl.env import env_register
        self.env, _ = env_register.make_env(self.env_name, *args, **kwargs)
        self.episode_number = 0

        # Getting path from logger
        self.path = logger._get_path()
        self.obs_buffer = []
Example #4
def main(args=None):
    """Run script.

    Args:
        args: A list of argument strings to use instead of sys.argv.
    """
    args = parse_args(args)
    try:
        env = gym.make(args.env)
    except gym.error.Error:
        env, _ = env_register.make_env(args.env,
                                       None,
                                       misc_info={"reset_type": "gym"})
    stats = utils.stats.OnlineMeanVariance()

    try:
        print("Observation Space Shape:", env.observation_space.shape)
    except AttributeError:
        pass
    try:
        print("Action Space Shape:", env.action_space.shape)
    except AttributeError:
        pass

    for _ in tqdm.trange(args.num_samples):
        state = env.reset()
        stats.add(np.asarray(state))

    print("State Num Dimensions", len(state))
    print("mean\n", stats.mean())
    print("var\n", stats.variance())
    print("stddev\n", np.sqrt(stats.variance()))

    try:
        print()
        print(env.metadata["initial_state.mean"])
        print(np.diag(env.metadata["initial_state.covariance"]))
        print(np.sqrt(np.diag(env.metadata["initial_state.covariance"])))
    except (AttributeError, KeyError):
        pass
Example #5
    def _build_env(self):
        self._env, self._env_info = env_register.make_env(
            self.args.task, self._npr.randint(0, 9999),
            {'allow_monitor': self.args.monitor and self._worker_id == 0})
        self._env_solved_reward = self._env_info['SOLVED_REWARD'] \
            if 'SOLVED_REWARD' in self._env_info else np.inf
Example #6
def make_norm_env(cfg):
    if 'gym' in cfg.env_name:
        from mbbl.env.env_register import make_env
        misc_info = {'reset_type': 'gym'}
        if 'gym_pets' in cfg.env_name:
            misc_info['pets'] = True
        env, meta = make_env(cfg.env_name, rand_seed=cfg.seed, misc_info=misc_info)

        env.metadata = env._env.metadata
        env.reward_range = env._env.reward_range
        env.spec = env._env.spec
        env.unwrapped = env._env.unwrapped
        # env._configured = env._env._configured
        env.close = env._env.close
        env = RescaleAction(env, -1., 1.)
        # assert np.all(env._env.action_space.high == env._env.action_space.high)
        assert not cfg.max_episode_steps

        # env.action_space = env._env.action_space
        if cfg.env_name == 'gym_fswimmer' or 'gym_pets' in cfg.env_name:
            env._max_episode_steps = env.env._env_info['max_length']
        else:
            env._max_episode_steps = env.env._env._max_episode_steps

        def render(mode, height, width, camera_id):
            frame = env.env._env.render(mode='rgb_array')
            return frame

        env.render = render

        def set_seed(seed):
            if 'gym_pets' in cfg.env_name or cfg.env_name == 'gym_fswimmer':
                return env.env._env.seed(seed)
            else:
                return env.env._env.env.seed(seed)
    elif cfg.env_name == 'Humanoid-v2':
        env = gym.make('Humanoid-v2')
        env = RescaleAction(env, -1., 1.)
        assert not cfg.max_episode_steps

        env._max_episode_steps = env.env._max_episode_steps

        def render(mode, height, width, camera_id):
            frame = env.env.render(mode='rgb_array')
            return frame
        env.render = render

        def set_seed(seed):
            return env.env.seed(seed)
    elif cfg.env_name == 'pets_cheetah':
        from svg.env import register_pets_environments
        register_pets_environments()
        env = gym.make('PetsCheetah-v0')
        env = RescaleAction(env, -1., 1.)
        assert not cfg.max_episode_steps

        env._max_episode_steps = env.env._max_episode_steps

        def render(mode, height, width, camera_id):
            frame = env.env.render(mode='rgb_array')
            return frame
        env.render = render

        def set_seed(seed):
            return env.env.seed(seed)
    elif cfg.env_name == 'pets_reacher':
        from svg.env import register_pets_environments
        register_pets_environments()
        env = gym.make('PetsReacher-v0')
        env = RescaleAction(env, -1., 1.)
        assert not cfg.max_episode_steps

        env._max_episode_steps = env.env._max_episode_steps

        def render(mode, height, width, camera_id):
            frame = env.env.render(mode='rgb_array')
            return frame
        env.render = render

        def set_seed(seed):
            return env.env.seed(seed)
    elif cfg.env_name == 'pets_pusher':
        from svg.env import register_pets_environments
        register_pets_environments()
        env = gym.make('PetsPusher-v0')
        env = RescaleAction(env, -1., 1.)
        assert not cfg.max_episode_steps

        env._max_episode_steps = env.env._max_episode_steps

        def render(mode, height, width, camera_id):
            frame = env.env.render(mode='rgb_array')
            return frame
        env.render = render

        def set_seed(seed):
            return env.env.seed(seed)
    elif cfg.env_name == 'mbpo_hopper':
        env = gym.make('Hopper-v2')
        env = RescaleAction(env, -1., 1.)
        assert not cfg.max_episode_steps

        env._max_episode_steps = env.env._max_episode_steps

        def render(mode, height, width, camera_id):
            frame = env.env.render(mode='rgb_array')
            return frame
        env.render = render

        def set_seed(seed):
            return env.env.seed(seed)
    elif cfg.env_name == 'mbpo_walker2d':
        env = gym.make('Walker2d-v2')
        env = RescaleAction(env, -1., 1.)
        assert not cfg.max_episode_steps

        env._max_episode_steps = env.env._max_episode_steps

        # env.reset_old = env.reset
        # env.reset = lambda: env.reset_old()[0]
        def render(mode, height, width, camera_id):
            frame = env.env.render(mode='rgb_array')
            return frame
        env.render = render

        def set_seed(seed):
            return env.env.seed(seed)
    elif cfg.env_name == 'mbpo_ant':
        from .env import register_mbpo_environments
        register_mbpo_environments()
        env = gym.make('AntTruncatedObs-v2')
        env = RescaleAction(env, -1., 1.)
        assert not cfg.max_episode_steps

        env._max_episode_steps = env.env._max_episode_steps

        def render(mode, height, width, camera_id):
            frame = env.env.render(mode='rgb_array')
            return frame
        env.render = render

        def set_seed(seed):
            return env.env.seed(seed)
    elif cfg.env_name == 'mbpo_cheetah':
        from svg.env import register_mbpo_environments
        register_mbpo_environments()
        env = gym.make('HalfCheetah-v2')
        env = RescaleAction(env, -1., 1.)
        assert not cfg.max_episode_steps

        env._max_episode_steps = env.env._max_episode_steps

        def render(mode, height, width, camera_id):
            frame = env.env.render(mode='rgb_array')
            return frame
        env.render = render

        def set_seed(seed):
            return env.env.seed(seed)
    elif cfg.env_name == 'mbpo_humanoid':
        from svg.env import register_mbpo_environments
        register_mbpo_environments()
        env = gym.make('HumanoidTruncatedObs-v2')
        env = RescaleAction(env, -1., 1.)
        assert not cfg.max_episode_steps

        env._max_episode_steps = env.env._max_episode_steps

        def render(mode, height, width, camera_id):
            frame = env.env.render(mode='rgb_array')
            return frame
        env.render = render

        def set_seed(seed):
            return env.env.seed(seed)
    else:
        assert cfg.env_name.startswith('dmc_')
        env = dmc.make(cfg)

        if cfg.pixels:
            env = FrameStack(env, k=cfg.frame_stack)
            def set_seed(seed):
                return env.env.env._env.task.random.seed(seed)
        else:
            def set_seed(seed):
                return env.env._env.task.random.seed(seed)

    env.set_seed = set_seed

    return env
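A minimal usage sketch, assuming a namespace-style config carrying the fields make_norm_env reads above (env_name, seed, max_episode_steps, pixels); the project's actual config object may differ:

from types import SimpleNamespace

# Hypothetical config; the field names mirror those accessed in make_norm_env.
cfg = SimpleNamespace(env_name='gym_cheetah', seed=0,
                      max_episode_steps=None, pixels=False)
env = make_norm_env(cfg)
env.set_seed(cfg.seed)
obs = env.reset()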
TEST = "DYNAMICS_DERIVATIVE"  # 'REWARD_DERIVATIVE'
# TEST = 'REWARD_DERIVATIVE'

# candidate_names = invertedPendulum.env.PENDULUM
# candidate_names = pendulum.env.PENDULUM
# candidate_names = pets.env.ENV
candidate_names = humanoid.env.ENV
if __name__ == '__main__' and TEST == 'REWARD_DERIVATIVE':
    DERIVATIVE_EPS = 1e-6

    # test the walker
    # candidate_names = walker.env.WALKER
    # candidate_names = reacher.env.ARM_2D
    max_error = 0.0
    for env_name in candidate_names:
        env, _ = env_register.make_env(env_name, 123, {})
        env_info = env_register.get_env_info(env_name)
        derivative_env, _ = env_register.make_env(env_name, 234, {})

        data_dict = \
            {'action': np.random.uniform(-1, 1, [1, env_info['action_size']])}
        data_dict['start_state'], _, _, _ = env.reset()
        data_dict['start_state'] = data_dict['start_state'].reshape(1, -1)

        r_u = derivative_env.reward_derivative(data_dict, 'action')
        r_uu = derivative_env.reward_derivative(data_dict, 'action-action')
        r_x = derivative_env.reward_derivative(data_dict, 'state')
        r_xx = derivative_env.reward_derivative(data_dict, 'state-state')

        # test the derivative of the reward wrt action
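The snippet is cut off at this point. A finite-difference check along the lines the comment suggests might look like the sketch below; it assumes the derivative env exposes a scalar reward(data_dict) method, which is an assumption about the mbbl API rather than something shown in the snippet:

        # Sketch only: compare the analytic gradient r_u against finite
        # differences of the (assumed) scalar reward(data_dict) method.
        numeric_r_u = np.zeros_like(r_u)
        for act_id in range(env_info['action_size']):
            perturbed = {key: np.array(val, copy=True)
                         for key, val in data_dict.items()}
            perturbed['action'][0, act_id] += DERIVATIVE_EPS
            numeric_r_u[0, act_id] = (
                derivative_env.reward(perturbed) -
                derivative_env.reward(data_dict)) / DERIVATIVE_EPS
        max_error = max(max_error, float(np.max(np.abs(numeric_r_u - r_u))))
    print('max reward-gradient error: %.3e' % max_error)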
Example #8
    def build_network(self):
        self._env, self._env_info = env_register.make_env(
            self.args.task_name, self._npr.randint(0, 9999))

    def _build_env(self):
        self._env, self._env_info = env_register.make_env(
            self.args.task, self._npr.randint(0, 999999),
            {'allow_monitor': self.args.monitor})
# import matplotlib.pyplot as plt
# from mbbl.env.dm_env.pos_dm_env import POS_CONNECTION
from mbbl.env.env_register import make_env
# from mbbl.util.common import logger
# from mbbl.config import init_path
# import cv2
# import os
# from skimage import draw
import numpy as np
# from mbbl.util.il.expert_data_util import load_pose_data
# import argparse


if __name__ == '__main__':

    env, env_info = make_env("cheetah-run-pos", 1234)
    control_info = env.get_controller_info()
    dynamics_env, _ = make_env("cheetah-run-pos", 1234)

    # generate the data
    env.reset()
    for i in range(1000):
        action = np.random.randn(env_info['action_size'])
        qpos = np.array(env._env.physics.data.qpos, copy=True)
        old_qpos = np.array(env._env.physics.data.qpos, copy=True)
        old_qvel = np.array(env._env.physics.data.qvel, copy=True)
        old_qacc = np.array(env._env.physics.data.qacc, copy=True)
        old_qfrc_inverse = np.array(env._env.physics.data.qfrc_inverse, copy=True)
        _, _, _, _ = env.step(action)
        ctrl = np.array(env._env.physics.data.ctrl, copy=True)
        qvel = np.array(env._env.physics.data.qvel, copy=True)
Example #11
    def _build_env(self):
        self._env, self._env_info = env_register.make_env(
            self.args.task, self._npr.randint(0, 9999),
            {'allow_monitor': self.args.monitor and self._worker_id == 0})
        self._fake_env = fake_env(self._env, self._step)

    def build_network(self):
        # the placeholders
        self._env, self._env_info = env_register.make_env(
            self.args.task_name, self._npr.randint(0, 9999))
        self._env.reset()
        assert hasattr(self._env, 'fdynamics')
Example #13
    def __init__(self, env_name):
        self.env, _ = make_env(env_name, 1234)
        self.env.reset()
Example #14
def get_env_info(env_name, env_seed, dtype):
    """ @brief: In this function return the reward moment function, initial
        state mean and initial state covariance.
    """

    env, env_info = env_register.make_env(
        env_name, env_seed, misc_info={"reset_type": "gym"}
    )

    # the reward moment function
    if env_name == "gym_walker2d":
        reward_moment_map = reward_functions.mbbl_walker_reward(
            8,  # velocity index
            0,  # height index
            1.3,  # target_height
            3.0,  # height_coefficient
            slice(17, 17 + 6),  # actions
            0.1,  # action coeff
        )

    elif env_name == "gym_hopper":
        reward_moment_map = reward_functions.mbbl_walker_reward(
            5,  # velocity index
            0,  # height index
            1.3,  # target_height
            3.0,  # height_coefficient
            slice(11, 11 + 3),  # actions
            0.1,  # action coeff
        )
    elif env_name == "gym_swimmer":
        reward_moment_map = reward_functions.mbbl_walker_reward(
            3,  # velocity index
            0,  # height index
            0.0,  # target_height
            0.0,  # height_coefficient
            slice(8, 10),  # actions
            0.0001,  # action coeff
        )
    elif env_name == "gym_fswimmer":
        reward_moment_map = reward_functions.fswimmer_walker_reward(
            8,  # velocity index
            0,  # height index
            0.0,  # target_height
            0.0,  # height_coefficient
            slice(8, 10),  # actions
            0.0001,  # action coeff
        )
    elif "gym_cheetah" in env_name:
        reward_moment_map = reward_functions.mbbl_walker_reward(
            8,  # velocity index
            0,  # height index
            0.0,  # target_height
            0.0,  # height_coefficient
            slice(17, 17 + 6),  # actions
            0.1,  # action coeff
        )
    elif env_name == "gym_ant":
        reward_moment_map = reward_functions.mbbl_walker_reward(
            13,  # velocity index
            0,  # height index
            0.57,  # target_height
            3.0,  # height_coefficient
            slice(27, 27 + 8),  # actions
            0.1,  # action coeff
        )
    elif env_name == "gym_reacher":
        # Action space has size 2. Distance index is [-3:len(state)]
        # So [-5:] gets both
        reward_moment_map = reward_functions.surrogate_reacher_reward(
            distance_index=slice(-5, None)
        )
    elif env_name == "gym_acrobot":
        reward_moment_map = reward_functions.acrobot_reward()

    elif "gym_cartpole" in env_name:
        reward_moment_map = reward_functions.cartpole_reward(
            position_index=0, position_coefficient=0.01, angle_index=2
        )

    elif env_name == "gym_mountain":
        reward_moment_map = reward_functions.mountain_car_reward()

    elif "gym_pendulum" in env_name:
        reward_moment_map = reward_functions.pendulum_reward(slice(3, 4))

    elif env_name == "gym_invertedPendulum":
        reward_moment_map = reward_functions.inverted_pendulum_reward()

    else:
        raise NotImplementedError

    # the mean of the initial state
    initial_state_mean = np.array(np.reshape(get_x0(env_name), [-1]), dtype=np.float64)

    # the cov of the initial state
    initial_state_covariance = get_covX0(env_name, env_info)

    return (
        env,
        env_info,
        np.array(initial_state_mean, dtype=dtype),
        np.array(initial_state_covariance, dtype=dtype),
        reward_moment_map,
    )
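A minimal usage sketch of the function above; dtype is whatever floating-point type the downstream solver expects:

env, env_info, x0_mean, x0_cov, reward_moment_map = get_env_info(
    "gym_cheetah", env_seed=1234, dtype=np.float64)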
Example #15
    parser.add_argument("--env_name",
                        type=str,
                        required=False,
                        default='cheetah-run-pos')
    parser.add_argument("-t",
                        "--visualize_type",
                        type=str,
                        required=False,
                        help="The directory of the expert data file",
                        default="inverse_action")
    parser.add_argument("--sol_qpos_freq", type=int, required=False, default=1)
    parser.add_argument("--traj_id", type=int, required=False, default=0)
    parser.add_argument("--traj_length",
                        type=int,
                        required=False,
                        default=1000)

    args = parser.parse_args()
    env, env_info = make_env(args.env_name, 1234)
    control_info = env.get_controller_info()
    expert_data = load_pose_data(args.expert_data_name, args.camera_id)

    # how good are the interpolation methods?
    plot_interpolation(expert_data[args.traj_id], args.sol_qpos_freq,
                       args.traj_length)

    # how accurate is inverse dynamics?
    plot_inverse_dynamics_stats(env, expert_data[args.traj_id],
                                args.sol_qpos_freq, control_info,
                                args.traj_length)