def visualize_pose_from_expert_data(data_file, camera_id):
    expert_traj, pos_data, env_name, dt = load_pose_data(data_file, camera_id)
    pos_connection = POS_CONNECTION[env_name]

    image_size = expert_traj[camera_id]['camera_info'][camera_id]['image_size']
    image = np.zeros([image_size, image_size, 3], dtype=np.uint8)
    env, _ = make_env(env_name, 1234, {})

    for i_pos_id in range(100):
        i_pos_data = pos_data[i_pos_id]

        # render the frame from the recorded qpos and draw the pose on top
        image = env.render(camera_id=camera_id,
                           qpos=expert_traj[0]['qpos'][i_pos_id])
        fig = plt.figure()
        visualize_pose(image, i_pos_data, pos_connection, show=False)
        fig.canvas.draw()
        plt_results = np.array(fig.canvas.renderer._renderer)
        print('Processing %d out of %d' % (i_pos_id, 100))

        if i_pos_id == 0:
            # create the video writer on the first frame
            width, height, _ = plt_results.shape
            output_dir = \
                data_file.replace('.npy', '_' + str(camera_id) + '.mp4')
            video = cv2.VideoWriter(
                os.path.join(init_path.get_abs_base_dir(), output_dir),
                cv2.VideoWriter_fourcc(*'mp4v'), 40, (height, width))

        plt.imshow(plt_results)
        video.write(plt_results[:, :, [2, 1, 0]])  # RGB -> BGR for OpenCV
        plt.close()

    video.release()
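# A minimal usage sketch (not part of the original file): the .npy path below
# is a hypothetical placeholder; the function writes an .mp4 next to the data
# file, suffixed with the camera id.
if __name__ == '__main__':
    visualize_pose_from_expert_data('expert_data/cheetah_run_pos.npy',
                                    camera_id=0)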
def __init__(self, env_name):
    self._env_name = env_name
    self._env, self._env_info = env_register.make_env(self._env_name, 1234)
    # convert the controller range from degrees to radians
    self._joint_range = \
        np.array(self._env._controller_info['range']) / 180.0 * np.pi
    self._len_qpos = self._env.get_qpos_size()
    self._control_info = self._env.get_controller_info()
def __init__(self, env_name, *args, **kwargs):
    # strip a trailing '__render' suffix from the environment name
    remove_render = re.compile(r'__render$')
    self.env_name = remove_render.sub('', env_name)

    from mbbl.env import env_register
    self.env, _ = env_register.make_env(self.env_name, *args, **kwargs)
    self.episode_number = 0

    # getting the output path from the logger
    self.path = logger._get_path()
    self.obs_buffer = []
def main(args=None):
    """Run script.

    Args:
        args: A list of argument strings to use instead of sys.argv.
    """
    args = parse_args(args)
    try:
        env = gym.make(args.env)
    except gym.error.Error:
        env, _ = env_register.make_env(args.env, None,
                                       misc_info={"reset_type": "gym"})

    stats = utils.stats.OnlineMeanVariance()
    try:
        print("Observation Space Shape:", env.observation_space.shape)
    except AttributeError:
        pass
    try:
        print("Action Space Shape:", env.action_space.shape)
    except AttributeError:
        pass

    for _ in tqdm.trange(args.num_samples):
        state = env.reset()
        stats.add(np.asarray(state))

    print("State Num Dimensions", len(state))
    print("mean\n", stats.mean())
    print("var\n", stats.variance())
    print("stddev\n", np.sqrt(stats.variance()))
    try:
        print()
        print(env.metadata["initial_state.mean"])
        print(np.diag(env.metadata["initial_state.covariance"]))
        print(np.sqrt(np.diag(env.metadata["initial_state.covariance"])))
    except (AttributeError, KeyError):
        pass
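# Hedged usage sketch: parse_args is defined elsewhere in this script, so the
# exact flag names and the file name below are assumptions; the intent is
#
#     python sample_initial_states.py --env gym_cheetah --num_samples 1000
#
# which prints the mean, variance, and standard deviation of sampled initial
# observations.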
def _build_env(self):
    self._env, self._env_info = env_register.make_env(
        self.args.task, self._npr.randint(0, 9999),
        {'allow_monitor': self.args.monitor and self._worker_id == 0})
    self._env_solved_reward = self._env_info['SOLVED_REWARD'] \
        if 'SOLVED_REWARD' in self._env_info else np.inf
def make_norm_env(cfg):
    if 'gym' in cfg.env_name:
        # mbbl-style gym environments
        from mbbl.env.env_register import make_env
        misc_info = {'reset_type': 'gym'}
        if 'gym_pets' in cfg.env_name:
            misc_info['pets'] = True
        env, meta = make_env(cfg.env_name, rand_seed=cfg.seed,
                             misc_info=misc_info)
        # expose the gym attributes of the wrapped environment
        env.metadata = env._env.metadata
        env.reward_range = env._env.reward_range
        env.spec = env._env.spec
        env.unwrapped = env._env.unwrapped
        env.close = env._env.close
        env = RescaleAction(env, -1., 1.)
        assert not cfg.max_episode_steps
        if cfg.env_name == 'gym_fswimmer' or 'gym_pets' in cfg.env_name:
            env._max_episode_steps = env.env._env_info['max_length']
        else:
            env._max_episode_steps = env.env._env._max_episode_steps

        def render(mode, height, width, camera_id):
            return env.env._env.render(mode='rgb_array')
        env.render = render

        def set_seed(seed):
            if 'gym_pets' in cfg.env_name or cfg.env_name == 'gym_fswimmer':
                return env.env._env.seed(seed)
            else:
                return env.env._env.env.seed(seed)

    elif cfg.env_name.startswith('dmc_'):
        env = dmc.make(cfg)
        if cfg.pixels:
            env = FrameStack(env, k=cfg.frame_stack)

            def set_seed(seed):
                return env.env.env._env.task.random.seed(seed)
        else:
            def set_seed(seed):
                return env.env._env.task.random.seed(seed)

    else:
        # the remaining names all wrap a plain gym environment
        if cfg.env_name == 'Humanoid-v2':
            env = gym.make('Humanoid-v2')
        elif cfg.env_name == 'pets_cheetah':
            from svg.env import register_pets_environments
            register_pets_environments()
            env = gym.make('PetsCheetah-v0')
        elif cfg.env_name == 'pets_reacher':
            from svg.env import register_pets_environments
            register_pets_environments()
            env = gym.make('PetsReacher-v0')
        elif cfg.env_name == 'pets_pusher':
            from svg.env import register_pets_environments
            register_pets_environments()
            env = gym.make('PetsPusher-v0')
        elif cfg.env_name == 'mbpo_hopper':
            env = gym.make('Hopper-v2')
        elif cfg.env_name == 'mbpo_walker2d':
            env = gym.make('Walker2d-v2')
        elif cfg.env_name == 'mbpo_ant':
            from svg.env import register_mbpo_environments
            register_mbpo_environments()
            env = gym.make('AntTruncatedObs-v2')
        elif cfg.env_name == 'mbpo_cheetah':
            from svg.env import register_mbpo_environments
            register_mbpo_environments()
            env = gym.make('HalfCheetah-v2')
        elif cfg.env_name == 'mbpo_humanoid':
            from svg.env import register_mbpo_environments
            register_mbpo_environments()
            env = gym.make('HumanoidTruncatedObs-v2')
        else:
            raise ValueError('Unknown env_name: %s' % cfg.env_name)

        env = RescaleAction(env, -1., 1.)
        assert not cfg.max_episode_steps
        env._max_episode_steps = env.env._max_episode_steps

        def render(mode, height, width, camera_id):
            return env.env.render(mode='rgb_array')
        env.render = render

        def set_seed(seed):
            return env.env.seed(seed)

    env.set_seed = set_seed
    return env
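# A minimal usage sketch (assumption, not part of the original module). The
# config object only needs the fields that make_norm_env actually reads; the
# SimpleNamespace and the values below are placeholders.
if __name__ == '__main__':
    from types import SimpleNamespace

    cfg = SimpleNamespace(env_name='mbpo_hopper', seed=0,
                          max_episode_steps=0, pixels=False, frame_stack=1)
    env = make_norm_env(cfg)
    env.set_seed(cfg.seed)
    obs = env.reset()
    print('observation shape:', obs.shape)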
TEST = "DYNAMICS_DERIVATIVE" # 'REWARD_DERIVATIVE' # TEST = 'REWARD_DERIVATIVE' # candidate_names = invertedPendulum.env.PENDULUM # candidate_names = pendulum.env.PENDULUM # candidate_names = pets.env.ENV candidate_names = humanoid.env.ENV if __name__ == '__main__' and TEST == 'REWARD_DERIVATIVE': DERIVATIVE_EPS = 1e-6 # test the walker # candidate_names = walker.env.WALKER # candidate_names = reacher.env.ARM_2D max_error = 0.0 for env_name in candidate_names: env, _ = env_register.make_env(env_name, 123, {}) env_info = env_register.get_env_info(env_name) derivative_env, _ = env_register.make_env(env_name, 234, {}) data_dict = \ {'action': np.random.uniform(-1, 1, [1, env_info['action_size']])} data_dict['start_state'], _, _, _ = env.reset() data_dict['start_state'] = data_dict['start_state'].reshape(1, -1) data_dict['start_state'] = data_dict['start_state'].reshape(1, -1) r_u = derivative_env.reward_derivative(data_dict, 'action') r_uu = derivative_env.reward_derivative(data_dict, 'action-action') r_x = derivative_env.reward_derivative(data_dict, 'state') r_xx = derivative_env.reward_derivative(data_dict, 'state-state') # test the derivative of the reward wrt action
def build_network(self):
    self._env, self._env_info = env_register.make_env(
        self.args.task_name, self._npr.randint(0, 9999))
def _build_env(self):
    self._env, self._env_info = env_register.make_env(
        self.args.task, self._npr.randint(0, 999999),
        {'allow_monitor': self.args.monitor})
from mbbl.env.env_register import make_env
import numpy as np

if __name__ == '__main__':
    env, env_info = make_env("cheetah-run-pos", 1234)
    control_info = env.get_controller_info()
    dynamics_env, _ = make_env("cheetah-run-pos", 1234)

    # generate the data
    env.reset()
    for i in range(1000):
        action = np.random.randn(env_info['action_size'])

        # record the mujoco state before the step
        qpos = np.array(env._env.physics.data.qpos, copy=True)
        old_qpos = np.array(env._env.physics.data.qpos, copy=True)
        old_qvel = np.array(env._env.physics.data.qvel, copy=True)
        old_qacc = np.array(env._env.physics.data.qacc, copy=True)
        old_qfrc_inverse = np.array(env._env.physics.data.qfrc_inverse,
                                    copy=True)

        _, _, _, _ = env.step(action)

        # record the control and state after the step
        ctrl = np.array(env._env.physics.data.ctrl, copy=True)
        qvel = np.array(env._env.physics.data.qvel, copy=True)
def _build_env(self):
    self._env, self._env_info = env_register.make_env(
        self.args.task, self._npr.randint(0, 9999),
        {'allow_monitor': self.args.monitor and self._worker_id == 0})
    self._fake_env = fake_env(self._env, self._step)
def build_network(self):
    # the placeholders
    self._env, self._env_info = env_register.make_env(
        self.args.task_name, self._npr.randint(0, 9999))
    self._env.reset()
    assert hasattr(self._env, 'fdynamics')
def __init__(self, env_name):
    self.env, _ = make_env(env_name, 1234)
    self.env.reset()
def get_env_info(env_name, env_seed, dtype):
    """Return the environment together with its reward moment function,
    initial state mean, and initial state covariance.
    """
    env, env_info = env_register.make_env(
        env_name, env_seed, misc_info={"reset_type": "gym"}
    )

    # the reward moment function
    if env_name == "gym_walker2d":
        reward_moment_map = reward_functions.mbbl_walker_reward(
            8,                  # velocity index
            0,                  # height index
            1.3,                # target height
            3.0,                # height coefficient
            slice(17, 17 + 6),  # actions
            0.1,                # action coefficient
        )
    elif env_name == "gym_hopper":
        reward_moment_map = reward_functions.mbbl_walker_reward(
            5,                  # velocity index
            0,                  # height index
            1.3,                # target height
            3.0,                # height coefficient
            slice(11, 11 + 3),  # actions
            0.1,                # action coefficient
        )
    elif env_name == "gym_swimmer":
        reward_moment_map = reward_functions.mbbl_walker_reward(
            3,                  # velocity index
            0,                  # height index
            0.0,                # target height
            0.0,                # height coefficient
            slice(8, 10),       # actions
            0.0001,             # action coefficient
        )
    elif env_name == "gym_fswimmer":
        reward_moment_map = reward_functions.fswimmer_walker_reward(
            8,                  # velocity index
            0,                  # height index
            0.0,                # target height
            0.0,                # height coefficient
            slice(8, 10),       # actions
            0.0001,             # action coefficient
        )
    elif "gym_cheetah" in env_name:
        reward_moment_map = reward_functions.mbbl_walker_reward(
            8,                  # velocity index
            0,                  # height index
            0.0,                # target height
            0.0,                # height coefficient
            slice(17, 17 + 6),  # actions
            0.1,                # action coefficient
        )
    elif env_name == "gym_ant":
        reward_moment_map = reward_functions.mbbl_walker_reward(
            13,                 # velocity index
            0,                  # height index
            0.57,               # target height
            3.0,                # height coefficient
            slice(27, 27 + 8),  # actions
            0.1,                # action coefficient
        )
    elif env_name == "gym_reacher":
        # Action space has size 2 and the distance occupies the last three
        # entries of the state, so slice(-5, None) covers both.
        reward_moment_map = reward_functions.surrogate_reacher_reward(
            distance_index=slice(-5, None)
        )
    elif env_name == "gym_acrobot":
        reward_moment_map = reward_functions.acrobot_reward()
    elif "gym_cartpole" in env_name:
        reward_moment_map = reward_functions.cartpole_reward(
            position_index=0, position_coefficient=0.01, angle_index=2
        )
    elif env_name == "gym_mountain":
        reward_moment_map = reward_functions.mountain_car_reward()
    elif "gym_pendulum" in env_name:
        reward_moment_map = reward_functions.pendulum_reward(slice(3, 4))
    elif env_name == "gym_invertedPendulum":
        reward_moment_map = reward_functions.inverted_pendulum_reward()
    else:
        raise NotImplementedError

    # the mean of the initial state
    initial_state_mean = np.array(
        np.reshape(get_x0(env_name), [-1]), dtype=np.float64)

    # the covariance of the initial state
    initial_state_covariance = get_covX0(env_name, env_info)

    return (
        env,
        env_info,
        np.array(initial_state_mean, dtype=dtype),
        np.array(initial_state_covariance, dtype=dtype),
        reward_moment_map,
    )
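# A minimal usage sketch (assumption, not part of the original module): query
# the cheetah task and inspect the initial-state statistics it returns.
if __name__ == "__main__":
    env, env_info, x0_mean, x0_cov, reward_moments = get_env_info(
        "gym_cheetah", env_seed=1234, dtype=np.float64)
    print("initial state mean shape:", x0_mean.shape)
    print("initial state covariance shape:", x0_cov.shape)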
parser.add_argument("--env_name", type=str, required=False, default='cheetah-run-pos') parser.add_argument("-t", "--visualize_type", type=str, required=False, help="The directory of the expert data file", default="inverse_action") parser.add_argument("--sol_qpos_freq", type=int, required=False, default=1) parser.add_argument("--traj_id", type=int, required=False, default=0) parser.add_argument("--traj_length", type=int, required=False, default=1000) args = parser.parse_args() env, env_info = make_env(args.env_name, 1234) control_info = env.get_controller_info() expert_data = load_pose_data(args.expert_data_name, args.camera_id) # how good is the intepolation methods? plot_interpolation(expert_data[args.traj_id], args.sol_qpos_freq, args.traj_length) # how accurate is inverse dynamics? plot_inverse_dynamics_stats(env, expert_data[args.traj_id], args.sol_qpos_freq, control_info, args.traj_length)