def main():
    """Roll out the waypoint controller and print its mean episode return.

    Command-line options:
        --env_name: id passed to ``gym.make`` (default ``maze2d-umaze-v0``).
        --num_episodes: how many episodes to run (default 100).

    Every episode runs for exactly ``env._max_episode_steps`` steps; the
    controller's own "done" signal is ignored.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('--env_name', type=str, default='maze2d-umaze-v0',
                     help='Maze type. small or default')
    cli.add_argument('--num_episodes', type=int, default=100,
                     help='Num samples to collect')
    args = cli.parse_args()

    env = gym.make(args.env_name)
    env.seed(0)
    np.random.seed(0)
    controller = waypoint_controller.WaypointController(env.str_maze_spec)

    def run_episode():
        """Play one fixed-length episode and return the summed reward."""
        obs = env.reset()
        total = 0
        for _step in range(env._max_episode_steps):
            # The first four state entries are read as position and velocity.
            pos, vel = obs[0:2], obs[2:4]
            action, _reached = controller.get_action(pos, vel, env.get_target())
            obs, reward, _, _ = env.step(action)
            total += reward
        return total

    episode_returns = []
    for _ in range(args.num_episodes):
        episode_returns.append(run_episode())
    print(args.env_name, 'returns', np.mean(episode_returns))
def main():
    """Evaluate the waypoint controller, optionally rendering each step.

    Command-line options:
        --env_name: id passed to ``gym.make`` (default ``maze2d-umaze-v0``).
        --num_episodes: how many episodes to run (default 100).
        --render: show the environment in a ``'human'`` window while running.

    Prints the step limit, each episode's return, and finally the mean return.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('--env_name', type=str, default='maze2d-umaze-v0',
                     help='Maze type. small or default')
    cli.add_argument('--num_episodes', type=int, default=100,
                     help='Num samples to collect')
    cli.add_argument('--render', action='store_true')
    args = cli.parse_args()

    env = gym.make(args.env_name)
    if args.render:
        env.render('human')
    env.seed(0)
    np.random.seed(0)

    # PD gains handed to every controller instance.
    gains = {'p_gain': 10.0, 'd_gain': -2.0}

    def build_controller():
        """Create a controller for the wrapped env's maze spec."""
        return waypoint_controller.WaypointController(env.env.str_maze_spec, **gains)

    controller = build_controller()
    print('max steps:', env._max_episode_steps)

    episode_returns = []
    for _ in range(args.num_episodes):
        # A new controller is built for each episode.
        controller = build_controller()
        obs = env.reset()
        total = 0
        for _step in range(env._max_episode_steps):
            pos, vel = obs[0:2], obs[2:4]
            goal = np.array(env.env.get_target())
            action, _reached = controller.get_action(pos, vel, goal)
            obs, reward, _, _ = env.step(action)
            if args.render:
                time.sleep(0.01)
                env.render('human')
            total += reward
        print(total)
        episode_returns.append(total)
    print(args.env_name, 'returns', np.mean(episode_returns))
def main():
    """Collect waypoint-controller samples in the Bullet maze and save them as HDF5.

    Command-line options:
        --render: render the environment in ``'human'`` mode.
        --noisy: add Gaussian noise (std 0.5) to each action before clipping.
        --env_name: gym id used only to look up the maze spec and step limit.
        --num_samples: total number of transitions to record.

    The output file is ``<env_name>-noisy-bullet.hdf5`` when ``--noisy`` is set
    and ``<env_name>-bullet.hdf5`` otherwise, with one gzip-compressed dataset
    per key of the collected data dict.

    Timeouts are recorded in their own flag, separate from the controller's
    ``done`` signal. Either one ends the current episode: a new target is set
    and the step counter restarts.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--render', action='store_true', help='Render trajectories')
    parser.add_argument('--noisy', action='store_true', help='Noisy actions')
    parser.add_argument('--env_name', type=str, default='maze2d-umaze-v1', help='Maze type')
    parser.add_argument('--num_samples', type=int, default=int(1e6), help='Num samples to collect')
    args = parser.parse_args()

    # The registered gym env only supplies the maze layout and the step limit;
    # the rollouts themselves run in the Bullet environment created below.
    env = gym.make(args.env_name)
    maze = env.str_maze_spec
    max_episode_steps = env._max_episode_steps

    # NOTE(review): the original comment stated the controller defaults are
    # p=10, d=-1 -- confirm against WaypointController.
    controller = waypoint_controller.WaypointController(maze, p_gain=10.0, d_gain=-2.0)
    env = bullet_maze.Maze2DBulletEnv(maze)
    if args.render:
        env.render('human')

    env.set_target()
    s = env.reset()
    data = reset_data()
    ts = 0
    for _ in range(args.num_samples):
        position = s[0:2]
        velocity = s[2:4]
        act, done = controller.get_action(position, velocity, env._target)
        if args.noisy:
            act = act + np.random.randn(*act.shape) * 0.5
        act = np.clip(act, -1.0, 1.0)

        # Recomputed on every step. Previously the flag was set once and never
        # cleared, so every sample after the first timeout was marked as one.
        timeout = ts >= max_episode_steps
        append_data(data, s, act, env._target, done, timeout, env.robot)

        ns, _, _, _ = env.step(act)

        if len(data['observations']) % 10000 == 0:
            print(len(data['observations']))

        ts += 1
        if done or timeout:
            # Episode boundary: pick a new goal and restart the step counter.
            # As in the original "done" branch, `s` is not advanced here.
            env.set_target()
            ts = 0
        else:
            s = ns

        if args.render:
            env.render('human')

    if args.noisy:
        fname = '%s-noisy-bullet.hdf5' % args.env_name
    else:
        fname = '%s-bullet.hdf5' % args.env_name

    # Convert before opening so a conversion failure cannot leave an empty file.
    npify(data)
    # The context manager flushes and closes the file even if a write fails.
    with h5py.File(fname, 'w') as dataset:
        for k in data:
            dataset.create_dataset(k, data=data[k], compression='gzip')
def sample_env_and_controller(args, layout):
    """Build a maze environment and a waypoint controller for ``layout``.

    Args:
        args: object exposing ``agent_centric``, forwarded to the environment
            as ``agent_centric_view``.
        layout: layout passed to ``semantic_layout2str`` to obtain the maze
            string shared by the environment and the controller.

    Returns:
        Tuple ``(env, controller)``.
    """
    maze_spec = semantic_layout2str(layout)
    return (
        maze_model.MazeEnv(maze_spec, agent_centric_view=args.agent_centric),
        waypoint_controller.WaypointController(maze_spec),
    )
def main():
    """Collect waypoint-controller samples in a maze and save them as HDF5.

    Command-line options:
        --render: render the environment after every step.
        --noisy: add Gaussian noise (std 0.5) to each action before clipping.
        --env_name: gym id used only to look up the maze spec and step limit.
        --num_samples: total number of transitions to record.

    The output file is ``<env_name>-noisy.hdf5`` when ``--noisy`` is set and
    ``<env_name>.hdf5`` otherwise, with one gzip-compressed dataset per key of
    the collected data dict.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--render', action='store_true', help='Render trajectories')
    parser.add_argument('--noisy', action='store_true', help='Noisy actions')
    parser.add_argument('--env_name', type=str, default='maze2d-umaze-v1', help='Maze type')
    parser.add_argument('--num_samples', type=int, default=int(1e6), help='Num samples to collect')
    args = parser.parse_args()

    # The registered gym env only supplies the maze layout and the step limit;
    # sampling runs on a MazeEnv built directly from that layout.
    env = gym.make(args.env_name)
    maze = env.str_maze_spec
    max_episode_steps = env._max_episode_steps

    controller = waypoint_controller.WaypointController(maze)
    env = maze_model.MazeEnv(maze)

    env.set_target()
    s = env.reset()
    data = reset_data()
    ts = 0
    for _ in range(args.num_samples):
        position = s[0:2]
        velocity = s[2:4]
        act, done = controller.get_action(position, velocity, env._target)
        if args.noisy:
            act = act + np.random.randn(*act.shape) * 0.5

        act = np.clip(act, -1.0, 1.0)
        # Hitting the step limit is recorded as the end of the episode.
        if ts >= max_episode_steps:
            done = True
        append_data(data, s, act, env._target, done, env.sim.data)

        ns, _, _, _ = env.step(act)

        if len(data['observations']) % 10000 == 0:
            print(len(data['observations']))

        ts += 1
        if done:
            # New goal, restart the step counter; `s` is not advanced here.
            env.set_target()
            ts = 0
        else:
            s = ns

        if args.render:
            env.render()

    if args.noisy:
        fname = '%s-noisy.hdf5' % args.env_name
    else:
        fname = '%s.hdf5' % args.env_name

    # Convert before opening so a conversion failure cannot leave an empty file.
    npify(data)
    # The context manager flushes and closes the file even if a write fails.
    with h5py.File(fname, 'w') as dataset:
        for k in data:
            dataset.create_dataset(k, data=data[k], compression='gzip')
def main():
    """Collect per-trajectory rollouts (with rendered frames) in a chosen maze.

    The maze layout and the per-episode step limit are selected by ``--maze``;
    any unrecognised name falls back to the large maze with a 600-step limit.
    An episode ends when the controller reports done or the step limit is
    reached. A finished trajectory is passed to ``save_data`` only when it
    holds more than ``--min_traj_len`` actions; either way the buffer and the
    environment are reset afterwards.

    Raises:
        ValueError: if ``--agent_centric`` is given without ``--save_images``.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('--render', action='store_true', help='Render trajectories')
    cli.add_argument('--noisy', action='store_true', help='Noisy actions')
    cli.add_argument('--agent_centric', action='store_true',
                     help='Whether agent-centric images are rendered.')
    cli.add_argument('--save_images', action='store_true',
                     help='Whether rendered images are saved.')
    cli.add_argument('--maze', type=str, default='hardexpv2',
                     help='Maze type. small or default')
    cli.add_argument('--data_dir', type=str, default='.',
                     help='Base directory for dataset')
    cli.add_argument('--num_samples', type=int, default=int(2e5),
                     help='Num samples to collect')
    cli.add_argument('--min_traj_len', type=int, default=int(20),
                     help='Min number of samples per trajectory')
    cli.add_argument('--batch_idx', type=int, default=int(-1),
                     help='(Optional) Index of generated data batch')
    args = cli.parse_args()

    if args.agent_centric and not args.save_images:
        raise ValueError("Need to save images for agent-centric dataset")

    # Maze name -> (attribute of maze_layouts, step limit). The attribute is
    # resolved lazily so only the selected layout is ever looked up.
    maze_table = {
        'umaze': ('U_MAZE', 150),
        'open': ('OPEN', 150),
        'medium': ('MEDIUM_MAZE', 250),
        'hardexp': ('HARD_EXP_MAZE', 300),
        'hardexpv2': ('HARD_EXP_MAZE_V2', 400 if args.agent_centric else 1600),
    }
    layout_attr, max_episode_steps = maze_table.get(args.maze, ('LARGE_MAZE', 600))
    maze = getattr(maze_layouts, layout_attr)

    controller = waypoint_controller.WaypointController(maze)
    env = maze_model.MazeEnv(maze, agent_centric_view=args.agent_centric)

    s = reset_env(env, agent_centric=args.agent_centric)
    data = reset_data()
    ts = 0
    cnt = 0
    for _ in tqdm.tqdm(range(args.num_samples)):
        position, velocity = s[0:2], s[2:4]
        act, done = controller.get_action(position, velocity, env._target)
        if args.noisy:
            act = act + np.random.randn(*act.shape) * 0.5
        act = np.clip(act, -1.0, 1.0)

        # Reaching the step limit also ends the episode.
        if ts >= max_episode_steps:
            done = True

        frame = env.render(mode='rgb_array')
        append_data(data, s, act, frame, env._target, done, env.sim.data)

        ns, _, _, _ = env.step(act)
        ts += 1

        if not done:
            s = ns
        else:
            # Keep only trajectories longer than the minimum, then start over.
            if len(data['actions']) > args.min_traj_len:
                save_data(args, data, cnt)
                cnt += 1
            data = reset_data()
            s = reset_env(env, agent_centric=args.agent_centric)
            ts = 0

        if args.render:
            env.render(mode='human')
def sample_env_and_controller(args):
    """Build a maze environment and a waypoint controller on a generated layout.

    The layout comes from ``maze_layouts.rand_layout`` called with ``seed=0``
    and ``size=args.fixed_maze_size``.

    Args:
        args: object exposing ``fixed_maze_size`` and ``agent_centric``.

    Returns:
        Tuple ``(env, controller)`` sharing the same layout string.
    """
    maze_spec = maze_layouts.rand_layout(seed=0, size=args.fixed_maze_size)
    return (
        maze_model.MazeEnv(maze_spec, agent_centric_view=args.agent_centric),
        waypoint_controller.WaypointController(maze_spec),
    )
def main():
    """Collect waypoint-controller samples in a maze and save them as HDF5.

    Command-line options:
        --render: render the environment after every step.
        --noisy: add Gaussian noise (std 0.5) to each action before clipping.
        --env_name: gym id used to look up the maze spec and step limit, and
            to name the output.
        --num_samples: total number of transitions to record.
        --data_dir: base directory for the output (created if missing).
        --batch_idx: when >= 0, write ``rollouts_batch_<idx>.h5`` inside a
            per-dataset sub-directory; otherwise write a single ``.hdf5`` file
            directly in ``data_dir``.

    Output names have the form ``maze2d-<maze>-noisy`` or
    ``maze2d-<maze>-sparse``, where ``<maze>`` is ``--env_name`` with any
    leading ``maze2d-`` removed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--render', action='store_true', help='Render trajectories')
    parser.add_argument('--noisy', action='store_true', help='Noisy actions')
    parser.add_argument('--env_name', type=str, default='maze2d-umaze-v1', help='Maze type')
    parser.add_argument('--num_samples', type=int, default=int(1e6), help='Num samples to collect')
    parser.add_argument('--data_dir', type=str, default='.', help='Base directory for dataset')
    parser.add_argument('--batch_idx', type=int, default=int(-1),
                        help='(Optional) Index of generated data batch')
    args = parser.parse_args()

    # The registered gym env only supplies the maze layout and the step limit;
    # sampling runs on a MazeEnv built directly from that layout.
    env = gym.make(args.env_name)
    maze = env.str_maze_spec
    max_episode_steps = env._max_episode_steps

    controller = waypoint_controller.WaypointController(maze)
    env = maze_model.MazeEnv(maze)

    env.set_target()
    s = env.reset()
    data = reset_data()
    ts = 0
    for _ in range(args.num_samples):
        position = s[0:2]
        velocity = s[2:4]
        act, done = controller.get_action(position, velocity, env._target)
        if args.noisy:
            act = act + np.random.randn(*act.shape) * 0.5

        act = np.clip(act, -1.0, 1.0)
        # Hitting the step limit is recorded as the end of the episode.
        if ts >= max_episode_steps:
            done = True
        append_data(data, s, act, env._target, done, env.sim.data)

        ns, _, _, _ = env.step(act)

        if len(data['observations']) % 1000 == 0:
            print(len(data['observations']))

        ts += 1
        if done:
            # New goal, restart the step counter; `s` is not advanced here.
            env.set_target()
            ts = 0
        else:
            s = ns

        if args.render:
            env.render()

    # This parser defines no ``--maze`` option, so the original ``args.maze``
    # raised AttributeError after all samples had been collected. Derive the
    # maze tag from ``--env_name`` instead, dropping a leading "maze2d-" so the
    # prefix in the format strings below is not duplicated.
    prefix = 'maze2d-'
    if args.env_name.startswith(prefix):
        maze_name = args.env_name[len(prefix):]
    else:
        maze_name = args.env_name

    if args.batch_idx >= 0:
        dir_name = 'maze2d-%s-noisy' % maze_name if args.noisy else 'maze2d-%s-sparse' % maze_name
        os.makedirs(os.path.join(args.data_dir, dir_name), exist_ok=True)
        fname = os.path.join(args.data_dir, dir_name,
                             "rollouts_batch_{}.h5".format(args.batch_idx))
    else:
        os.makedirs(args.data_dir, exist_ok=True)
        fname = 'maze2d-%s-noisy.hdf5' % maze_name if args.noisy else 'maze2d-%s-sparse.hdf5' % maze_name
        fname = os.path.join(args.data_dir, fname)

    # Convert before opening so a conversion failure cannot leave an empty file.
    npify(data)
    # The context manager flushes and closes the file even if a write fails.
    with h5py.File(fname, 'w') as dataset:
        for k in data:
            dataset.create_dataset(k, data=data[k], compression='gzip')