Example #1
0
def main():
    """Roll out the waypoint controller in a gym maze and print the mean return.

    Command-line flags:
        --env_name: environment id handed to ``gym.make``.
        --num_episodes: number of episodes to roll out.

    Each episode always runs for ``env._max_episode_steps`` steps; the done
    flags returned by the controller and by ``env.step`` are ignored.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('--env_name', type=str, default='maze2d-umaze-v0',
                     help='Maze type. small or default')
    cli.add_argument('--num_episodes', type=int, default=100,
                     help='Num samples to collect')
    args = cli.parse_args()

    env = gym.make(args.env_name)
    # Seed both the environment and NumPy's global RNG.
    env.seed(0)
    np.random.seed(0)
    controller = waypoint_controller.WaypointController(env.str_maze_spec)

    episode_returns = []
    for _ in range(args.num_episodes):
        obs = env.reset()
        total = 0
        for _step in range(env._max_episode_steps):
            # The first two entries are passed as position, the next two as velocity.
            act, _ = controller.get_action(obs[0:2], obs[2:4],
                                           env.get_target())
            obs, rew, _, _ = env.step(act)
            total += rew
        episode_returns.append(total)
    print(args.env_name, 'returns', np.mean(episode_returns))
def main():
    """Evaluate the waypoint controller in a gym maze, optionally rendering it.

    Command-line flags:
        --env_name: environment id handed to ``gym.make``.
        --num_episodes: number of episodes to roll out.
        --render: render the environment in 'human' mode while stepping.

    A fresh controller is built for every episode. Each episode runs for
    ``env._max_episode_steps`` steps regardless of any done flag. The return
    of each episode is printed, followed by the mean over all episodes.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('--env_name', type=str, default='maze2d-umaze-v0',
                     help='Maze type. small or default')
    cli.add_argument('--num_episodes', type=int, default=100,
                     help='Num samples to collect')
    cli.add_argument('--render', action='store_true')
    args = cli.parse_args()

    env = gym.make(args.env_name)
    if args.render:
        env.render('human')
    env.seed(0)
    np.random.seed(0)

    # PD gains shared by every controller instance created below.
    gains = {'p_gain': 10.0, 'd_gain': -2.0}

    def build_controller():
        return waypoint_controller.WaypointController(env.env.str_maze_spec,
                                                      **gains)

    controller = build_controller()
    print('max steps:', env._max_episode_steps)

    episode_returns = []
    for _ in range(args.num_episodes):
        controller = build_controller()
        obs = env.reset()
        total = 0
        for _step in range(env._max_episode_steps):
            goal = np.array(env.env.get_target())
            act, _ = controller.get_action(obs[0:2], obs[2:4], goal)
            obs, rew, _, _ = env.step(act)
            if args.render:
                # Short pause between rendered frames.
                time.sleep(0.01)
                env.render('human')
            total += rew
        print(total)
        episode_returns.append(total)
    print(args.env_name, 'returns', np.mean(episode_returns))
def main():
    """Collect waypoint-controller rollouts in the bullet maze and save them as HDF5.

    Command-line flags:
        --render: render the environment in 'human' mode while collecting.
        --noisy: add Gaussian noise (std 0.5) to every controller action.
        --env_name: gym id used to look up the maze spec and the step limit.
        --num_samples: total number of transitions to record.

    The maze layout and ``_max_episode_steps`` come from the gym environment,
    but stepping happens in ``bullet_maze.Maze2DBulletEnv``. Every step is
    passed to ``append_data`` together with the controller's ``done`` flag and
    a ``timeout`` flag. The result is written to
    ``<env_name>-noisy-bullet.hdf5`` or ``<env_name>-bullet.hdf5`` in the
    current directory.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--render', action='store_true', help='Render trajectories')
    parser.add_argument('--noisy', action='store_true', help='Noisy actions')
    parser.add_argument('--env_name', type=str, default='maze2d-umaze-v1', help='Maze type')
    parser.add_argument('--num_samples', type=int, default=int(1e6), help='Num samples to collect')
    args = parser.parse_args()

    env = gym.make(args.env_name)
    maze = env.str_maze_spec
    max_episode_steps = env._max_episode_steps

    # Original author's note: default: p=10, d=-1
    controller = waypoint_controller.WaypointController(maze, p_gain=10.0, d_gain=-2.0)
    env = bullet_maze.Maze2DBulletEnv(maze)
    if args.render:
        env.render('human')

    env.set_target()
    s = env.reset()

    data = reset_data()
    ts = 0
    for _ in range(args.num_samples):
        position = s[0:2]
        velocity = s[2:4]

        act, done = controller.get_action(position, velocity, env._target)
        if args.noisy:
            act = act + np.random.randn(*act.shape) * 0.5

        act = np.clip(act, -1.0, 1.0)
        # Recompute the flag on every step: a sticky flag would mark every
        # sample after the first time-limit hit as a timeout.
        timeout = ts >= max_episode_steps
        append_data(data, s, act, env._target, done, timeout, env.robot)

        ns, _, _, _ = env.step(act)

        if len(data['observations']) % 10000 == 0:
            print(len(data['observations']))

        ts += 1
        if done or timeout:
            # Episode boundary (goal reached or time limit hit): sample a new
            # target and restart the step counter.
            # NOTE(review): `s` is not advanced to `ns` on this branch, as in
            # the original code -- confirm this is intended.
            env.set_target()
            ts = 0
        else:
            s = ns

        if args.render:
            env.render('human')

    if args.noisy:
        fname = '%s-noisy-bullet.hdf5' % args.env_name
    else:
        fname = '%s-bullet.hdf5' % args.env_name
    npify(data)
    # The context manager guarantees the file is flushed and closed.
    with h5py.File(fname, 'w') as dataset:
        for k in data:
            dataset.create_dataset(k, data=data[k], compression='gzip')
Example #4
0
def sample_env_and_controller(args, layout):
    """Build a maze environment and a matching waypoint controller for ``layout``.

    ``layout`` is converted with ``semantic_layout2str`` and the resulting
    spec is used for both objects. ``args.agent_centric`` is forwarded to the
    environment as ``agent_centric_view``.

    Returns:
        An ``(env, controller)`` tuple.
    """
    maze_spec = semantic_layout2str(layout)
    return (
        maze_model.MazeEnv(maze_spec, agent_centric_view=args.agent_centric),
        waypoint_controller.WaypointController(maze_spec),
    )
Example #5
0
def main():
    """Collect waypoint-controller rollouts in ``maze_model.MazeEnv`` and save them as HDF5.

    Command-line flags:
        --render: call ``env.render()`` after every step.
        --noisy: add Gaussian noise (std 0.5) to every controller action.
        --env_name: gym id used to look up the maze spec and the step limit.
        --num_samples: total number of transitions to record.

    An episode ends when the controller reports ``done`` or when the step
    counter reaches ``env._max_episode_steps``; a new target is then sampled.
    The data is written to ``<env_name>-noisy.hdf5`` or ``<env_name>.hdf5`` in
    the current directory.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--render',
                        action='store_true',
                        help='Render trajectories')
    parser.add_argument('--noisy', action='store_true', help='Noisy actions')
    parser.add_argument('--env_name',
                        type=str,
                        default='maze2d-umaze-v1',
                        help='Maze type')
    parser.add_argument('--num_samples',
                        type=int,
                        default=int(1e6),
                        help='Num samples to collect')
    args = parser.parse_args()

    # The gym environment is only used to obtain the maze spec and step limit.
    env = gym.make(args.env_name)
    maze = env.str_maze_spec
    max_episode_steps = env._max_episode_steps

    controller = waypoint_controller.WaypointController(maze)
    env = maze_model.MazeEnv(maze)

    env.set_target()
    s = env.reset()

    data = reset_data()
    ts = 0
    for _ in range(args.num_samples):
        position = s[0:2]
        velocity = s[2:4]
        act, done = controller.get_action(position, velocity, env._target)
        if args.noisy:
            act = act + np.random.randn(*act.shape) * 0.5

        act = np.clip(act, -1.0, 1.0)
        # Hitting the step limit is recorded as an episode end as well.
        if ts >= max_episode_steps:
            done = True
        append_data(data, s, act, env._target, done, env.sim.data)

        ns, _, _, _ = env.step(act)

        if len(data['observations']) % 10000 == 0:
            print(len(data['observations']))

        ts += 1
        if done:
            # Episode boundary: sample a new target and restart the counter.
            # NOTE(review): `s` is not advanced to `ns` on this branch --
            # confirm this is intended.
            env.set_target()
            ts = 0
        else:
            s = ns

        if args.render:
            env.render()

    if args.noisy:
        fname = '%s-noisy.hdf5' % args.env_name
    else:
        fname = '%s.hdf5' % args.env_name
    npify(data)
    # The context manager guarantees the file is flushed and closed.
    with h5py.File(fname, 'w') as dataset:
        for k in data:
            dataset.create_dataset(k, data=data[k], compression='gzip')
def main():
    """Generate per-trajectory maze rollouts with rendered frames.

    Command-line flags:
        --render, --noisy, --agent_centric, --save_images: boolean switches.
        --maze: layout name; unrecognised names fall back to LARGE_MAZE.
        --data_dir, --batch_idx, --min_traj_len: only read here via ``args``
            (``min_traj_len`` gates saving; ``args`` is passed to ``save_data``).
        --num_samples: total number of environment steps to take.

    A trajectory ends when the controller reports ``done`` or the per-maze
    step limit is reached. Trajectories with more than ``min_traj_len``
    actions are passed to ``save_data``; shorter ones are dropped.

    Raises:
        ValueError: if ``--agent_centric`` is given without ``--save_images``.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('--render', action='store_true',
                     help='Render trajectories')
    cli.add_argument('--noisy', action='store_true', help='Noisy actions')
    cli.add_argument('--agent_centric', action='store_true',
                     help='Whether agent-centric images are rendered.')
    cli.add_argument('--save_images', action='store_true',
                     help='Whether rendered images are saved.')
    cli.add_argument('--maze', type=str, default='hardexpv2',
                     help='Maze type. small or default')
    cli.add_argument('--data_dir', type=str, default='.',
                     help='Base directory for dataset')
    cli.add_argument('--num_samples', type=int, default=int(2e5),
                     help='Num samples to collect')
    cli.add_argument('--min_traj_len', type=int, default=int(20),
                     help='Min number of samples per trajectory')
    cli.add_argument('--batch_idx', type=int, default=int(-1),
                     help='(Optional) Index of generated data batch')
    args = cli.parse_args()
    if args.agent_centric and not args.save_images:
        raise ValueError("Need to save images for agent-centric dataset")

    # Maze name -> (attribute of maze_layouts, step limit). The attribute is
    # resolved lazily so that only the selected layout is looked up.
    presets = {
        'umaze': ('U_MAZE', 150),
        'open': ('OPEN', 150),
        'medium': ('MEDIUM_MAZE', 250),
        'hardexp': ('HARD_EXP_MAZE', 300),
        'hardexpv2': ('HARD_EXP_MAZE_V2', 400 if args.agent_centric else 1600),
    }
    layout_attr, max_episode_steps = presets.get(args.maze,
                                                 ('LARGE_MAZE', 600))
    maze = getattr(maze_layouts, layout_attr)

    controller = waypoint_controller.WaypointController(maze)
    env = maze_model.MazeEnv(maze, agent_centric_view=args.agent_centric)

    obs = reset_env(env, agent_centric=args.agent_centric)

    data = reset_data()
    steps_in_traj = 0
    num_saved = 0
    for _ in tqdm.tqdm(range(args.num_samples)):
        act, done = controller.get_action(obs[0:2], obs[2:4], env._target)
        if args.noisy:
            act = act + np.random.randn(*act.shape) * 0.5
        act = np.clip(act, -1.0, 1.0)

        # Reaching the step limit ends the trajectory as well.
        if steps_in_traj >= max_episode_steps:
            done = True

        frame = env.render(mode='rgb_array')
        append_data(data, obs, act, frame, env._target, done, env.sim.data)

        next_obs, _, _, _ = env.step(act)

        if not done:
            obs = next_obs
            steps_in_traj += 1
        else:
            # Keep only trajectories that are long enough, then start over.
            if len(data['actions']) > args.min_traj_len:
                save_data(args, data, num_saved)
                num_saved += 1
            data = reset_data()
            obs = reset_env(env, agent_centric=args.agent_centric)
            steps_in_traj = 0

        if args.render:
            env.render(mode='human')
def sample_env_and_controller(args):
    """Build a maze environment and waypoint controller for a generated layout.

    The layout comes from ``maze_layouts.rand_layout`` with ``seed=0`` and
    ``size=args.fixed_maze_size``. ``args.agent_centric`` is forwarded to the
    environment as ``agent_centric_view``.

    Returns:
        An ``(env, controller)`` tuple sharing the same layout string.
    """
    maze_spec = maze_layouts.rand_layout(seed=0, size=args.fixed_maze_size)
    return (
        maze_model.MazeEnv(maze_spec, agent_centric_view=args.agent_centric),
        waypoint_controller.WaypointController(maze_spec),
    )
def main():
    """Collect waypoint-controller rollouts in ``maze_model.MazeEnv`` and save them as HDF5.

    Command-line flags:
        --render: call ``env.render()`` after every step.
        --noisy: add Gaussian noise (std 0.5) to every controller action.
        --env_name: gym id used to look up the maze spec and the step limit.
        --num_samples: total number of transitions to record.
        --data_dir: base directory for the output file.
        --batch_idx: when >= 0, write ``rollouts_batch_<idx>.h5`` into a
            per-dataset sub-directory instead of a single ``.hdf5`` file.

    The maze tag used in output names is ``--env_name`` with a leading
    ``maze2d-`` removed (the parser defines no ``--maze`` flag). Output
    names follow ``maze2d-<tag>-noisy`` or ``maze2d-<tag>-sparse``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--render', action='store_true', help='Render trajectories')
    parser.add_argument('--noisy', action='store_true', help='Noisy actions')
    parser.add_argument('--env_name', type=str, default='maze2d-umaze-v1', help='Maze type')
    parser.add_argument('--num_samples', type=int, default=int(1e6), help='Num samples to collect')
    parser.add_argument('--data_dir', type=str, default='.', help='Base directory for dataset')
    parser.add_argument('--batch_idx', type=int, default=int(-1), help='(Optional) Index of generated data batch')
    args = parser.parse_args()

    # Resolve the output location up front so that path problems surface
    # before the (long) collection loop rather than after it.
    prefix = 'maze2d-'
    if args.env_name.startswith(prefix):
        maze_tag = args.env_name[len(prefix):]
    else:
        maze_tag = args.env_name
    dataset_name = 'maze2d-%s-%s' % (maze_tag, 'noisy' if args.noisy else 'sparse')
    if args.batch_idx >= 0:
        out_dir = os.path.join(args.data_dir, dataset_name)
        fname = os.path.join(out_dir, "rollouts_batch_{}.h5".format(args.batch_idx))
    else:
        out_dir = args.data_dir
        fname = os.path.join(out_dir, dataset_name + '.hdf5')
    os.makedirs(out_dir, exist_ok=True)

    # The gym environment is only used to obtain the maze spec and step limit.
    env = gym.make(args.env_name)
    maze = env.str_maze_spec
    max_episode_steps = env._max_episode_steps

    controller = waypoint_controller.WaypointController(maze)
    env = maze_model.MazeEnv(maze)

    env.set_target()
    s = env.reset()

    data = reset_data()
    ts = 0
    for _ in range(args.num_samples):
        position = s[0:2]
        velocity = s[2:4]
        act, done = controller.get_action(position, velocity, env._target)
        if args.noisy:
            act = act + np.random.randn(*act.shape) * 0.5

        act = np.clip(act, -1.0, 1.0)
        # Hitting the step limit is recorded as an episode end as well.
        if ts >= max_episode_steps:
            done = True
        append_data(data, s, act, env._target, done, env.sim.data)

        ns, _, _, _ = env.step(act)

        if len(data['observations']) % 1000 == 0:
            print(len(data['observations']))

        ts += 1
        if done:
            # Episode boundary: sample a new target and restart the counter.
            # NOTE(review): `s` is not advanced to `ns` on this branch --
            # confirm this is intended.
            env.set_target()
            ts = 0
        else:
            s = ns

        if args.render:
            env.render()

    npify(data)
    # The context manager guarantees the file is flushed and closed.
    with h5py.File(fname, 'w') as dataset:
        for k in data:
            dataset.create_dataset(k, data=data[k], compression='gzip')