Example 1
import gym
import pytest
from gym.wrappers import TimeLimit
# EnvDataset and EpisodeLimit are project-specific wrappers; their imports are
# omitted in the original snippet.


def test_basics():
    # 10 steps per episode, at most 3 episodes; the env closes itself afterwards.
    env = TimeLimit(gym.make("CartPole-v0"), max_episode_steps=10)
    env = EnvDataset(env)
    env = EpisodeLimit(env, max_episodes=3)
    env.seed(123)

    for episode in range(3):
        obs = env.reset()
        done = False
        step = 0
        while not done:
            print(f"step {step}")
            obs, reward, done, info = env.step(env.action_space.sample())
            step += 1
    
    assert env.is_closed()
    with pytest.raises(gym.error.ClosedEnvironmentError):
        _ = env.reset()

    with pytest.raises(gym.error.ClosedEnvironmentError):
        _ = env.step(env.action_space.sample())

    with pytest.raises(gym.error.ClosedEnvironmentError):
        for _ in env:
            break
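
All of the examples in this listing rely on the same core behaviour of gym.wrappers.TimeLimit: once max_episode_steps steps have elapsed, the wrapper forces done=True and, in recent classic-Gym releases using the 4-tuple step API shown above, records the cut-off in info["TimeLimit.truncated"]. A minimal stand-alone sketch of that behaviour (CartPole is used purely for illustration):

import gym
from gym.wrappers import TimeLimit

env = TimeLimit(gym.make("CartPole-v0"), max_episode_steps=10)
env.reset()
done, steps = False, 0
while not done:
    obs, reward, done, info = env.step(env.action_space.sample())
    steps += 1
# The wrapper guarantees the episode ends within max_episode_steps; if it was
# cut short by the wrapper rather than ending naturally, the info dict carries
# the "TimeLimit.truncated" flag.
assert steps <= 10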
Example 2
def func():
    # Environment factory: create the env, apply wrappers, and seed the env
    # together with its action and observation spaces.
    env = gym.make(gym_id)
    env = TimeLimit(env, max_episode_steps=args.max_episode_len)
    env = EnvironmentWrapper(env.env, normOb=normOb, rewardNormalization=rewardNormalization,
                             clipOb=clipOb, clipRew=clipRew, **kwargs)
    env.seed(args.seed)
    env.action_space.seed(args.seed)
    env.observation_space.seed(args.seed)
    return env
Example 3
    def _thunk():
        # Per-worker factory: each rank gets its own seed and Monitor log directory.
        env = make_benchmarking_env(env_id)
        env = TimeLimit(env, max_episode_steps)

        env.seed(seed + rank)
        log_dir_ = os.path.join(log_dir, str(rank)) if log_dir is not None else log_dir
        env = Monitor(env, log_dir_, allow_early_resets=allow_early_resets)

        return env
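
Factories like _thunk above are usually returned from an outer helper and handed to a vectorized-environment constructor, so that every worker builds its own seeded, monitored copy. A minimal usage sketch, assuming OpenAI Baselines' SubprocVecEnv and a hypothetical outer helper make_env(env_id, seed, rank, log_dir) that returns the thunk:

from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv

num_workers = 8  # hypothetical worker count
# Each callable builds one environment; the captured `rank` gives every worker
# a distinct seed (seed + rank) and its own Monitor log sub-directory.
vec_env = SubprocVecEnv(
    [make_env(env_id, seed, rank, log_dir) for rank in range(num_workers)])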
Example 4
def test_max_and_skip_env(self):
    # Runnable smoke test: with a 20-step TimeLimit and a frame skip of 4, the
    # episode ends after 5 outer steps, so the loop breaks at i == 4.
    skip = 4
    env = gym.make(TEST_ENV_ID)
    env = TimeLimit(env, 20)
    env = atari.MaxAndSkipEnv(env, skip=skip)
    env.seed(1)
    ub_utils.set_seed(1)
    env.reset()
    for i in range(20):
        obs, rew, done, info = env.step(env.action_space.sample())
        if done:
            break
    self.assertEqual(4, i)
Example 5
def process_env(args, bot):
    parsing_metric = DictList()
    for episode_id in tqdm.tqdm(range(args.episodes)):
        env = TimeLimit(gym.make(random.choice(args.envs)), 100)
        if args.seed is not None:
            env.seed(args.seed + episode_id)

        demo_bot = demo.DemoBot(env=env)
        while True:
            try:
                ret, _demo_traj, viz = demo.generate_one_traj(
                    demo_bot, env, render_mode='ansi')
                if ret < len(env.sketchs):
                    continue
                demo_traj = DictList(_demo_traj)
                demo_traj.done = [False] * (len(demo_traj) - 1) + [True]
                demo_traj.action = [a.value for a in _demo_traj['action']]
                demo_traj.env_id = [env.env_id] * len(demo_traj)
                demo_traj.apply(lambda _t: torch.tensor(_t).unsqueeze(0)
                                if not isinstance(_t[0], str) else _t)
                break
            except demo.PlanningError:
                pass
        with torch.no_grad():
            traj = teacher_force(bot, demo_traj)
            traj.viz = viz

        ps = traj.p
        ps[0, :-1] = 0
        ps[0, -1] = 1

        # Compute F1
        use_ids = (traj.action.reshape(-1)[1:-1] == Actions.USE.value
                   ).nonzero().view(-1).cpu().numpy()
        target = use_ids.tolist()
        p_vals = torch.arange(bot.nb_slots + 1)
        avg_p = (p_vals * ps[1:-1]).sum(-1)
        for k in [2, 3, 4, 5, 6]:
            _, inds = (-avg_p).topk(k)
            preds = inds.tolist()
            for tol in [1]:
                result = f1(target, preds, tol, with_detail=True)
                for name in result:
                    parsing_metric.append(
                        {'{}_tol{}_k{}'.format(name, tol, k): result[name]})

    parsing_metric.apply(lambda _t: np.mean(_t))
    return parsing_metric
Example 6
def _thunk():
    # Atari environment factory: optional time limit, standard Atari/DeepMind
    # wrappers, and per-rank seeding.
    env = gym.make(id)
    if not timelimit:
        env = env.env
    elif timelimit_maxsteps:
        env = TimeLimit(env.env, timelimit_maxsteps)
    assert 'NoFrameskip' in env.spec.id
    if noop:
        env = atari_wrappers.NoopResetEnv(env, noop_max=30)
    env = atari_wrappers.MaxAndSkipEnv(env, skip=frameskip)
    env = StepOnEndOfLifeEnv(env)
    env = EpisodeInfo(env)
    env.seed(seed + rank)
    env = atari_wrappers.wrap_deepmind(
        env, episode_life=episode_life, clip_rewards=clip_rewards,
        frame_stack=False, scale=scale)
    env = ImageTranspose(env)
    return env
Example 7
class BaseTestRotMAB:
    """Base test class for RotMAB environment."""
    def __init__(self, winning_probs, max_steps):
        """Initialize test class."""
        self.winning_probs = winning_probs
        self.max_steps = max_steps
        self.env = TimeLimit(
            NonMarkovianRotatingMAB(winning_probs=self.winning_probs),
            max_episode_steps=self.max_steps,
        )

    def test_action_space(self):
        """Test action spaces."""
        assert self.env.action_space == Discrete(len(self.winning_probs))

    def test_observation_space(self):
        """Test observation spaces."""
        assert self.env.observation_space == Discrete(2)

    def test_interaction(self):
        """Test interaction with Rotating MAB."""
        self.env.seed()
        state = self.env.reset()
        assert state == 0

        def assert_consistency(obs, reward):
            """Assert obs = 1 iff reward = 1."""
            positive_reward = reward > 0.0
            positive_obs = obs == 1
            assert (positive_reward and positive_obs
                    or (not positive_reward and not positive_obs))

        for _i in range(self.max_steps - 1):
            action = self.env.action_space.sample()
            obs, reward, done, info = self.env.step(action)
            assert_consistency(obs, reward)
            assert not done

        # last action
        obs, reward, done, info = self.env.step(0)
        assert_consistency(obs, reward)
        assert done
Example 8
        env._max_episode_steps = int(args.episode_length)
    args.episode_length = env._max_episode_steps
else:
    env = TimeLimit(env, int(args.episode_length))
env = NormalizedEnv(env.env,
                    ob=args.norm_obs,
                    ret=args.norm_returns,
                    clipob=args.obs_clip,
                    cliprew=args.rew_clip,
                    gamma=args.gamma)
env = TimeLimit(env, int(args.episode_length))
random.seed(args.seed)
np.random.seed(args.seed)
torch.manual_seed(args.seed)
torch.backends.cudnn.deterministic = args.torch_deterministic
env.seed(args.seed)
env.action_space.seed(args.seed)
env.observation_space.seed(args.seed)
if args.capture_video:
    env = Monitor(env, f'videos/{experiment_name}')


# ALGO LOGIC: initialize agent here:
class CategoricalMasked(Categorical):
    def __init__(self, probs=None, logits=None, validate_args=None, masks=[]):
        self.masks = masks
        if len(self.masks) == 0:
            super(CategoricalMasked, self).__init__(probs, logits,
                                                    validate_args)
        else:
            self.masks = masks.type(torch.BoolTensor).to(device)
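
The CategoricalMasked class above is cut off inside its else branch. A minimal sketch of how an invalid-action-masking distribution typically finishes that branch (an assumption about the omitted code, not the original): the logits of masked-out actions are pushed to a large negative value before the parent constructor is called.

            # Assumed continuation (not part of the original snippet): suppress
            # invalid actions by setting their logits to a large negative value,
            # then initialize the underlying Categorical with the masked logits.
            logits = torch.where(self.masks, logits, torch.tensor(-1e8).to(device))
            super(CategoricalMasked, self).__init__(probs, logits, validate_args)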
Example 9
def process_env(args, bot, env_name):
    # Evaluate `bot` on env_name for args.episodes episodes (teacher-forced from
    # demonstrations when args.use_demo is set), collect parsing metrics, and
    # dump per-episode logs, plots, or videos.
    succs = []
    rets = []
    lines = []
    parsing_metric = DictList()
    model_name = os.path.dirname(os.path.abspath(
        args.model_ckpt)).split('/')[-1] + '_{}'.format(env_name)
    if args.use_demo:
        model_name = model_name + '_demo'
    render_mode = 'rgb' if args.mp4 else 'ansi'
    for episode_id in tqdm.tqdm(range(args.episodes)):
        env = TimeLimit(
            gym.make(env_name, height=args.height, width=args.width),
            args.max_steps)
        if args.seed is not None:
            env.seed(args.seed + episode_id)
        if args.use_demo:
            demo_bot = demo.DemoBot(env=env)
            while True:
                try:
                    ret, _demo_traj, viz = demo.generate_one_traj(
                        demo_bot, env, render_mode=render_mode)
                    if ret < len(env.sketchs):
                        continue
                    demo_traj = DictList(_demo_traj)
                    demo_traj.done = [False] * (len(demo_traj) - 1) + [True]
                    demo_traj.action = [a.value for a in _demo_traj['action']]
                    demo_traj.env_id = [env.env_id] * len(demo_traj)
                    demo_traj.apply(lambda _t: torch.tensor(_t).unsqueeze(0)
                                    if not isinstance(_t[0], str) else _t)
                    break
                except demo.PlanningError:
                    pass
            with torch.no_grad():
                traj = teacher_force(bot, demo_traj)
                traj.viz = viz
            success = True
        else:
            with torch.no_grad():
                traj, success, ret = generate(bot, env)
                ret = ret.item()

        succs.append(float(success))
        rets.append(ret)
        lines.append('########## [{}]EPISODE {} ##############'.format(
            env_name, episode_id))
        lines.append('return: {}  success: {}'.format(ret, success))

        has_p = 'p' in traj
        states = traj.viz
        rewards = traj.reward
        actions = traj.action
        if has_p:
            ps = traj.p
            ps[0, :-1] = 0
            ps[0, -1] = 1
            lines.append('P: ')
            lines.append(idxpos2tree(actions, ps))
            if 'p_out' in traj:
                lines.append('P_out:')
                lines.append(idxpos2tree(actions, traj.p_out))
            slots = torch.arange(len(ps[0]))
            p_avg = len(slots) - (slots[None, :] * ps).sum(-1)
            p_avg_str = plot(p_avg, actions=actions)
            lines.append('p_avg:')
            lines.append(p_avg_str)

            lines.append('Tree')
            depths = p_avg[:-1]
            depths = (depths - depths.min()) / (depths.max() - depths.min())
            depths = np.digitize(depths.numpy(), bins=np.linspace(0, 1, 5))
            parse_tree = distance2ctree(
                depths, [ACTION_VOCAB[a.item()] for a in actions], False)
            tree_line = tree_to_str(parse_tree)
            lines.append(tree_line[1:-1])

            # Compute F1
            if args.use_demo:
                use_ids = (traj.action.reshape(-1)[1:-1] == Actions.USE.value
                           ).nonzero().view(-1).cpu().numpy()
                target = use_ids.tolist()
                p_vals = torch.arange(bot.nb_slots + 1)
                avg_p = (p_vals * ps[1:-1]).sum(-1)
                for k in [3, 4, 5, 6]:
                    _, inds = (-avg_p).topk(k)
                    preds = inds.tolist()
                    for tol in [0]:
                        result = f1(target, preds, tol, with_detail=True)
                        for name in result:
                            parsing_metric.append({
                                '{}_tol{}_k{}'.format(name, tol, k):
                                result[name]
                            })

            # Generate Plots
            if args.plot_p:
                p_avg_fig = get_p_plot(actions, ps)
                p_avg_fig.savefig(os.path.join(
                    args.outdir, model_name + '_{}.png'.format(episode_id)),
                                  bbox_inches='tight')
                plt.close(p_avg_fig)

        # Save episode details
        if not args.mp4:
            episode_lines = []
            for t in range(len(rewards)):
                episode_lines.append('################################')
                episode_lines.append('Sketch: {}'.format(env.sketchs))
                info_line = "steps: {}\taction: {}\treward: {}".format(
                    t, ACTION_VOCAB[actions[t].item()], rewards[t])
                if has_p:
                    info_line += '\tp: {}'.format(
                        np.array2string(
                            ps[t].clamp(min=1e-8).numpy(),
                            formatter={'float_kind': lambda x: visual(x, 1)}))
                episode_lines.append(info_line)
                episode_lines.append(states[t])

            episode_res_name = os.path.join(
                args.outdir, model_name + '_{}.txt'.format(episode_id))
            with open(episode_res_name, 'w') as f:
                f.write('\n'.join(episode_lines))

        # Save MP4
        else:
            returns = rewards.cumsum(0)
            if not has_p:
                frames = states
            else:
                frames = []
                sketch_id = 0
                prev_ret = 0
                for time_step, state_frame in enumerate(states):
                    curr_ret = returns[time_step -
                                       1].item() if time_step - 1 > 0 else 0
                    curr_rwd = rewards[time_step -
                                       1].item() if time_step - 1 > 0 else 0
                    info_line = "steps: {}, reward: {}, ret: {} \n subtask: {}".format(
                        time_step, curr_rwd, curr_ret, env.sketchs[sketch_id])
                    special = curr_ret > prev_ret
                    if special:
                        sketch_id += 1
                        prev_ret = returns[time_step - 1]
                        info_line += "(done)"
                    p_fig = get_p_plot(actions, ps, time_step, info_line)
                    w, h = p_fig.canvas.get_width_height()
                    p_fig.tight_layout(pad=0)
                    p_fig.canvas.draw()
                    # Convert the rendered canvas into an (h, w, 3) uint8 RGB array.
                    p_img = np.frombuffer(p_fig.canvas.tostring_rgb(),
                                          dtype=np.uint8).reshape(h, w, 3)
                    plt.close(p_fig)

                    # Concat
                    final_frame = np.concatenate([p_img, state_frame], axis=1)
                    frames.append(final_frame)

                    if special:
                        for _ in range(2):
                            frames.append(final_frame)

            # Write to mp4
            print('Producing videos...')
            frames = [Image.fromarray(frame) for frame in frames]

            # Repeat ending frame for additional time
            frames.append(frames[-1])
            videodims = (frames[0].width, frames[0].height)
            video = cv2.VideoWriter(
                os.path.join(args.outdir,
                             model_name + "_{}.mp4".format(episode_id)),
                0x7634706d, 1, videodims)
            for frame in frames:
                video.write(cv2.cvtColor(np.array(frame), cv2.COLOR_RGB2BGR))
            video.release()

    lines.append(
        '######################{}#########################'.format(env_name))
    lines.append('Avg return {}'.format(sum(rets) / args.episodes))
    lines.append('Avg success rate {}'.format(sum(succs) / args.episodes))
    print('{} return: {} success {}'.format(env_name, np.mean(rets),
                                            np.mean(succs)))
    parsing_metric.apply(lambda _t: np.mean(_t))
    for key, val in parsing_metric.items():
        print(key, val)
    with open(os.path.join(args.outdir, model_name + '.out'), 'w') as f:
        f.write('\n'.join(lines))
Example 10
def _init():
    # Per-worker factory: time-limited env with per-rank Monitor logging and seed.
    env = gym.make(env_name)
    env = TimeLimit(env, timestep_limit)
    env = Monitor(env, log_folder + 'seed_' + str(seed + rank))
    env.seed(seed + rank)
    return env