Code example #1
def summarize(agent, env, settings: Dict[str, Any], seeds: List[int],
              writer: SummaryWriter):
    rewards = [0] * len(seeds)
    for i, seed in enumerate(seeds):
        if seed is not None:
            env.seed(seed)
        state = env.reset()
        goal_point = find(state, 'Goal')
        option = Option(goal_point, depth=0)
        agent.reset(env, option, random_seed=3)

        done = False
        while not done:
            action = agent.act(state, option)
            state, reward, done, _ = env.step(action)

            rewards[i] += reward

    # log hparams (minus entries TensorBoard cannot serialize, e.g. the
    # torch device) together with summary statistics of the rewards
    writer.add_hparams(
        {
            key: value
            for (key, value) in settings.items() if key not in ['device']
        }, {
            'average reward': np.mean(rewards),
            'min reward': np.min(rewards),
            'max reward': np.max(rewards)
        })
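
add_hparams here is the standard torch.utils.tensorboard API: one dict of hyperparameters, one dict of result metrics, linked in TensorBoard's HPARAMS view. A minimal self-contained sketch of the same call (log directory and values are placeholders):

import numpy as np
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter(log_dir='runs/demo')  # assumed log location
rewards = [1.0, 2.0, 3.0]                    # placeholder results
writer.add_hparams({'lr': 1e-3, 'gamma': 0.99},
                   {'average reward': np.mean(rewards),
                    'min reward': np.min(rewards),
                    'max reward': np.max(rewards)})
writer.close()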
Code example #2
    def visualize(agent: SMARTAgent, ep: int, ts: int):
        images = []
        for seed in seeds:
            if seed is not None:
                env.seed(seed)
            state = env.reset()
            goal_point = find(state, 'Goal')
            option = Option(goal_point, depth=0)
            agent.reset(env, option, random_seed=3)

            visualize_decision(agent, state, writer, f'likelihoods: {seed}',
                               ep, ts)

            images.append(env.render('rgb_array'))
            done = False
            while not done:
                action = agent.act(state, option)
                state, reward, done, _ = env.step(action)
                options = _get_option_tree_(agent)
                print(
                    f"@{onehot2directedpoint(state)} : {reward} => {options}")
                rendered = _render_options_(env.render('rgb_array'), options)
                images.append(rendered)
        gif = np.stack(images, 0)
        # np.ndarray [t, imx, imy, 3]
        gif_tensor: torch.Tensor = torch.from_numpy(gif).type(
            torch.uint8).unsqueeze(0)
        # torch.Tensor[uint8] [1, t, imx, imy, 3]
        gif_tensor = gif_tensor.permute(0, 1, 4, 2, 3)
        writer.add_video('sample trajectory', gif_tensor, global_step=ts)
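
add_video expects a uint8 (or [0, 1] float) tensor shaped (N, T, C, H, W), which is exactly what the final permute produces. The same shape bookkeeping on dummy frames, for reference:

import numpy as np
import torch

frames = np.zeros((10, 64, 64, 3), dtype=np.uint8)  # [t, H, W, 3] RGB frames
video = torch.from_numpy(frames).unsqueeze(0)       # [1, t, H, W, 3]
video = video.permute(0, 1, 4, 2, 3)                # [1, t, 3, H, W] for add_video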
Code example #3
def generate(self, state: OneHotImg, prev_option: Optional[Option[Point]],
             parent_option: Option[Point]) -> List[Option[Point]]:
    xdim: int = state.shape[0]
    ydim: int = state.shape[1]
    depth: int = parent_option.depth
    if prev_option is not None:
        depth = max(prev_option.depth, depth)
    child_depth: int = depth + 1
    result: List[Option[Point]] = []
    # not quite right, but haven't figured out a good solution yet
    for x in range(xdim):
        for y in range(ydim):
            point: Point = np.asarray([x, y], dtype=np.int8)
            if tile_type(state, point) in ['Empty', 'Goal']:
                result.append(Option(point, child_depth))
    return result
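
find and tile_type are project helpers not shown in this snippet. One plausible reading, assuming state is a one-hot grid shaped [x, y, n_tile_types]; the channel ordering below is an assumption, not the project's:

import numpy as np

TILE_TYPES = ['Empty', 'Wall', 'Goal', 'Agent']  # assumed channel ordering

def tile_type(state: np.ndarray, point: np.ndarray) -> str:
    # name of the tile whose one-hot channel is set at `point`
    return TILE_TYPES[int(np.argmax(state[point[0], point[1]]))]

def find(state: np.ndarray, tile: str) -> np.ndarray:
    # coordinates of the first tile of the given type
    xs, ys = np.nonzero(state[:, :, TILE_TYPES.index(tile)])
    return np.asarray([xs[0], ys[0]], dtype=np.int8)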
Code example #4
def train(agent, env, settings, testfn=None, vizfn=None, savefn=None):
    seeds = _get_seeds_(settings)
    ts = 0
    test_after_episode = False
    viz_after_episode = False

    for ep in range(settings['N_EPISODES']):
        env.seed(next(seeds))
        state = env.reset()
        goal_point = find(state, 'Goal')
        option = Option(goal_point, depth=0)
        agent.reset(env, option, random_seed=3)
        done = False

        while not done:
            action = agent.act(state, option)
            state, reward, done, _ = env.step(action)
            agent.view(Transition(state, action, reward))

            ts += 1

            if (settings['TEST_FREQ'] is not None
                    and ts % settings['TEST_FREQ'] == 0):
                test_after_episode = True
            if (settings['VIZ_FREQ'] is not None
                    and ts % settings['VIZ_FREQ'] == 0):
                viz_after_episode = True

            agent.optimize()

        if test_after_episode and testfn is not None:
            testfn(agent, ep, ts)
            test_after_episode = False
        if viz_after_episode and vizfn is not None:
            vizfn(agent, ep, ts)
            viz_after_episode = False

    if savefn is not None:
        savefn(agent)
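
The test_after_episode / viz_after_episode flags defer the callbacks until the episode ends, so evaluation never resets the environment mid-episode. One possible wiring of the pieces; make_agent, make_env and the settings values are assumptions:

settings = {'N_EPISODES': 100,   # training episodes
            'TEST_FREQ': 1000,   # timesteps between evaluations
            'VIZ_FREQ': 5000,    # timesteps between visualizations
            'SEED': 0}           # assumed key consumed by _get_seeds_
train(make_agent(), make_env(), settings,
      testfn=test, vizfn=visualize, savefn=None)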
Code example #5
    def test(agent: SMARTAgent, ep, ts):
        rewards = [0] * len(seeds)
        for i, seed in enumerate(seeds):
            env.seed(seed)
            state = env.reset()

            goal_point = find(state, 'Goal')
            option = Option(goal_point, depth=0)
            agent.reset(env, option, random_seed=3)

            done = False

            while not done:
                action = agent.act(state, option)
                state, reward, done, _ = env.step(action)

                rewards[i] += reward  # index by loop position, not seed value

        for i, seed in enumerate(seeds):
            writer.add_scalar(f"Test Reward: {seed}",
                              rewards[i],
                              global_step=ts)
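
Each seed gets its own scalar tag, so TensorBoard plots one reward curve per evaluation seed. The same pattern on dummy numbers; add_scalar is the real API, the log directory is a placeholder:

from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter(log_dir='runs/demo')  # assumed log location
for i, seed in enumerate([0, 1, 2]):
    writer.add_scalar(f"Test Reward: {seed}", float(i), global_step=100)
writer.close()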
Code example #6
def generate(self, state: State,
             option: Option[OptionData]) -> List[Option[OptionData]]:
    # the environment already knows which tiles are empty, so ask it
    # directly instead of scanning the grid
    possibilities: List[Point] = self.env._all_tiles_of_type("Empty")
    return [Option(point, option.depth + 1) for point in possibilities]
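
Option is constructed throughout these snippets as Option(value, depth). A minimal stand-in consistent with that usage; the real class in blumx116/SMART is presumably richer:

from dataclasses import dataclass
from typing import Generic, TypeVar

OptionData = TypeVar('OptionData')

@dataclass
class Option(Generic[OptionData]):
    value: OptionData  # e.g. a target Point on the grid
    depth: int         # nesting depth below the root option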
Code example #7
File: minigrid_test.py  Project: blumx116/SMART
def get_option_tree(agent):
    result = []
    option_node = agent.current_option_node
    prev_option = None
    while option_node is not None:
        # keep a node's option only when we reach it from its left child
        # (or when it is the starting node)
        if prev_option is None or option_node.left == prev_option:
            result.append(option_node.value)
        prev_option = option_node
        option_node = option_node.parent
    return result
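
The walk collects a node's value only when it is reached from its left child (or is the starting node), i.e. the chain of options still pending. A minimal node shape consistent with the traversal; an assumption, not the project's class:

class OptionNode:
    def __init__(self, value, parent=None):
        self.value = value
        self.parent = parent
        self.left = None   # subtree executed before this option
        self.right = None  # subtree executed after this option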


fig = plt.figure()  # create the figure first so the imshow artists attach to it
images = []
for _ in range(N_EPISODES):
    state = env.reset()
    goal_point = find(state, 'Goal')
    option = Option(goal_point, depth=0)
    agent.reset(env, option, random_seed=3)

    images.append([plt.imshow(env.render('rgb_array'), animated=True)])
    done = False
    while not done:
        action = agent.act(state, option)
        state, reward, done, _ = env.step(action)
        options = get_option_tree(agent)
        print(f"@{onehot2directedpoint(state)} : {reward} => {options}")
        rendered = visualize(env.render('rgb_array'), options)
        images.append([plt.imshow(rendered, animated=True)])

ani = animation.ArtistAnimation(fig, images,
                                interval=100, blit=True)  # timing values assumed
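
To keep the animation rather than only display it, matplotlib's save with the pillow writer works; filename and fps are placeholders:

ani.save('trajectory.gif', writer='pillow', fps=10)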
Code example #8
                                 random_seed=settings['random'])

agent: SMARTAgent = SMARTAgent(evaluator, generator, planning_terminator,
                               policy_terminator, low_level, memory, settings)

step: int = 0
images = []
for seed in [0] * 500:
    env = MazeWorld(cache._get_cached_board(seed))

    total_reward: int = 0
    t: int = 0
    done: bool = False

    state, goal = env.reset(3)
    goal = Option(goal, 0)
    states: List[Transition] = []  # transitions are appended below
    agent.reset(env, goal)

    while not done:
        print('step')
        action: Action = agent.act(state)
        new_state, reward, done, info = env.step(action)
        total_reward += reward
        states.append(Transition(state, action, reward, new_state))
        state = new_state
        agent.optimize(step)
        t += 1
        step += 1

    def render(env: MazeWorld, state: State):