Code example #1
def summarize(agent, env, settings: Dict[str, Any], seeds: List[int],
              writer: SummaryWriter):
    rewards = [0] * len(seeds)
    for i, seed in enumerate(seeds):
        if seed is not None:
            env.seed(seed)
        state = env.reset()
        goal_point = find(state, 'Goal')
        option = Option(goal_point, depth=0)
        agent.reset(env, option, random_seed=3)

        done = False
        while not done:
            action = agent.act(state, option)
            state, reward, done, _ = env.step(action)

            rewards[i] += reward

    writer.add_hparams(
        {key: value for key, value in settings.items() if key != 'device'},
        {
            'average reward': np.mean(rewards),
            'min reward': np.min(rewards),
            'max reward': np.max(rewards),
        })
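A hedged sketch of a call site for summarize: the SummaryWriter setup is standard torch.utils.tensorboard usage, while agent, env, and settings are project objects not shown in these excerpts, and the log directory and seed list are made up for illustration.

from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter(log_dir='runs/smart-minigrid')  # hypothetical log dir
summarize(agent, env, settings, seeds=[0, 1, 2, 3, 4], writer=writer)
writer.close()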
Code example #2
    def visualize(agent: SMARTAgent, ep: int, ts: int):
        images = []
        for seed in seeds:
            if seed is not None:
                env.seed(seed)
            state = env.reset()
            goal_point = find(state, 'Goal')
            option = Option(goal_point, depth=0)
            agent.reset(env, option, random_seed=3)

            visualize_decision(agent, state, writer, f'likelihoods: {seed}',
                               ep, ts)

            images.append(env.render('rgb_array'))
            done = False
            while not done:
                action = agent.act(state, option)
                state, reward, done, _ = env.step(action)
                options = _get_option_tree_(agent)
                print(
                    f"@{onehot2directedpoint(state)} : {reward} => {options}")
                rendered = _render_options_(env.render('rgb_array'), options)
                images.append(rendered)
        gif = np.stack(images, 0)
        # np.ndarray [t, imx, imy, 3]
        gif_tensor: torch.Tensor = torch.from_numpy(gif).type(
            torch.uint8).unsqueeze(0)
        # torch.Tensor[uint8] [1, t, imx, imy, 3]
        gif_tensor = gif_tensor.permute(0, 1, 4, 2, 3)
        # torch.Tensor[uint8] [1, t, 3, imx, imy] -- channels-first, as add_video expects
        writer.add_video('sample trajectory', gif_tensor, global_step=ts)
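For reference, SummaryWriter.add_video expects a uint8 tensor shaped (N, T, C, H, W), which is why the frames are stacked and permuted above. The same conversion in isolation (the frame size and count are illustrative):

import numpy as np
import torch

frames = [np.zeros((64, 64, 3), dtype=np.uint8) for _ in range(10)]  # t RGB frames
gif = np.stack(frames, 0)                 # (t, H, W, 3)
vid = torch.from_numpy(gif).unsqueeze(0)  # (1, t, H, W, 3)
vid = vid.permute(0, 1, 4, 2, 3)          # (1, t, 3, H, W)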
Code example #3
    def _add_potential_targets_(self, state: OneHotImg, goal: Point) -> None:
        location: Point = find(state, 'Agent')
        possibilities: Iterable[Point] = map(
            lambda dxdy: location + dxdy, MinigridBacktrackingAgent.directions)
        possibilities = filter(
            lambda point: self._is_valid_point_(state, point), possibilities)
        # ^ doesn't appear to do anything b/c minigrid has walls on the edges
        possibilities = filter(lambda point: tile_type(state, point) != "Wall",
                               possibilities)
        possibilities = filter(lambda point: point not in self.visited,
                               possibilities)
        for point in possibilities:
            path_to: List[DirectedPoint] = self._navigate_to_(
                From=onehot2directedpoint(state), To=point)
            distance: float = self._distance_(point, goal)
            self.waypoints.push(self._join_paths_(self.history, path_to),
                                distance)
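The method relies on self.waypoints exposing a push(item, priority) interface where smaller distances are served first. The project's implementation is not among these excerpts, but a minimal heapq-based sketch with that interface might look like this (hypothetical class, not the project's):

import heapq
from typing import Any, List, Tuple

class WaypointQueue:
    # Min-heap: items pushed with a smaller priority (distance) pop first.
    def __init__(self) -> None:
        self._heap: List[Tuple[float, int, Any]] = []
        self._count = 0  # insertion order breaks ties between equal distances

    def push(self, item: Any, priority: float) -> None:
        heapq.heappush(self._heap, (priority, self._count, item))
        self._count += 1

    def pop(self) -> Any:
        return heapq.heappop(self._heap)[-1]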
Code example #4
def train(agent, env, settings, testfn=None, vizfn=None, savefn=None):
    seeds = _get_seeds_(settings)
    ts = 0
    test_after_episode = False
    viz_after_episode = False

    for ep in range(settings['N_EPISODES']):
        env.seed(next(seeds))
        state = env.reset()
        goal_point = find(state, 'Goal')
        option = Option(goal_point, depth=0)
        agent.reset(env, option, random_seed=3)
        done = False

        while not done:
            action = agent.act(state, option)
            state, reward, done, _ = env.step(action)
            agent.view(Transition(state, action, reward))

            ts += 1

            test_freq = settings['TEST_FREQ']
            viz_freq = settings['VIZ_FREQ']
            if test_freq is not None and ts % test_freq == 0:
                test_after_episode = True
            if viz_freq is not None and ts % viz_freq == 0:
                viz_after_episode = True

            agent.optimize()

        if test_after_episode and testfn is not None:
            testfn(agent, ep, ts)
            test_after_episode = False
        if viz_after_episode and vizfn is not None:
            vizfn(agent, ep, ts)
            viz_after_episode = False

    if savefn is not None:
        savefn(agent)
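Putting the excerpts together, testfn and vizfn are presumably the test and visualize closures from examples #2 and #5. A hedged sketch of the wiring; the settings values below are illustrative, not the project's defaults:

settings = {
    'N_EPISODES': 500,  # illustrative values only
    'TEST_FREQ': 1000,  # timesteps between test sweeps; None disables
    'VIZ_FREQ': 5000,   # timesteps between visualizations; None disables
    'device': 'cpu',
}
train(agent, env, settings, testfn=test, vizfn=visualize)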
Code example #5
    def test(agent: SMARTAgent, ep, ts):
        rewards = [0] * len(seeds)
        for i, seed in enumerate(seeds):
            env.seed(seed)
            state = env.reset()

            goal_point = find(state, 'Goal')
            option = Option(goal_point, depth=0)
            agent.reset(env, option, random_seed=3)

            done = False

            while not done:
                action = agent.act(state, option)
                state, reward, done, info = env.step(action)

                rewards[i] += reward  # index by loop position; seed values are not valid list indices

        for i, seed in enumerate(seeds):
            writer.add_scalar(f"Test Reward: {seed}",
                              rewards[i],
                              global_step=ts)
Code example #6
    def _achieved_goal_(state: OneHotImg, goal: Point) -> bool:
        location: Point = find(state, 'Agent')
        return np.array_equal(location, goal)
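np.array_equal checks both shape and element values, so the comparison works whether the coordinates arrive as arrays, lists, or tuples; for instance:

import numpy as np

np.array_equal(np.array([3, 4]), (3, 4))  # True: same shape and values
np.array_equal(np.array([3, 4]), [3, 5])  # False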
Code example #7
File: minigrid_test.py  Project: blumx116/SMART
def get_option_tree(agent):
    result = []
    option_node = agent.current_option_node
    prev_option = None
    while option_node is not None:
        if prev_option is None or option_node.left == prev_option:
            result.append(option_node.value)
        prev_option = option_node
        option_node = option_node.parent
    return result


import matplotlib.pyplot as plt
from matplotlib import animation

fig = plt.figure()  # create the figure up front so the imshow artists attach to it
images = []
for _ in range(N_EPISODES):
    state = env.reset()
    goal_point = find(state, 'Goal')
    option = Option(goal_point, depth=0)
    agent.reset(env, option, random_seed=3)

    images.append([plt.imshow(env.render('rgb_array'), animated=True)])
    done = False
    while not done:
        action = agent.act(state, option)
        state, reward, done, _ = env.step(action)
        options = get_option_tree(agent)
        print(f"@{onehot2directedpoint(state)} : {reward} => {options}")
        rendered = visualize(env.render('rgb_array'), options)
        images.append([plt.imshow(rendered, animated=True)])

ani = animation.ArtistAnimation(fig, images)
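To display or save the resulting animation, the standard matplotlib calls apply (this step is not part of the excerpt):

ani.save('trajectory.gif', writer='pillow')  # requires Pillow; or use plt.show()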