def summarize(agent, env, settings: Dict[str, Any], seeds: List[int], writer: SummaryWriter):
    """Roll out one episode per seed and log the run's settings as hparams.

    Logs the per-settings hparam dict (minus the non-scalar 'device' entry)
    together with mean/min/max episode reward across the given seeds.
    """
    episode_returns = [0] * len(seeds)
    for idx, seed in enumerate(seeds):
        if seed is not None:  # None means: leave the env's RNG state alone
            env.seed(seed)
        state = env.reset()
        option = Option(find(state, 'Goal'), depth=0)
        agent.reset(env, option, random_seed=3)
        done = False
        while not done:
            chosen = agent.act(state, option)
            state, reward, done, _ = env.step(chosen)
            episode_returns[idx] += reward
    hparams = {k: v for k, v in settings.items() if k not in ['device']}
    metrics = {
        'average reward': np.mean(episode_returns),
        'min reward': np.min(episode_returns),
        'max reward': np.max(episode_returns),
    }
    writer.add_hparams(hparams, metrics)
def visualize(agent: SMARTAgent, ep: int, ts: int):
    """Record one greedy episode per seed and log the frames as a video.

    For each seed: logs the agent's decision likelihoods, prints the state /
    reward / active option stack each step, and collects rendered frames
    (with the option overlay) into a single tensorboard video clip.
    """
    frames = []
    for seed in seeds:
        if seed is not None:
            env.seed(seed)
        state = env.reset()
        option = Option(find(state, 'Goal'), depth=0)
        agent.reset(env, option, random_seed=3)
        visualize_decision(agent, state, writer, f'likelihoods: {seed}', ep, ts)
        frames.append(env.render('rgb_array'))
        done = False
        while not done:
            chosen = agent.act(state, option)
            state, reward, done, _ = env.step(chosen)
            options = _get_option_tree_(agent)
            print(
                f"@{onehot2directedpoint(state)} : {reward} => {options}")
            frames.append(_render_options_(env.render('rgb_array'), options))
    # [t, H, W, 3] uint8 frames -> [1, t, 3, H, W], the layout add_video expects.
    clip = np.stack(frames, 0)
    clip_tensor: torch.Tensor = torch.from_numpy(clip).type(torch.uint8).unsqueeze(0)
    clip_tensor = clip_tensor.permute(0, 1, 4, 2, 3)
    writer.add_video('sample trajectory', clip_tensor, global_step=ts)
def _add_potential_targets_(self, state: OneHotImg, goal: Point) -> None:
    """Queue every unvisited, non-wall neighbour of the agent as a waypoint.

    Each candidate is pushed with the path that reaches it (history plus the
    freshly planned segment) and is prioritised by its distance to `goal`.
    """
    location: Point = find(state, 'Agent')
    for dxdy in MinigridBacktrackingAgent.directions:
        point = location + dxdy
        # Bounds check is belt-and-braces: minigrid keeps walls on the edges,
        # so the wall filter below would reject these anyway.
        if not self._is_valid_point_(state, point):
            continue
        if tile_type(state, point) == "Wall":
            continue
        if point in self.visited:
            continue
        path_to: List[DirectedPoint] = self._navigate_to_(
            From=onehot2directedpoint(state), To=point)
        distance: float = self._distance_(point, goal)
        self.waypoints.push(self._join_paths_(self.history, path_to), distance)
def train(agent, env, settings, testfn=None, vizfn=None, savefn=None):
    """Train `agent` on `env` for settings['N_EPISODES'] episodes.

    Callbacks (all optional):
      testfn(agent, ep, ts) - run after any episode in which the global
          timestep counter crossed a multiple of settings['TEST_FREQ'].
      vizfn(agent, ep, ts)  - same, keyed on settings['VIZ_FREQ'].
      savefn(agent)         - run after every episode.

    Test/viz are deferred to the episode boundary so a rollout is never
    interrupted mid-trajectory.
    """
    seeds = _get_seeds_(settings)
    ts = 0  # global timestep counter, shared across episodes
    test_after_episode = False
    viz_after_episode = False
    for ep in range(settings['N_EPISODES']):
        env.seed(next(seeds))
        state = env.reset()
        goal_point = find(state, 'Goal')
        option = Option(goal_point, depth=0)
        agent.reset(env, option, random_seed=3)
        done = False
        while not done:
            action = agent.act(state, option)
            state, reward, done, _ = env.step(action)
            agent.view(Transition(state, action, reward))
            ts += 1
            if settings['TEST_FREQ'] is not None and ts % settings[
                    'TEST_FREQ'] == 0:
                test_after_episode = True
            if settings[
                    'VIZ_FREQ'] is not None and ts % settings['VIZ_FREQ'] == 0:
                viz_after_episode = True
        agent.optimize()
        # Fix: previously a configured TEST_FREQ/VIZ_FREQ with no callback
        # supplied crashed here with "TypeError: 'NoneType' is not callable".
        if test_after_episode:
            if testfn is not None:
                testfn(agent, ep, ts)
            test_after_episode = False
        if viz_after_episode:
            if vizfn is not None:
                vizfn(agent, ep, ts)
            viz_after_episode = False
        if savefn is not None:
            savefn(agent)
def test(agent: SMARTAgent, ep, ts):
    """Evaluate `agent` for one episode per seed; log each seed's reward.

    Writes one 'Test Reward: {seed}' scalar per seed at global step `ts`.
    Relies on the enclosing scope's `seeds`, `env`, and `writer`.
    """
    rewards = [0] * len(seeds)
    for i, seed in enumerate(seeds):
        if seed is not None:  # consistent with summarize()/visualize(): None = don't reseed
            env.seed(seed)
        state = env.reset()
        goal_point = find(state, 'Goal')
        option = Option(goal_point, depth=0)
        agent.reset(env, option, random_seed=3)
        done = False
        while not done:
            # Fix: act() takes the active option, as every other call site
            # (train/summarize/visualize) does; omitting it raised TypeError.
            action = agent.act(state, option)
            state, reward, done, info = env.step(action)
            # Fix: accumulate by enumeration index, not by seed *value* —
            # seed values are not valid indices into `rewards`, and the
            # logging loop below reads rewards[i].
            rewards[i] += reward
    for i, seed in enumerate(seeds):
        writer.add_scalar(f"Test Reward: {seed}", rewards[i], global_step=ts)
def _achieved_goal_(state: OneHotImg, goal: Point) -> bool:
    """Return True when the agent's tile coincides with the goal tile."""
    agent_position: Point = find(state, 'Agent')
    return np.array_equal(agent_position, goal)
def get_option_tree(agent):
    """Collect the active option stack by walking from the current node to the root.

    A node's value is included when the walk entered it from its left child
    (or at the starting node) — presumably the "active" branch of the option
    tree; TODO confirm against the option-tree node structure.
    """
    result = []
    option_node = agent.current_option_node
    prev_option = None
    while option_node is not None:
        if prev_option is None or option_node.left == prev_option:
            result.append(option_node.value)
        prev_option = option_node
        option_node = option_node.parent
    return result


# Script: roll out N_EPISODES episodes, printing state/reward/options each
# step and collecting matplotlib artists for an ArtistAnimation.
# NOTE(review): relies on module-level `env`, `agent`, `N_EPISODES` — defined
# elsewhere in the file.
images = []
for _ in range(N_EPISODES):
    state = env.reset()
    goal_point = find(state, 'Goal')
    option = Option(goal_point, depth=0)
    agent.reset(env, option, random_seed=3)
    # First frame: the freshly reset environment.
    images.append([plt.imshow(env.render('rgb_array'), animated=True)])
    done = False
    while not done:
        action = agent.act(state, option)
        state, reward, done, _ = env.step(action)
        options = get_option_tree(agent)
        print(f"@{onehot2directedpoint(state)} : {reward} => {options}")
        # Overlay the active options on the rendered frame.
        rendered = visualize(env.render('rgb_array'), options)
        images.append([plt.imshow(rendered, animated=True)])
fig = plt.figure()
ani = animation.ArtistAnimation(fig,