# MiniGrid training script: assemble the SMART agent (evaluator, generator,
# terminators, low-level controller, replay memory), then train, test,
# visualize, and summarize through the `training` helpers.
low_level_agent = MinigridBacktrackingAgent()

# Observations arrive channels-last (H, W, C); the models expect
# (batch, C, H, W), with -1 standing in for a variable batch dimension.
shape = env.observation_space.shape
shape = (-1, shape[-1], shape[0], shape[1])

v_model = VModel(shape, 32, 2, device=settings['device'])
q_model = QModel(shape, 32, 2, device=settings['device'])

# Planning stops once the search reaches the configured depth.
planning_terminator = DepthPlanningTerminator(max_depth=settings['max_depth'])
evaluator = Evaluator(
    v_model, q_model, planning_terminator, settings,
    get_beta=lambda step: 3,  # constant exploration temperature
    gamma=0.99,
)
generator = SimpleMinigridGenerator()
memory = CompleteMemory(max_length=100000)


def goal_met(s, o):
    """Return whether the agent's current cell matches the option's target.

    Channel 8 of the observation is read as a one-hot agent-location map
    (assumed layout: imx, imy, onehot — TODO confirm); the argmax cell is
    compared against ``o.value``.
    """
    onehot = s[:, :, 8]
    loc = np.unravel_index(onehot.argmax(), onehot.shape)
    return np.all(loc == o.value)


policy_terminator = StrictGoalTerminator(goal_met)
agent = SMARTAgent(
    evaluator,
    generator,
    planning_terminator,
    policy_terminator=policy_terminator,
    low_level=low_level_agent,
    memory=memory,
    settings=settings,
)

# Periodic evaluation / visualization hooks over seeds 0..4.
testfn = training.make_simple_minigrid_test(env, writer, range(5))
vizfn = training.make_visualize(env, writer, range(5))

training.train(agent, env, settings, testfn=testfn, vizfn=vizfn)
training.summarize(agent, env, settings, list(range(10)), writer)
# Maze-world training script: wire up a SMART agent (goal manager +
# wrapped low-level maze policy + memory) and run 500 episodes on a
# cached board. The episode loop continues beyond this excerpt.
fig = plt.figure()
images = []  # collected frames — presumably for visualization; TODO confirm
# Low-level controller, wrapped to translate grid goals to point goals —
# NOTE(review): assumption from the wrapper's name, confirm.
low_level_agent: IAgent = BacktrackingMazeAgent(env)
low_level_agent: IAgent = Grid2PointWrapper(low_level_agent)
# Evaluator over the padded grid (+2 presumably accounts for boundary
# walls — NOTE(review): confirm against GridworldEvaluator).
evaluator: IEvaluator = GridworldEvaluator(XDIMS + 2, YDIMS + 2, settings, gamma=0.99)
generator: IGenerator = SimpleGridworldGenerator()
# A goal is fulfilled when the state's last channel equals the goal's
# first channel.
fulfils_goal = lambda state, goal: array_equal(state[:, :, -1], goal[:, :, 0])
goal_manager: IGoalManager = SimpleGoalManager(evaluator, generator, 1, fulfils_goal)
memory: IMemory = CompleteMemory(100, 3)
agent = SMARTAgent(goal_manager, low_level_agent, memory)
totals = []
step: int = 0
# 500 episodes, all on the board cached under seed 0.
# NOTE(review): `iter` shadows the builtin; rename if this code is edited.
for iter, seed in enumerate([0] * 500):
    total_reward: int = 0
    print(f"================={seed}=================")
    # Rebuild the environment from the cached board for this seed.
    # NOTE(review): `_get_cached_board` is private API of `cache`.
    env = MazeWorld(cache._get_cached_board(seed))
    state, goal = env.reset(3)
    agent.reset(env, state, goal)
    done = False
    # Episode state history, seeded with the initial observation.
    states: List[State] = [state]
# Variant maze-world training script: evaluator/generator pair driven by a
# linearly growing beta, then 500 episodes on a cached board. The episode
# loop continues beyond this excerpt.
# Beta grows linearly with the global step count.
get_beta = lambda step: 0.001 * step
evaluator: IEvaluator = SimpleMazeworldEvaluator(planning_terminator, v_model, q_model, settings, get_beta, gamma=0.99)
generator: IGenerator = SimpleMazeworldGenerator()
low_level: IOptionBasedAgent = BacktrackingMazeAgent(env)
memory: IMemory = CompleteMemory(max_length=100, random_seed=settings['random'])
agent: SMARTAgent = SMARTAgent(evaluator, generator, planning_terminator, policy_terminator, low_level, memory, settings)
step: int = 0
images = []
# 500 episodes, all on the board cached under seed 0.
for seed in [0] * 500:
    # Rebuild the environment from the cached board for this seed.
    # NOTE(review): `_get_cached_board` is private API of `cache`.
    env = MazeWorld(cache._get_cached_board(seed))
    total_reward: int = 0
    t: int = 0
    done: bool = False
    state, goal = env.reset(3)
    # Wrap the raw goal as an Option with value 0 — NOTE(review): second
    # argument's meaning not visible here; confirm against Option.
    goal = Option(goal, 0)
    # NOTE(review): a sibling script initializes this as [state]; if
    # `states` is meant to be a history list, this should likely be [state].
    states: List[State] = state
    agent.reset(env, goal)