# Example #1: SMART agent on Minigrid
# Assemble the SMART agent's components for the Minigrid environment.
low_level_agent = MinigridBacktrackingAgent()

# Move the channel axis first: (-1, C, H, W) from an (H, W, C) observation
# space — presumably for the conv models; confirm against VModel/QModel.
obs_shape = env.observation_space.shape
shape = (-1, obs_shape[-1], obs_shape[0], obs_shape[1])

v_model = VModel(shape, 32, 2, device=settings['device'])
q_model = QModel(shape, 32, 2, device=settings['device'])

# Stop planning once the search reaches the configured depth.
planning_terminator = DepthPlanningTerminator(max_depth=settings['max_depth'])

# Beta is held constant (3) regardless of the training step.
evaluator = Evaluator(
    v_model,
    q_model,
    planning_terminator,
    settings,
    get_beta=lambda step: 3,
    gamma=0.99)

generator = SimpleMinigridGenerator()
memory = CompleteMemory(max_length=100000)
def goal_met(s, o):
    """Return True when the agent sits exactly on the option's target cell.

    Channel 8 of the observation `s` is treated as a one-hot map of the
    agent position (image-x, image-y, one-hot layout); its argmax location
    is compared against the option's `value`.
    """
    onehot = s[:, :, 8]
    position = np.unravel_index(np.argmax(onehot), onehot.shape)
    return np.all(position == o.value)
# A policy only terminates once the goal predicate is exactly satisfied.
policy_terminator = StrictGoalTerminator(goal_met)

agent = SMARTAgent(
    evaluator, generator, planning_terminator,
    policy_terminator=policy_terminator,
    low_level=low_level_agent,
    memory=memory,
    settings=settings)

# Evaluation / visualisation hooks over the first five seeds.
eval_seeds = range(5)
testfn = training.make_simple_minigrid_test(env, writer, eval_seeds)
vizfn = training.make_visualize(env, writer, eval_seeds)

training.train(agent, env, settings, testfn=testfn, vizfn=vizfn)
training.summarize(agent, env, settings, list(range(10)), writer)
# Example #2: SMART agent on MazeWorld (gridworld evaluator)
# Figure and frame buffer for rendering episodes.
fig = plt.figure()
images = []

# Low-level controller: backtracking maze search adapted to point goals.
# (Collapses the original two-step rebind into one expression.)
low_level_agent: IAgent = Grid2PointWrapper(BacktrackingMazeAgent(env))

# Dimensions are grown by 2 — presumably a one-cell border around the
# board; confirm against GridworldEvaluator.
evaluator: IEvaluator = GridworldEvaluator(XDIMS + 2,
                                           YDIMS + 2,
                                           settings,
                                           gamma=0.99)
generator: IGenerator = SimpleGridworldGenerator()


def fulfils_goal(state, goal):
    """Return True when the last channel of `state` matches channel 0 of `goal`."""
    # Named def instead of a lambda assignment (PEP 8 E731).
    return array_equal(state[:, :, -1], goal[:, :, 0])


goal_manager: IGoalManager = SimpleGoalManager(evaluator, generator, 1,
                                               fulfils_goal)
memory: IMemory = CompleteMemory(100, 3)
agent = SMARTAgent(goal_manager, low_level_agent, memory)

# Per-episode reward totals and the global step counter.
totals = []

step: int = 0
# Run 500 episodes, all on the same cached board (seed 0).
# NOTE(review): `iter` shadows the builtin of the same name — consider renaming.
for iter, seed in enumerate([0] * 500):
    total_reward: int = 0
    print(f"================={seed}=================")
    # Rebuild the environment from the cached board for this seed.
    # NOTE(review): relies on the private cache._get_cached_board — confirm intended.
    env = MazeWorld(cache._get_cached_board(seed))

    # env.reset(3) returns the start state and the goal; the meaning of the
    # argument 3 is not visible here — confirm against MazeWorld.reset.
    state, goal = env.reset(3)
    agent.reset(env, state, goal)
    done = False
    # Trajectory buffer, seeded with the initial state.
    states: List[State] = [state]
# Example #3: SMART agent on MazeWorld (model-based evaluator)
def get_beta(step):
    """Beta schedule: grows linearly with the training step (0.001 * step)."""
    # Named def instead of a lambda assignment (PEP 8 E731).
    return 0.001 * step


evaluator: IEvaluator = SimpleMazeworldEvaluator(planning_terminator,
                                                 v_model,
                                                 q_model,
                                                 settings,
                                                 get_beta,
                                                 gamma=0.99)

generator: IGenerator = SimpleMazeworldGenerator()

low_level: IOptionBasedAgent = BacktrackingMazeAgent(env)

memory: IMemory = CompleteMemory(max_length=100,
                                 random_seed=settings['random'])

agent: SMARTAgent = SMARTAgent(evaluator, generator, planning_terminator,
                               policy_terminator, low_level, memory, settings)

# Global step counter and rendered-frame buffer.
step: int = 0
images = []
# Run 500 episodes, all on the same cached board (seed 0).
for seed in [0] * 500:
    # NOTE(review): relies on the private cache._get_cached_board — confirm intended.
    env = MazeWorld(cache._get_cached_board(seed))

    # Per-episode counters.
    total_reward: int = 0
    t: int = 0
    done: bool = False

    state, goal = env.reset(3)
    # Wrap the raw goal for the agent API; the second argument's meaning
    # (presumably a depth or index of 0) is not visible here — confirm.
    goal = Option(goal, 0)
    # NOTE(review): example #2 initializes this buffer as `[state]`; assigning
    # the bare state despite the List[State] annotation looks like a bug — confirm.
    states: List[State] = state
    agent.reset(env, goal)