def main():
    """Bootstrap a config, apply the experiment overrides, and run it.

    The MODEL section of the bootstrapped config has its ENV entry set to
    MAZEBASE, its AGENT entry set to REINFORCE and its USE_BASELINE entry
    set to True before the config is passed to run().
    """
    config = bootstrap()

    # Experiment-specific overrides applied on top of the bootstrapped values.
    overrides = (
        (ENV, MAZEBASE),
        (AGENT, REINFORCE),
        (USE_BASELINE, True),
    )
    for key, value in overrides:
        config[MODEL][key] = value

    run(config=config)
average_episodic_reward=total_episodic_rewards / i_episode, agent=agent.name, environment=env.name) return agent, optimisers, total_episodic_rewards def run(config): write_config_log(config) env = choose_env(env=config[MODEL][ENV])() possible_actions = env.all_possible_actions() agent = config[MODEL][AGENT] agent = choose_agent(agent_type=agent) \ (config=config, possible_actions=possible_actions) optimisers = agent.get_optimisers(optimiser_name=config[MODEL][OPTIMISER]) total_episodic_rewards = 0.0 for i_episode in range(1, config[MODEL][NUM_EPOCHS] + 1): agent, optimisers, total_episodic_rewards = run_episode( env, agent, optimisers, total_episodic_rewards, i_episode, max_steps_per_episode=config[MODEL][MAX_STEPS_PER_EPISODE]) if __name__ == '__main__': config = bootstrap() run(config)