def test_api():
    """Run ``check_env`` against a single-agent ``SumoEnvironment``.

    The environment is built on the single-intersection network and route
    files, reset once, passed to ``check_env``, and then closed.
    """
    env = SumoEnvironment(
        single_agent=True,
        num_seconds=100000,
        net_file='nets/single-intersection/single-intersection.net.xml',
        route_file='nets/single-intersection/single-intersection.rou.xml',
    )
    env.reset()
    try:
        check_env(env)
    finally:
        # Close the environment even when check_env raises, so a failing
        # check does not leave the already-reset environment open.
        env.close()
# Build one QLAgent per id in env.ts_ids. Each agent starts from the encoded
# form of that id's entry in initial_states and shares the hyper-parameters
# taken from args.
ql_agents = {
    ts_id: QLAgent(
        starting_state=env.encode(initial_states[ts_id], ts_id),
        state_space=env.observation_space,
        action_space=env.action_space,
        alpha=args.alpha,
        gamma=args.gamma,
        exploration_strategy=EpsilonGreedy(
            initial_epsilon=args.epsilon,
            min_epsilon=args.min_epsilon,
            decay=args.decay,
        ),
    )
    for ts_id in env.ts_ids
}

done = {'__all__': False}
infos = []  # not read anywhere in this section

if not args.fixed:
    # Learning mode: every agent picks an action, the environment advances
    # one step, then every agent learns from its own next state and reward.
    while not done['__all__']:
        actions = {ts_id: agent.act() for ts_id, agent in ql_agents.items()}
        s, r, done, _ = env.step(action=actions)
        for agent_id in ql_agents:
            ql_agents[agent_id].learn(
                next_state=env.encode(s[agent_id], agent_id),
                reward=r[agent_id],
            )
else:
    # Fixed mode: step with an empty action dict until done['__all__'] is
    # set; the agents are never consulted.
    # NOTE(review): presumably an empty dict leaves the signals on their
    # predefined program -- confirm against env.step.
    while not done['__all__']:
        _, _, done, _ = env.step({})

# Save this run's results through env.save_csv, then close the environment.
env.save_csv(out_csv, run)
env.close()