Esempio n. 1
0
def test_api():
    """Run the environment API checker against a single-agent SumoEnvironment.

    Builds the environment from the single-intersection network and route
    files, resets it once, and passes it to ``check_env``. The test fails
    by whatever exception ``check_env`` raises.
    """
    env = SumoEnvironment(single_agent=True,
                          num_seconds=100000,
                          net_file='nets/single-intersection/single-intersection.net.xml',
                          route_file='nets/single-intersection/single-intersection.rou.xml')
    env.reset()
    try:
        check_env(env)
    finally:
        # A failing check raises; close here so the environment is not left
        # open behind a failed test.
        env.close()
Esempio n. 2
0
        # Build one QLAgent per id in env.ts_ids (presumably traffic-signal
        # ids — confirm). Each agent starts from the encoded initial state for
        # its id and explores with an EpsilonGreedy strategy configured from
        # `args`.
        ql_agents = {
            ts: QLAgent(starting_state=env.encode(initial_states[ts], ts),
                        state_space=env.observation_space,
                        action_space=env.action_space,
                        alpha=args.alpha,
                        gamma=args.gamma,
                        exploration_strategy=EpsilonGreedy(
                            initial_epsilon=args.epsilon,
                            min_epsilon=args.min_epsilon,
                            decay=args.decay))
            for ts in env.ts_ids
        }

        # `done` is rebound to env.step()'s third return value on every
        # iteration; its '__all__' entry is what terminates the loops below.
        done = {'__all__': False}
        # NOTE(review): `infos` is never read in the visible lines — confirm
        # whether code after this fragment uses it.
        infos = []
        if args.fixed:
            # Fixed mode: step with an empty action dict; the agents are never
            # consulted and never learn. Presumably the environment falls back
            # to its own signal plan here — confirm against env.step().
            while not done['__all__']:
                _, _, done, _ = env.step({})
        else:
            while not done['__all__']:
                # Every agent chooses its action before the environment moves.
                actions = {ts: ql_agents[ts].act() for ts in ql_agents.keys()}

                s, r, done, _ = env.step(action=actions)

                # Feed each agent its own encoded next state and reward; `s`
                # and `r` are indexed by the same ids as `ql_agents`.
                for agent_id in ql_agents.keys():
                    ql_agents[agent_id].learn(next_state=env.encode(
                        s[agent_id], agent_id),
                                              reward=r[agent_id])
        # Pass out_csv and run to env.save_csv (presumably writes this run's
        # results to disk — confirm), then close the environment.
        env.save_csv(out_csv, run)
        env.close()