def test_api():
    """Smoke-test the 4x4 grid SUMO environment against the PettingZoo API checker."""
    environment = sumo_rl.env(
        net_file='nets/4x4-Lucas/4x4.net.xml',
        route_file='nets/4x4-Lucas/4x4c1c2c1c2.rou.xml',
        out_csv_name='outputs/4x4grid/test',
        use_gui=False,
        num_seconds=80000,
    )
    api_test(environment)
    environment.close()
def ingolstadt1(parallel=True, **kwargs):
    """RESCO Ingolstadt scenario.

    Number of agents: 1
    Number of actions: 3
    """
    # Scenario-defining settings always override caller-supplied kwargs.
    scenario = {
        'net_file': PATH + '/../nets/RESCO/ingolstadt1/ingolstadt1.net.xml',
        'route_file': PATH + '/../nets/RESCO/ingolstadt1/ingolstadt1.rou.xml',
        'begin_time': 57600,
        'num_seconds': 61200,
    }
    kwargs.update(scenario)
    factory = parallel_env if parallel else env
    return factory(**kwargs)
def cologne8(parallel=True, **kwargs):
    """RESCO Cologne scenario with eight intersections.

    Number of agents: 8
    Number of actions: variable
    """
    # Scenario-defining settings always override caller-supplied kwargs.
    scenario = {
        'net_file': PATH + '/../nets/RESCO/cologne8/cologne8.net.xml',
        'route_file': PATH + '/../nets/RESCO/cologne8/cologne8.rou.xml',
        'begin_time': 25200,
        'num_seconds': 28800,
    }
    kwargs.update(scenario)
    factory = parallel_env if parallel else env
    return factory(**kwargs)
def grid4x4(parallel=True, **kwargs):
    """RESCO synthetic 4x4 grid scenario.

    Number of agents = 16
    Number of actions = 4
    All agents share the same observation and action space.
    """
    # Scenario-defining settings always override caller-supplied kwargs.
    scenario = {
        'net_file': PATH + '/../nets/RESCO/grid4x4/grid4x4.net.xml',
        'route_file': PATH + '/../nets/RESCO/grid4x4/grid4x4_1.rou.xml',
        'num_seconds': 3600,
    }
    kwargs.update(scenario)
    factory = parallel_env if parallel else env
    return factory(**kwargs)
def cologne3(parallel=True, **kwargs):
    """RESCO Cologne scenario with three intersections.

    Number of agents: 3
    Number of actions: 2 agents with 4 actions and 1 agent with 3 actions.
    Two agents share the same observation and action space; one has
    different spaces.
    """
    # Scenario-defining settings always override caller-supplied kwargs.
    scenario = {
        'net_file': PATH + '/../nets/RESCO/cologne3/cologne3.net.xml',
        'route_file': PATH + '/../nets/RESCO/cologne3/cologne3.rou.xml',
        'begin_time': 25200,
        'num_seconds': 28800,
    }
    kwargs.update(scenario)
    factory = parallel_env if parallel else env
    return factory(**kwargs)
import traci import sumo_rl from sumo_rl.agents import QLAgent from sumo_rl.exploration import EpsilonGreedy if __name__ == '__main__': alpha = 0.1 gamma = 0.99 decay = 1 runs = 1 env = sumo_rl.env(net_file='nets/4x4-Lucas/4x4.net.xml', route_file='nets/4x4-Lucas/4x4c1c2c1c2.rou.xml', use_gui=False, min_green=8, delta_time=5, num_seconds=80000) for run in range(1, runs + 1): env.reset() initial_states = {ts: env.observe(ts) for ts in env.agents} ql_agents = { ts: QLAgent(starting_state=env.unwrapped.env.encode( initial_states[ts], ts), state_space=env.observation_space(ts), action_space=env.action_space(ts), alpha=alpha, gamma=gamma, exploration_strategy=EpsilonGreedy(initial_epsilon=0.05,
import numpy as np
import ray  # BUG FIX: `ray.init()` below raised NameError — `from ray.rllib...` imports do not bind the name `ray`
import sumo_rl
import traci
from gym import spaces
from ray.rllib.agents.a3c.a3c import A3CTrainer
from ray.rllib.agents.a3c.a3c_tf_policy import A3CTFPolicy
from ray.rllib.env import PettingZooEnv
from ray.tune.registry import register_env

if __name__ == '__main__':
    ray.init()

    # Wrap the multi-agent SUMO 4x4 grid (PettingZoo AEC env) for RLlib.
    register_env(
        "4x4grid",
        lambda _: PettingZooEnv(sumo_rl.env(
            net_file='nets/4x4-Lucas/4x4.net.xml',
            route_file='nets/4x4-Lucas/4x4c1c2c1c2.rou.xml',
            out_csv_name='outputs/4x4grid/a3c',
            use_gui=False,
            num_seconds=80000,
        )),
    )

    trainer = A3CTrainer(env="4x4grid", config={
        "multiagent": {
            "policies": {
                # Single shared policy: 11-dim box observation, 2 discrete actions.
                '0': (A3CTFPolicy,
                      spaces.Box(low=np.zeros(11), high=np.ones(11)),
                      spaces.Discrete(2),
                      {}),
            },
            # Traffic lights are always controlled by this policy.
            "policy_mapping_fn": (lambda id: '0'),
        },
        "lr": 0.001,
        "no_done_at_end": True,
    })

    # Train indefinitely; print each distributed training iteration's results.
    while True:
        print(trainer.train())