def run(use_gui=True, runs=1):
    """Train one independent True Online SARSA(lambda) agent per traffic signal
    on the double-intersection network.

    Args:
        use_gui: whether to launch SUMO with its GUI.
        runs: number of training episodes; one CSV of metrics is saved per run.
    """
    out_csv = 'outputs/double/sarsa-double'
    env = SumoEnvironment(
        net_file='nets/double/network.net.xml',
        single_agent=False,
        route_file='nets/double/flow.rou.xml',
        out_csv_name=out_csv,
        use_gui=use_gui,
        num_seconds=86400,
        yellow_time=3,
        min_green=5,
        max_green=60,
    )
    fixed_tl = False

    # One learner per traffic signal (independent multi-agent control),
    # each built from that signal's own observation/action space.
    agents = {
        ts_id: TrueOnlineSarsaLambda(
            env.observation_spaces(ts_id),
            env.action_spaces(ts_id),
            alpha=0.000000001,  # NOTE(review): unusually small learning rate — confirm intentional
            gamma=0.95,
            epsilon=0.05,
            lamb=0.1,
            fourier_order=7,
        )
        for ts_id in env.ts_ids
    }

    for run in range(1, runs + 1):
        obs = env.reset()
        done = {'__all__': False}

        if fixed_tl:
            # Fixed-time baseline: pass an empty action dict so every signal
            # keeps its default program. Changed from env.step(None) to
            # env.step({}) for consistency with the other scripts in this file.
            while not done['__all__']:
                _, _, done, _ = env.step({})
        else:
            while not done['__all__']:
                actions = {ts_id: agents[ts_id].act(obs[ts_id]) for ts_id in obs.keys()}

                next_obs, r, done, _ = env.step(action=actions)

                # Per-signal SARSA(lambda) update, then advance that signal's state.
                for ts_id in next_obs.keys():
                    agents[ts_id].learn(
                        state=obs[ts_id],
                        action=actions[ts_id],
                        reward=r[ts_id],
                        next_state=next_obs[ts_id],
                        done=done[ts_id],
                    )
                    obs[ts_id] = next_obs[ts_id]

        env.save_csv(out_csv, run)
out_csv_name=out_csv, use_gui=args.gui, num_seconds=args.seconds, min_green=args.min_green, max_green=args.max_green, max_depart_delay=0) for run in range(1, args.runs+1): obs = env.reset() agent = TrueOnlineSarsaLambda(env.observation_space, env.action_space, alpha=args.alpha, gamma=args.gamma, epsilon=args.epsilon, fourier_order=7, lamb=0.9) done = False if args.fixed: while not done: _, _, done, _ = env.step({}) else: while not done: action = agent.act(obs) next_obs, r, done, _ = env.step(action=action) agent.learn(state=obs, action=action, reward=r, next_state=next_obs, done=done) obs = next_obs env.save_csv(out_csv, run)
initial_states = env.reset()

# One tabular Q-learning agent per traffic signal, each seeded with the
# encoded initial observation of its own intersection and its own
# epsilon-greedy exploration schedule.
ql_agents = {
    ts: QLAgent(
        starting_state=env.encode(initial_states[ts], ts),
        state_space=env.observation_spaces(ts),
        action_space=env.action_spaces(ts),
        alpha=args.alpha,
        gamma=args.gamma,
        exploration_strategy=EpsilonGreedy(
            initial_epsilon=args.epsilon,
            min_epsilon=args.min_epsilon,
            decay=args.decay,
        ),
    )
    for ts in env.ts_ids
}

infos = []
done = {'__all__': False}

if args.fixed:
    # Fixed-time baseline: an empty action dict leaves the signal plans untouched.
    while not done['__all__']:
        _, _, done, _ = env.step({})
else:
    while not done['__all__']:
        actions = {ts: agent.act() for ts, agent in ql_agents.items()}
        s, r, done, info = env.step(action=actions)
        # Each agent learns only from its own encoded next state and reward.
        for agent_id in s.keys():
            ql_agents[agent_id].learn(
                next_state=env.encode(s[agent_id], agent_id),
                reward=r[agent_id],
            )

env.save_csv(out_csv_name=out_csv, run=1)
ts: QLAgent(starting_state=env.encode(initial_states[ts], ts), state_space=env.observation_space, action_space=env.action_space, alpha=alpha, gamma=gamma, exploration_strategy=EpsilonGreedy(initial_epsilon=0.05, min_epsilon=0.005, decay=decay)) for ts in env.ts_ids } for run in range(1, runs + 1): if run != 1: initial_states = env.reset() for ts in initial_states.keys(): ql_agents[ts].state = env.encode(initial_states[ts], ts) infos = [] done = {'__all__': False} while not done['__all__']: actions = {ts: ql_agents[ts].act() for ts in ql_agents.keys()} s, r, done, info = env.step(action=actions) for agent_id in s.keys(): ql_agents[agent_id].learn(next_state=env.encode( s[agent_id], agent_id), reward=r[agent_id]) env.save_csv('outputs/4x4/ql-test-995speed', run) env.close()
runs = 1
env = SumoEnvironment(
    net_file='nets/4x4-Lucas/4x4.net.xml',
    route_file='nets/4x4-Lucas/4x4c1c2c1c2.rou.xml',
    use_gui=True,
    num_seconds=80000,
    max_depart_delay=0,
)

for run in range(1, runs + 1):
    # Unlike the persistent-agent variant, a brand-new set of Q-learning
    # agents is created every run, seeded from the freshly reset environment.
    initial_states = env.reset()
    ql_agents = {
        ts: QLAgent(
            starting_state=env.encode(initial_states[ts], ts),
            state_space=env.observation_space,
            action_space=env.action_space,
            alpha=alpha,
            gamma=gamma,
            exploration_strategy=EpsilonGreedy(initial_epsilon=0.05, min_epsilon=0.005, decay=decay),
        )
        for ts in env.ts_ids
    }

    infos = []
    done = {'__all__': False}
    while not done['__all__']:
        actions = {ts: agent.act() for ts, agent in ql_agents.items()}
        s, r, done, info = env.step(action=actions)
        for agent_id in s.keys():
            ql_agents[agent_id].learn(
                next_state=env.encode(s[agent_id], agent_id),
                reward=r[agent_id],
            )

    env.save_csv('outputs/4x4/ql_test', run)

env.close()