def test_api():
    """Run ``check_env`` against a single-agent ``SumoEnvironment``.

    The environment is built on the single-intersection network and route
    files, reset once, and then handed to ``check_env``. Any exception raised
    by ``reset`` or ``check_env`` propagates to the caller, which is how the
    test reports failure.
    """
    env = SumoEnvironment(
        single_agent=True,
        num_seconds=100000,
        net_file='nets/single-intersection/single-intersection.net.xml',
        route_file='nets/single-intersection/single-intersection.rou.xml',
    )
    try:
        env.reset()
        check_env(env)
    finally:
        # Close even when the check fails, so a failing test does not leave
        # the environment open for whatever runs next.
        env.close()
def run(use_gui=True, runs=1):
    """Train one TrueOnlineSarsaLambda agent per traffic signal on the double network.

    Args:
        use_gui: Forwarded unchanged to ``SumoEnvironment(use_gui=...)``.
        runs: Number of episodes to simulate. Episodes are numbered from 1
            and that number is passed to ``env.save_csv`` after each one.

    The agents are created once, before the first episode, so they keep
    learning across episodes. The environment is always closed before this
    function returns, including when an episode raises.
    """
    out_csv = 'outputs/double/sarsa-double'

    env = SumoEnvironment(
        net_file='nets/double/network.net.xml',
        single_agent=False,
        route_file='nets/double/flow.rou.xml',
        out_csv_name=out_csv,
        use_gui=use_gui,
        num_seconds=86400,
        yellow_time=3,
        min_green=5,
        max_green=60,
    )
    # Hard-wired switch: flip to True to step the environment without
    # consulting the agents (baseline run).
    fixed_tl = False

    try:
        agents = {
            ts_id: TrueOnlineSarsaLambda(
                env.observation_spaces(ts_id),
                env.action_spaces(ts_id),
                alpha=1e-9,
                gamma=0.95,
                epsilon=0.05,
                lamb=0.1,
                fourier_order=7,
            )
            for ts_id in env.ts_ids
        }

        # The loop variable is deliberately not called ``run``: that would
        # shadow this function's own name inside its body.
        for episode in range(1, runs + 1):
            obs = env.reset()
            done = {'__all__': False}

            if fixed_tl:
                while not done['__all__']:
                    _, _, done, _ = env.step(None)
            else:
                while not done['__all__']:
                    actions = {ts_id: agents[ts_id].act(obs[ts_id]) for ts_id in obs}

                    next_obs, r, done, _ = env.step(action=actions)

                    # Only the signals present in next_obs are updated; the
                    # entries of obs for any other signal are left as they were.
                    for ts_id in next_obs:
                        agents[ts_id].learn(
                            state=obs[ts_id],
                            action=actions[ts_id],
                            reward=r[ts_id],
                            next_state=next_obs[ts_id],
                            done=done[ts_id],
                        )
                        obs[ts_id] = next_obs[ts_id]

            env.save_csv(out_csv, episode)
    finally:
        # env never leaves this function, so nobody else can close it.
        env.close()
# Single-agent True Online SARSA(lambda) experiment on the 2way-single-intersection
# network. `args` is defined outside this excerpt.

# Output prefix handed to the environment as out_csv_name.
out_csv = 'outputs/2way-single-intersection/sarsa_lambdavai'

# Generate a route file. The meaning of the two numeric arguments is defined by
# write_route_file, which is not shown here.
# NOTE(review): the environment below reads its routes from args.route, which is
# not necessarily the file written here - confirm the two paths match.
write_route_file('nets/2way-single-intersection/single-intersection-gen.rou.xml', 400000, 100000)

env = SumoEnvironment(net_file='nets/2way-single-intersection/single-intersection.net.xml',
                      single_agent=True,
                      route_file=args.route,
                      out_csv_name=out_csv,
                      use_gui=args.gui,
                      num_seconds=args.seconds,
                      min_green=args.min_green,
                      max_green=args.max_green,
                      max_depart_delay=0)

# Run args.runs episodes, numbered from 1.
for run in range(1, args.runs+1):
    obs = env.reset()

    # A new agent is constructed on every run, so nothing learned in one run
    # carries over to the next.
    agent = TrueOnlineSarsaLambda(env.observation_space, env.action_space,
                                  alpha=args.alpha, gamma=args.gamma, epsilon=args.epsilon,
                                  fourier_order=7, lamb=0.9)

    done = False
    if args.fixed:
        # Baseline: step with an empty action dict until the environment
        # reports done; the agent is never consulted on this path.
        while not done:
            _, _, done, _ = env.step({})
    else:
        # Learning loop: act, step, update the agent from the transition,
        # then advance the current observation.
        while not done:
            action = agent.act(obs)

            next_obs, r, done, _ = env.step(action=action)

            agent.learn(state=obs, action=action, reward=r, next_state=next_obs, done=done)

            obs = next_obs
# Q-learning experiment on the single-intersection network. `args` and
# `experiment_time` are defined outside this excerpt, and the body of the
# per-run loop continues past the end of it.

# Output prefix that embeds experiment_time and the hyperparameters of this
# experiment, so different settings produce different prefixes.
out_csv = 'outputs/single-intersection/{}_alpha{}_gamma{}_eps{}_decay{}_reward{}'.format(
    experiment_time, args.alpha, args.gamma, args.epsilon, args.decay, args.reward)

env = SumoEnvironment(
    net_file='nets/single-intersection/single-intersection.net.xml',
    route_file=args.route,
    out_csv_name=out_csv,
    use_gui=args.gui,
    num_seconds=args.seconds,
    min_green=args.min_green,
    max_green=args.max_green,
    max_depart_delay=0)

# Run args.runs episodes, numbered from 1.
for run in range(1, args.runs + 1):
    # reset() is indexed by traffic-signal id below, i.e. it returns a mapping
    # from each id in env.ts_ids to that signal's initial state.
    initial_states = env.reset()

    # One QLAgent per traffic signal, rebuilt on every run. Each agent gets its
    # own EpsilonGreedy instance and starts from the encoded initial state of
    # its signal.
    ql_agents = {
        ts: QLAgent(starting_state=env.encode(initial_states[ts], ts),
                    state_space=env.observation_space,
                    action_space=env.action_space,
                    alpha=args.alpha,
                    gamma=args.gamma,
                    exploration_strategy=EpsilonGreedy(
                        initial_epsilon=args.epsilon,
                        min_epsilon=args.min_epsilon,
                        decay=args.decay))
        for ts in env.ts_ids
    }

    # Per-run state: '__all__' starts False, and infos starts empty.
    # NOTE(review): both are presumably consumed by the stepping loop that
    # follows this excerpt - confirm against the rest of the file.
    done = {'__all__': False}
    infos = []