コード例 #1
0
ファイル: sarsa_double.py プロジェクト: LucasAlegre/sumo-rl
def run(use_gui=True, runs=1):
    out_csv = 'outputs/double/sarsa-double'

    env = SumoEnvironment(net_file='nets/double/network.net.xml',
                          single_agent=False,
                          route_file='nets/double/flow.rou.xml',
                          out_csv_name=out_csv,
                          use_gui=use_gui,
                          num_seconds=86400,
                          yellow_time=3,
                          min_green=5,
                          max_green=60)

    fixed_tl = False
    agents = {
        ts_id: TrueOnlineSarsaLambda(env.observation_spaces(ts_id),
                                     env.action_spaces(ts_id),
                                     alpha=0.000000001,
                                     gamma=0.95,
                                     epsilon=0.05,
                                     lamb=0.1,
                                     fourier_order=7)
        for ts_id in env.ts_ids
    }

    for run in range(1, runs + 1):
        obs = env.reset()
        done = {'__all__': False}

        if fixed_tl:
            while not done['__all__']:
                _, _, done, _ = env.step(None)
        else:
            while not done['__all__']:
                actions = {
                    ts_id: agents[ts_id].act(obs[ts_id])
                    for ts_id in obs.keys()
                }

                next_obs, r, done, _ = env.step(action=actions)

                for ts_id in next_obs.keys():
                    agents[ts_id].learn(state=obs[ts_id],
                                        action=actions[ts_id],
                                        reward=r[ts_id],
                                        next_state=next_obs[ts_id],
                                        done=done[ts_id])
                    obs[ts_id] = next_obs[ts_id]

        env.save_csv(out_csv, run)
コード例 #2
0
    out_csv = f'outputs/5x5-Raphael/{scenario}_{experiment_time}_alpha{args.alpha}_gamma{args.gamma}_eps{args.epsilon}_decay{args.decay}'

    env = SumoEnvironment(net_file=args.network,
                          route_file=args.route,
                          out_csv_name=out_csv,
                          use_gui=args.gui,
                          num_seconds=args.seconds,
                          min_green=args.min_green,
                          max_green=args.max_green,
                          max_depart_delay=0)

    initial_states = env.reset()
    ql_agents = {
        ts: QLAgent(starting_state=env.encode(initial_states[ts], ts),
                    state_space=env.observation_spaces(ts),
                    action_space=env.action_spaces(ts),
                    alpha=args.alpha,
                    gamma=args.gamma,
                    exploration_strategy=EpsilonGreedy(
                        initial_epsilon=args.epsilon,
                        min_epsilon=args.min_epsilon,
                        decay=args.decay))
        for ts in env.ts_ids
    }
    infos = []
    done = {'__all__': False}
    if args.fixed:
        while not done['__all__']:
            _, _, done, _ = env.step({})
    else:
        while not done['__all__']: