Ejemplo n.º 1
0
def run(use_gui=True, runs=1):
    """Train one True Online SARSA(lambda) agent per traffic signal on the
    double-intersection SUMO network, logging per-run metrics to CSV.

    Args:
        use_gui: launch SUMO with its graphical interface when True.
        runs: number of independent training runs (episodes) to execute.
    """
    out_csv = 'outputs/double/sarsa-double'

    env = SumoEnvironment(net_file='nets/double/network.net.xml',
                          single_agent=False,
                          route_file='nets/double/flow.rou.xml',
                          out_csv_name=out_csv,
                          use_gui=use_gui,
                          num_seconds=86400,
                          yellow_time=3,
                          min_green=5,
                          max_green=60)

    fixed_tl = False

    # One independent learner per controlled traffic signal.
    agents = {}
    for signal_id in env.ts_ids:
        agents[signal_id] = TrueOnlineSarsaLambda(
            env.observation_spaces(signal_id),
            env.action_spaces(signal_id),
            alpha=0.000000001,
            gamma=0.95,
            epsilon=0.05,
            lamb=0.1,
            fourier_order=7)

    for episode in range(1, runs + 1):
        observations = env.reset()
        done = {'__all__': False}

        while not done['__all__']:
            if fixed_tl:
                # Fixed-time control: advance the simulation with no actions.
                _, _, done, _ = env.step(None)
            else:
                # Every signal's agent acts on its own local observation.
                chosen = {signal_id: agents[signal_id].act(observations[signal_id])
                          for signal_id in observations}

                next_observations, rewards, done, _ = env.step(action=chosen)

                # On-policy update per agent, then roll the observations forward.
                for signal_id in next_observations:
                    agents[signal_id].learn(state=observations[signal_id],
                                            action=chosen[signal_id],
                                            reward=rewards[signal_id],
                                            next_state=next_observations[signal_id],
                                            done=done[signal_id])
                    observations[signal_id] = next_observations[signal_id]

        env.save_csv(out_csv, episode)
Ejemplo n.º 2
0
                          route_file=args.route,
                          out_csv_name=out_csv,
                          use_gui=args.gui,
                          num_seconds=args.seconds,
                          min_green=args.min_green,
                          max_green=args.max_green,
                          max_depart_delay=0)

    for run in range(1, args.runs+1):
        obs = env.reset()
        # Fresh single-agent True Online SARSA(lambda) learner for each run;
        # trace-decay lambda is fixed at 0.9 and the Fourier basis order at 7,
        # while alpha/gamma/epsilon come from the command line.
        agent = TrueOnlineSarsaLambda(env.observation_space, env.action_space, alpha=args.alpha, gamma=args.gamma, epsilon=args.epsilon, fourier_order=7, lamb=0.9)

        done = False
        if args.fixed:
            # Fixed-time traffic lights: step the simulation with no actions.
            while not done:
                _, _, done, _ = env.step({})
        else:
            while not done:
                action = agent.act(obs)

                next_obs, r, done, _ = env.step(action=action)

                # On-policy update from the observed transition.
                agent.learn(state=obs, action=action, reward=r, next_state=next_obs, done=done)

                obs = next_obs

        env.save_csv(out_csv, run)



Ejemplo n.º 3
0
        # One tabular Q-learning agent per traffic signal; each starts from the
        # encoded initial observation and explores with a decaying
        # epsilon-greedy strategy parameterized from the command line.
        ql_agents = {
            ts: QLAgent(starting_state=env.encode(initial_states[ts], ts),
                        state_space=env.observation_space,
                        action_space=env.action_space,
                        alpha=args.alpha,
                        gamma=args.gamma,
                        exploration_strategy=EpsilonGreedy(
                            initial_epsilon=args.epsilon,
                            min_epsilon=args.min_epsilon,
                            decay=args.decay))
            for ts in env.ts_ids
        }

        done = {'__all__': False}
        infos = []  # NOTE(review): never appended to below — appears unused.
        if args.fixed:
            # Fixed-time control: advance the simulation without agent actions.
            while not done['__all__']:
                _, _, done, _ = env.step({})
        else:
            while not done['__all__']:
                # act() takes no state argument — each agent presumably tracks
                # its own current state internally; verify against QLAgent.
                actions = {ts: ql_agents[ts].act() for ts in ql_agents.keys()}

                s, r, done, _ = env.step(action=actions)

                # Q-update from the encoded next observation and reward.
                for agent_id in ql_agents.keys():
                    ql_agents[agent_id].learn(next_state=env.encode(
                        s[agent_id], agent_id),
                                              reward=r[agent_id])
        env.save_csv(out_csv, run)
        env.close()
Ejemplo n.º 4
0
        ts: QLAgent(starting_state=env.encode(initial_states[ts], ts),
                    state_space=env.observation_space,
                    action_space=env.action_space,
                    alpha=alpha,
                    gamma=gamma,
                    exploration_strategy=EpsilonGreedy(initial_epsilon=0.05,
                                                       min_epsilon=0.005,
                                                       decay=decay))
        for ts in env.ts_ids
    }
    for run in range(1, runs + 1):
        # On all but the first run, reset the environment and re-seed each
        # agent's stored state from the new initial observation (run 1
        # presumably reuses a reset done before this loop — confirm caller).
        if run != 1:
            initial_states = env.reset()
            for ts in initial_states.keys():
                ql_agents[ts].state = env.encode(initial_states[ts], ts)

        infos = []  # NOTE(review): never appended to below — appears unused.
        done = {'__all__': False}
        while not done['__all__']:
            # act() takes no state argument — each agent presumably tracks its
            # own current state internally; verify against QLAgent.
            actions = {ts: ql_agents[ts].act() for ts in ql_agents.keys()}

            s, r, done, info = env.step(action=actions)

            # Q-update keyed by whichever agents appear in this step's
            # observation dict.
            for agent_id in s.keys():
                ql_agents[agent_id].learn(next_state=env.encode(
                    s[agent_id], agent_id),
                                          reward=r[agent_id])

        env.save_csv('outputs/4x4/ql-test-995speed', run)
        # NOTE(review): close() sits inside the run loop — env.reset() on the
        # next iteration may then operate on a closed environment; confirm
        # SumoEnvironment tolerates reset-after-close.
        env.close()