Ejemplo n.º 1
0
 def __init__(self,
              starting_state,
              state_space,
              action_space,
              alpha=0.5,
              gamma=0.95,
              exploration_strategy=EpsilonGreedy()):
     super(QLAgent, self).__init__(state_space, action_space)
     self.state = starting_state
     self.action_space = action_space
     self.action = None
     self.alpha = alpha
     self.gamma = gamma
     self.q_table = {self.state: [0 for _ in range(action_space.n)]}
     self.exploration = exploration_strategy
     self.acc_reward = 0
        ])
    if args.reward == 'queue':
        env._compute_rewards = env._queue_average_reward
    else:
        env._compute_rewards = env._waiting_time_reward

    for run in range(1, args.runs + 1):
        initial_states = env.reset()
        ql_agents = {
            ts: QLAgent(starting_state=env.encode(initial_states[ts]),
                        state_space=env.observation_space,
                        action_space=env.action_space,
                        alpha=args.alpha,
                        gamma=args.gamma,
                        exploration_strategy=EpsilonGreedy(
                            initial_epsilon=args.epsilon,
                            min_epsilon=args.min_epsilon,
                            decay=args.decay))
            for ts in env.ts_ids
        }

        done = {'__all__': False}
        infos = []
        if args.fixed:
            while not done['__all__']:
                _, _, done, _ = env.step({})
        else:
            while not done['__all__']:
                actions = {ts: ql_agents[ts].act() for ts in ql_agents.keys()}

                s, r, done, _ = env.step(actions=actions)
Ejemplo n.º 3
0
            traci.trafficlight.Phase(35000, 35000, 35000,
                                     "rrrGGG"),  # west-east
            traci.trafficlight.Phase(2000, 2000, 2000, "rrryyy")
        ])

    for run in range(1, runs + 1):
        initial_states = env.reset()
        ql_agents = {
            ts:
            QLAgent(starting_state=env.encode(initial_states[ts]),
                    state_space=env.observation_space,
                    action_space=env.action_space,
                    alpha=alpha,
                    gamma=gamma,
                    exploration_strategy=EpsilonGreedy(initial_epsilon=0.05,
                                                       min_epsilon=0.005,
                                                       decay=decay))
            for ts in env.ts_ids
        }
        infos = []
        done = {'__all__': False}
        while not done['__all__']:
            actions = {ts: ql_agents[ts].act() for ts in ql_agents.keys()}

            s, r, done, info = env.step(actions=actions)
            infos.append(info)

            for agent_id in ql_agents.keys():
                ql_agents[agent_id].learn(new_state=env.encode(s[agent_id]),
                                          reward=r[agent_id])
                            traci.trafficlight.Phase(32000, 32000, 32000, "rrrrrGrrrrrG"), 
                            traci.trafficlight.Phase(2000, 2000, 2000, "rrrrryrrrrry")
                            ])
    if args.reward == 'queue':
        env._compute_rewards = env._queue_average_reward
    else:
        env._compute_rewards = env._waiting_time_reward

    for run in range(1, args.runs+1):
        initial_states = env.reset()
        ql_agents = {ts: QLAgent(starting_state=env.encode(initial_states[ts]),
                                 state_space=env.observation_space,
                                 action_space=env.action_space,
                                 alpha=args.alpha,
                                 gamma=args.gamma,
                                 exploration_strategy=EpsilonGreedy(initial_epsilon=args.epsilon, min_epsilon=args.min_epsilon, decay=args.decay)) for ts in env.ts_ids}

        done = {'__all__': False}
        infos = []
        if args.fixed:
            while not done['__all__']:
                _, _, done, _ = env.step({})
        else:
            while not done['__all__']:
                actions = {ts: ql_agents[ts].act() for ts in ql_agents.keys()}

                s, r, done, _ = env.step(actions=actions)

                if args.v:
                    print('s=', env.radix_decode(ql_agents['t'].state), 'a=', actions['t'], 's\'=', env.radix_encode(s['t']), 'r=', r['t'])
Ejemplo n.º 5
0
                          max_depart_delay=0,
                          phases=[
                            traci.trafficlight.Phase(35, "GGGrrr"),   # north-south
                            traci.trafficlight.Phase(2, "yyyrrr"),
                            traci.trafficlight.Phase(35, "rrrGGG"),   # west-east
                            traci.trafficlight.Phase(2, "rrryyy")
                            ])

    for run in range(1, runs+1):
        initial_states = env.reset()
        ql_agents = {ts: QLAgent(starting_state=env.encode(initial_states[ts]),
                                 state_space=env.observation_space,
                                 action_space=env.action_space,
                                 alpha=alpha,
                                 gamma=gamma,
                                 exploration_strategy=EpsilonGreedy(initial_epsilon=0.05, min_epsilon=0.005, decay=decay)) for ts in env.ts_ids}
        infos = []
        done = {'__all__': False}
        while not done['__all__']:
            actions = {ts: ql_agents[ts].act() for ts in ql_agents.keys()}

            s, r, done, info = env.step(actions=actions)
            infos.append(info)

            for agent_id in ql_agents.keys():
                ql_agents[agent_id].learn(new_state=env.encode(s[agent_id]), reward=r[agent_id])

        env.close()

        df = pd.DataFrame(infos)
        df.to_csv('outputs/4x4grid/c2_alpha{}_gamma{}_decay{}_run{}.csv'.format(alpha, gamma, decay, run), index=False)