# Remaining keyword arguments of a call whose opening parenthesis lies above
# this chunk (presumably the environment constructor bound to `env` --
# TODO confirm against the preceding lines).
# NOTE(review): this region arrived collapsed onto a single line; the layout
# and nesting below were reconstructed from the statements' syntax and data
# flow, and the enclosing indentation level is not visible from here --
# confirm against the original file.
max_green=args.max_green,
max_depart_delay=0,
time_to_load_vehicles=120,
# Eight-entry signal program: every Phase(32, ...) is followed by a
# Phase(2, ...) whose state string is the same one with each 'G' replaced
# by 'y'.
# NOTE(review): the first Phase argument is presumably a duration in seconds
# and the string one signal state per controlled link -- confirm against the
# TraCI documentation for the installed SUMO version.
phases=[
    traci.trafficlight.Phase(32, "GGrrrrGGrrrr"),
    traci.trafficlight.Phase(2, "yyrrrryyrrrr"),
    traci.trafficlight.Phase(32, "rrGrrrrrGrrr"),
    traci.trafficlight.Phase(2, "rryrrrrryrrr"),
    traci.trafficlight.Phase(32, "rrrGGrrrrGGr"),
    traci.trafficlight.Phase(2, "rrryyrrrryyr"),
    traci.trafficlight.Phase(32, "rrrrrGrrrrrG"),
    traci.trafficlight.Phase(2, "rrrrryrrrrry")
])

# Run the experiment args.runs times (run is 1-based). Every run resets the
# environment and constructs a new agent object.
for run in range(1, args.runs+1):
    obs = env.reset()
    agent = TrueOnlineSarsaLambda(env.observation_space, env.action_space,
                                  alpha=args.alpha, gamma=args.gamma,
                                  epsilon=args.epsilon, fourier_order=21)

    done = False
    if args.fixed:
        # Step with an empty action dict until the episode reports done;
        # observation, reward and info are discarded and the agent is never
        # consulted. Presumably this leaves the phase plan above in control
        # as a baseline -- TODO confirm in the environment's step().
        while not done:
            _, _, done, _ = env.step({})
    else:
        # Act / step / learn cycle: the action is chosen from the features
        # of the current observation, and the agent is updated with the
        # full (state, action, reward, next_state, done) transition before
        # the observation is advanced.
        while not done:
            action = agent.act(agent.get_features(obs))
            next_obs, r, done, _ = env.step(action=action)
            agent.learn(state=obs, action=action, reward=r, next_state=next_obs, done=done)
            obs = next_obs
# Closing entries of a list of traffic-light phases and of the call that
# receives it; both are opened above this chunk.
# NOTE(review): this region arrived collapsed onto a single line; the layout
# and nesting below were reconstructed from the statements' syntax and data
# flow, and the enclosing indentation level is not visible from here --
# confirm against the original file.
# Every Phase(32, ...) is followed by a Phase(2, ...) whose state string is
# the same one with each 'G' replaced by 'y'.
traci.trafficlight.Phase(32, "GGrrrrGGrrrr"),
traci.trafficlight.Phase(2, "yyrrrryyrrrr"),
traci.trafficlight.Phase(32, "rrGrrrrrGrrr"),
traci.trafficlight.Phase(2, "rryrrrrryrrr"),
traci.trafficlight.Phase(32, "rrrGGrrrrGGr"),
traci.trafficlight.Phase(2, "rrryyrrrryyr"),
traci.trafficlight.Phase(32, "rrrrrGrrrrrG"),
traci.trafficlight.Phase(2, "rrrrryrrrrry")
])

# Choose the reward function by rebinding the environment's
# `_compute_rewards` attribute. Only the exact string 'queue' selects the
# queue-average reward; every other args.reward value falls through to the
# waiting-time reward.
if args.reward == 'queue':
    env._compute_rewards = env._queue_average_reward
else:
    env._compute_rewards = env._waiting_time_reward

# Run the experiment args.runs times (run is 1-based).
# NOTE(review): the loop body presumably continues below this chunk -- `done`
# and `infos` are initialised here but not used in the visible part.
for run in range(1, args.runs + 1):
    initial_states = env.reset()
    # One QLAgent per id in env.ts_ids, rebuilt on every run. Each agent
    # starts from the encoding of its own entry in the reset observations
    # and gets its own EpsilonGreedy instance.
    ql_agents = {
        ts: QLAgent(starting_state=env.encode(initial_states[ts]),
                    state_space=env.observation_space,
                    action_space=env.action_space,
                    alpha=args.alpha,
                    gamma=args.gamma,
                    exploration_strategy=EpsilonGreedy(
                        initial_epsilon=args.epsilon,
                        min_epsilon=args.min_epsilon,
                        decay=args.decay))
        for ts in env.ts_ids
    }
    # Episode-termination flags as a dict with an '__all__' entry, initially
    # False; presumably overwritten by env.step() in the part of the loop
    # below this chunk -- TODO confirm.
    done = {'__all__': False}
    infos = []