traci.trafficlight.Phase(32, "rrGrrrrrGrrr"), traci.trafficlight.Phase(2, "rryrrrrryrrr"), traci.trafficlight.Phase(32, "rrrGGrrrrGGr"), traci.trafficlight.Phase(2, "rrryyrrrryyr"), traci.trafficlight.Phase(32, "rrrrrGrrrrrG"), traci.trafficlight.Phase(2, "rrrrryrrrrry") ]) if args.reward == 'queue': env._compute_rewards = env._queue_average_reward else: env._compute_rewards = env._waiting_time_reward for run in range(1, args.runs + 1): initial_states = env.reset() ql_agents = { ts: QLAgent(starting_state=env.encode(initial_states[ts]), state_space=env.observation_space, action_space=env.action_space, alpha=args.alpha, gamma=args.gamma, exploration_strategy=EpsilonGreedy( initial_epsilon=args.epsilon, min_epsilon=args.min_epsilon, decay=args.decay)) for ts in env.ts_ids } done = {'__all__': False} infos = [] if args.fixed: while not done['__all__']:
max_green=args.max_green, max_depart_delay=0, phases=[ traci.trafficlight.Phase(ns, "GGrr"), # north-south traci.trafficlight.Phase(2000, "yyrr"), traci.trafficlight.Phase(we, "rrGG"), # west-east traci.trafficlight.Phase(2000, "rryy") ]) if args.reward == 'queue': env._compute_rewards = env._queue_average_reward else: env._compute_rewards = env._waiting_time_reward for run in range(1, args.runs+1): initial_states = env.reset() ql_agents = {ts: QLAgent(starting_state=env.encode(initial_states[ts]), state_space=env.observation_space, action_space=env.action_space, alpha=args.alpha, gamma=args.gamma, exploration_strategy=EpsilonGreedy(initial_epsilon=args.epsilon, min_epsilon=args.min_epsilon, decay=args.decay)) for ts in env.ts_ids} done = {'__all__': False} infos = [] if args.fixed: while not done['__all__']: _, _, done, _ = env.step({}) else: while not done['__all__']: actions = {ts: ql_agents[ts].act() for ts in ql_agents.keys()}
max_depart_delay=0, phases=[ traci.trafficlight.Phase(args.ns, "GGrr"), # north-south traci.trafficlight.Phase(2, "yyrr"), traci.trafficlight.Phase(args.we, "rrGG"), # west-east traci.trafficlight.Phase(2, "rryy") ]) if args.reward == 'queue': env._compute_rewards = env._queue_average_reward else: env._compute_rewards = env._waiting_time_reward for run in range(1, args.runs + 1): initial_states = env.reset() ql_agents = { ts: QLAgent(starting_state=env.encode(initial_states[ts]), state_space=env.observation_space, action_space=env.action_space, alpha=args.alpha, gamma=args.gamma, exploration_strategy=EpsilonGreedy( initial_epsilon=args.epsilon, min_epsilon=args.min_epsilon, decay=args.decay)) for ts in env.ts_ids } done = {'__all__': False} infos = [] if args.fixed: while not done['__all__']:
traci.trafficlight.Phase(args.ns, "GGrr"), # north-south traci.trafficlight.Phase(2, "yyrr"), traci.trafficlight.Phase(args.we, "rrGG"), # west-east traci.trafficlight.Phase(2, "rryy") ]) env = VisualizationEnv( env=env, episodic=False, features_names=['Phase 0', 'Phase 1', 'Elapsed time'] + ['Density lane ' + str(i) for i in range(4)] + ['Queue lane ' + str(i) for i in range(4)], actions_names=['Phase 0', 'Phase 1'] ) for run in range(1, args.runs+1): initial_states = env.reset() ql_agents = {ts: QLAgent(starting_state=env.encode(initial_states), state_space=env.observation_space, action_space=env.action_space, alpha=args.alpha, gamma=args.gamma, exploration_strategy=EpsilonGreedy(initial_epsilon=args.epsilon, min_epsilon=args.min_epsilon, decay=args.decay)) for ts in env.ts_ids} env.set_agent(ql_agents['t']) env.add_plot('Epsilon', lambda: ql_agents['t'].exploration.epsilon) done = False while not done: actions = {ts: ql_agents[ts].act() for ts in ql_agents.keys()} s, r, done, _ = env.step(action=actions['t']) for agent_id in ql_agents.keys(): ql_agents[agent_id].learn(next_state=env.encode(s), reward=r)