# Train a tabular Q-learning agent on `env`, plot the per-episode rewards,
# then run one greedy (no-exploration) evaluation rollout with rendering.
agent = QLearning(env, epsilon=0.8, gamma=0.5, lr=0.01)
episode_rew = []

for episode in range(EPISODES):
    # Deciding first action
    action = env.action_space.sample()
    state = env.reset()
    ep_rew = 0
    while True:
        next_state, reward, done, _ = env.step(action)
        # env.render()
        ep_rew += reward
        agent.update((state, action, reward, next_state))
        state = next_state
        # BUG FIX: the original called agent.get_action(state) and discarded
        # the result, so the very first random action was repeated for the
        # whole episode and the learned policy was never followed.
        action = agent.get_action(state)
        if done:
            episode_rew.append(ep_rew)
            break

env.close()

plt.plot(episode_rew)
plt.show()

# Greedy evaluation episode (explore=False -> no epsilon-greedy exploration).
# NOTE(review): env.step() is used here after env.close() above — some custom
# envs tolerate this, but standard gym envs may raise; confirm intent.
state = env.reset()
while True:
    action = agent.get_action(state, explore=False)
    next_state, reward, done, _ = env.step(action)
    env.render()
    state = next_state
    # BUG FIX: the original looped forever; stop the rollout at episode end
    # instead of stepping a finished environment indefinitely.
    if done:
        break
def run():
    """Execute the TraCI control loop.

    Drives a SUMO traffic-light simulation: at every yellow phase the
    Q-learning agent picks the duration of the upcoming green phase from the
    number of cars halted on each monitored lane, and the Q-table is updated
    with a congestion-based negative reward. Loops until no vehicles remain
    in the simulation, then closes the TraCI connection.
    """
    step = 0

    # Initialize the Q-learning agent.
    num_phase = 2              # phases the agent decides durations for
    max_num_car_stopped = 10   # halting counts are clamped to 0..9 below
    num_lane = 4               # lane-area detectors "0".."3"
    num_action = 10            # discrete duration choices in q.action
    q = QLearning(num_phase, max_num_car_stopped, num_lane, num_action)

    # we start with phase 2 where EW has green
    #traci.trafficlight.setPhase("0", 2)
    while traci.simulation.getMinExpectedNumber() > 0:
        traci.simulationStep()
        #next_action_idx = 9

        # Current phase of the traffic light.
        light_phase = traci.trafficlight.getPhase("0")

        # If the current phase is yellow (1 or 3) and the next action has not
        # been decided yet, choose the duration for the upcoming phase.
        if (light_phase == 1 or light_phase == 3) and not q.is_calculate_next_action:
            q.is_set_duration = False

            # Phase the light takes next (yellow 1 -> 2, yellow 3 -> 0).
            next_light_phase = 0
            if light_phase == 1:
                next_light_phase = 2

            # Number of cars halted on each lane, clamped to 9 so the
            # digitized state space stays bounded.
            count_0 = min(traci.lanearea.getLastStepHaltingNumber("0"), 9)
            count_1 = min(traci.lanearea.getLastStepHaltingNumber("1"), 9)
            count_2 = min(traci.lanearea.getLastStepHaltingNumber("2"), 9)
            count_3 = min(traci.lanearea.getLastStepHaltingNumber("3"), 9)

            # State = next light phase plus current congestion per lane.
            current_state_dict = {
                'light_phase': next_light_phase,
                'nums_car_stopped': [count_0, count_1, count_2, count_3]
            }
            current_digitized_state = q.digitize_state(current_state_dict)
            q.next_action_idx = q.get_action(current_digitized_state)
            q.is_calculate_next_action = True

            # Reward: super-linear (x**1.5) penalty on halted cars, so one
            # heavily congested lane is punished more than spread-out queues.
            reward = -np.sum(
                [x**1.5 for x in [count_0, count_1, count_2, count_3]])
            q.rewards.append(reward)

            # A green/red phase has just ended, so update the Q-table for the
            # (state, action) pair chosen at the previous decision point.
            q.update_Qtable(q.previous_digitized_state, q.previous_action,
                            reward, current_digitized_state)
            q.previous_digitized_state = current_digitized_state
            # BUG FIX: the original assigned q.previous_action_idx, but the
            # Q-table update above reads q.previous_action — the attribute
            # that is read was never written, so updates always used a stale
            # action. Store the chosen action under the attribute that is
            # actually consumed.
            q.previous_action = q.next_action_idx

        # If the current phase is green (0 or 2) and its duration has not
        # been set yet, apply the duration chosen by the agent.
        if (light_phase == 0 or light_phase == 2) and not q.is_set_duration:
            traci.trafficlight.setPhaseDuration("0", q.action[q.next_action_idx])
            q.is_set_duration = True
            q.is_calculate_next_action = False
            print("set phase {} for {} seconds".format(
                light_phase, q.action[q.next_action_idx]))

        step += 1
        # Periodically plot the accumulated rewards for monitoring.
        if step % 10000 == 0:
            plot_graph(q.rewards)

    traci.close()
    sys.stdout.flush()