Esempio n. 1
0
    agent = QLearning(env, epsilon=0.8, gamma=0.5, lr=0.01)

    episode_rew = []
    for episode in range(EPISODES):
        # Deciding first action
        action = env.action_space.sample()
        state = env.reset()
        ep_rew = 0
        while True:
            next_state, reward, done, _ = env.step(action)
            # env.render()
            ep_rew += reward

            agent.update((state, action, reward, next_state))
            state = next_state
            agent.get_action(state)

            if done:
                episode_rew.append(ep_rew)
                break
    env.close()

    plt.plot(episode_rew)
    plt.show()

    state = env.reset()
    while True:
        action = agent.get_action(state, explore=False)
        next_state, reward, done, _ = env.step(action)
        env.render()
        state = next_state
Esempio n. 2
0
def run():
    """Execute the TraCI control loop.

    Steps the simulation until ``traci.simulation.getMinExpectedNumber()``
    reports nothing left to simulate. For traffic light "0":

    * While the light is in phase 1 or 3 (the yellow phases, per the
      original comments) and no action has been chosen yet, the halting
      counts of lane-area detectors "0".."3" are read, digitized together
      with the upcoming phase, and used to pick the next action. The
      Q-table is then updated for the previous (state, action) pair.
    * Once the light reaches phase 0 or 2, the chosen action is applied
      once as the duration of that phase.

    Every 10000 steps the collected rewards are plotted. The TraCI
    connection is closed and stdout is flushed when the loop ends.
    """
    step = 0

    # Initialize QLearning.
    num_phase = 2
    max_num_car_stopped = 10
    num_lane = 4
    num_action = 10
    q = QLearning(num_phase, max_num_car_stopped, num_lane, num_action)

    # Halting counts are clamped to this value before digitizing.
    # NOTE(review): presumably max_num_car_stopped - 1 so that counts fit
    # the state bins -- confirm against QLearning.digitize_state.
    halting_cap = 9

    # we start with phase 2 where EW has green
    #traci.trafficlight.setPhase("0", 2)
    while traci.simulation.getMinExpectedNumber() > 0:
        traci.simulationStep()

        # Current phase of the traffic light.
        light_phase = traci.trafficlight.getPhase("0")

        # If the light is yellow and the next action has not been decided
        # yet, decide the duration of the upcoming phase.
        if light_phase in (1, 3) and not q.is_calculate_next_action:
            q.is_set_duration = False

            # Phase the light will enter next: 1 -> 2, 3 -> 0.
            next_light_phase = 2 if light_phase == 1 else 0

            # Number of halted cars on each lane (detector ids "0".."3").
            halting_counts = [
                min(traci.lanearea.getLastStepHaltingNumber(str(lane)),
                    halting_cap)
                for lane in range(num_lane)
            ]

            # Upcoming light phase together with the current congestion.
            current_state_dict = {
                'light_phase': next_light_phase,
                'nums_car_stopped': halting_counts
            }

            current_digitized_state = q.digitize_state(current_state_dict)
            q.next_action_idx = q.get_action(current_digitized_state)
            q.is_calculate_next_action = True

            # Reward: negative sum of halted-car counts raised to 1.5, so
            # long queues are penalized more than proportionally.
            reward = -np.sum([count**1.5 for count in halting_counts])
            q.rewards.append(reward)

            # The end of a green/red phase is when the outcome of the
            # previously taken action is observable, so the reward is
            # credited to the previous state and action here.
            q.update_Qtable(q.previous_digitized_state, q.previous_action,
                            reward, current_digitized_state)

            q.previous_digitized_state = current_digitized_state
            # The update above reads ``previous_action``; the original only
            # refreshed ``previous_action_idx`` here, leaving the attribute
            # used by the update stale. Keep both names in sync.
            q.previous_action = q.previous_action_idx = q.next_action_idx

        # If the light is in phase 0 or 2 and the duration has not been
        # applied yet, apply it now.
        if light_phase in (0, 2) and not q.is_set_duration:
            duration = q.action[q.next_action_idx]
            traci.trafficlight.setPhaseDuration("0", duration)
            q.is_set_duration = True
            q.is_calculate_next_action = False
            print("set phase {} for {} seconds".format(light_phase, duration))

        step += 1
        if step % 10000 == 0:
            plot_graph(q.rewards)

    traci.close()
    sys.stdout.flush()