Esempi in Python per QLearning.get_action

Linguaggio di programmazione: Python

Spazio dei nomi/nome del pacchetto: q_learning

Classe/tipologia: QLearning

Metodo/funzione: get_action

Esempi su hotexamples.com: 2

QLearning.get_action in Python: 2 esempi trovati. Questi sono i migliori esempi reali in Python di q_learning.QLearning.get_action, estratti da progetti open source. Puoi valutarli per aiutarci a migliorare la qualità dei nostri esempi.

Metodi utilizzati di frequente

Mostra / Nascondi

QLearning(30)

update(4)

run(3)

train(2)

load_table(2)

learn(2)

greedy_probability_policy(2)

get_policy(2)

get_action(2)

perform_sim_step(2)

set_general_state_action_values(2)

save_q_matrix(2)

choose_action(2)

action_to_maximise_q(2)

choose(2)

train_model(1)

reset_state(1)

plot_progress(1)

predict(1)

previous_action_idx(1)

previous_digitized_state(1)

q_table(1)

read_q_matrix(1)

update_state_action_function(1)

reset_epsilon(1)

reset_reward(1)

update_reward(1)

update_Qtable(1)

update_q_value(1)

transfer_model(1)

run_multiple_episodes(1)

sample_from_experience(1)

update_Q(1)

save(1)

save_experience(1)

save_q_model(1)

save_table(1)

select_action(1)

solve(1)

step(1)

test(1)

plot_avg_cost(1)

lr(1)

perform_lr_decay(1)

episode_companies_3(1)

action_values(1)

add_new_state(1)

assimilar(1)

best_action(1)

calc_new_q_value(1)

Esempio n. 1

Mostra file

    # Train a QLearning agent on `env` for EPISODES episodes, plot the reward
    # collected per episode, then run the learned policy with rendering on.
    agent = QLearning(env, epsilon=0.8, gamma=0.5, lr=0.01)

    episode_rew = []  # total reward collected in each training episode
    for episode in range(EPISODES):
        # Only the first action of an episode is sampled at random; every
        # later action is chosen by the agent (see below).
        action = env.action_space.sample()
        state = env.reset()
        ep_rew = 0
        while True:
            next_state, reward, done, _ = env.step(action)
            # env.render()
            ep_rew += reward

            agent.update((state, action, reward, next_state))
            state = next_state
            # Store the agent's choice so the next env.step() actually uses
            # it; without the assignment the initial random action would be
            # replayed for the whole episode.
            action = agent.get_action(state)

            if done:
                episode_rew.append(ep_rew)
                break
    env.close()

    plt.plot(episode_rew)
    plt.show()

    # Evaluation: act without exploration and render every step.
    # NOTE(review): no exit on `done` is visible in this excerpt, and the
    # environment is reused after env.close() above — confirm both against
    # the full file.
    state = env.reset()
    while True:
        action = agent.get_action(state, explore=False)
        next_state, reward, done, _ = env.step(action)
        env.render()
        state = next_state

Esempio n. 2

Mostra file

File: runner_3.py Progetto: h-shib/traci_test

def run():
    """Execute the TraCI control loop, picking phase durations with QLearning.

    The simulation is stepped until ``traci.simulation.getMinExpectedNumber()``
    reaches zero. For traffic light ``"0"``:

    * while the light is in phase 1 or 3 (the yellow phases, per the original
      author's comments) and no action has been chosen yet, the halted-vehicle
      counts of the lane-area detectors are read, the resulting state is
      digitized, an action index is requested from the agent, a reward is
      recorded and the Q-table is updated;
    * once the light reaches phase 0 or 2, the duration ``q.action[idx]`` for
      the chosen index is applied exactly once.

    Every 10000 steps the collected rewards are passed to ``plot_graph``.
    The TraCI connection is closed even when the loop raises.
    """
    step = 0

    # initialize QLearning
    num_phase = 2
    max_num_car_stopped = 10
    num_lane = 4
    num_action = 10
    q = QLearning(num_phase, max_num_car_stopped, num_lane, num_action)

    # Lane-area detector ids "0".."3", one per lane.
    detector_ids = [str(lane) for lane in range(num_lane)]
    # Counts are clamped to max_num_car_stopped - 1 (i.e. 9) — presumably so
    # they fit a table with max_num_car_stopped bins; confirm against
    # QLearning.digitize_state.
    halt_cap = max_num_car_stopped - 1

    try:
        while traci.simulation.getMinExpectedNumber() > 0:
            traci.simulationStep()

            # Current phase of the traffic light.
            light_phase = traci.trafficlight.getPhase("0")

            # If the current phase is yellow and the next action has not been
            # decided yet, decide how many seconds the next phase will last.
            if light_phase in (1, 3) and not q.is_calculate_next_action:
                q.is_set_duration = False

                # Phase the light will switch to next.
                next_light_phase = 2 if light_phase == 1 else 0

                # Number of halted cars on each lane, clamped to halt_cap.
                halted = [
                    min(traci.lanearea.getLastStepHaltingNumber(det), halt_cap)
                    for det in detector_ids
                ]

                # Next light phase plus the current congestion. The dict gets
                # its own copy of the list so the reward below cannot be
                # affected by anything digitize_state does to it.
                current_state_dict = {
                    'light_phase': next_light_phase,
                    'nums_car_stopped': list(halted),
                }

                current_digitized_state = q.digitize_state(current_state_dict)
                q.next_action_idx = q.get_action(current_digitized_state)
                q.is_calculate_next_action = True

                # Reward: negative sum of halted counts raised to 1.5, so
                # longer queues are penalized more than proportionally.
                reward = -np.sum([x**1.5 for x in halted])
                q.rewards.append(reward)

                # Original author's open question: at the end of each
                # green/red phase, is this the right moment to compute the
                # reward for the previous state and the action taken in it?
                # NOTE(review): this passes q.previous_action, while the
                # attribute refreshed below is q.previous_action_idx —
                # confirm which one update_Qtable expects.
                q.update_Qtable(q.previous_digitized_state, q.previous_action,
                                reward, current_digitized_state)

                q.previous_digitized_state = current_digitized_state
                q.previous_action_idx = q.next_action_idx

            # If the current phase is 0 or 2 and the duration has not been
            # set yet, apply the duration chosen during the yellow phase.
            if light_phase in (0, 2) and not q.is_set_duration:
                duration = q.action[q.next_action_idx]
                traci.trafficlight.setPhaseDuration("0", duration)
                q.is_set_duration = True
                q.is_calculate_next_action = False
                print("set phase {} for {} seconds".format(
                    light_phase, duration))

            step += 1
            if step % 10000 == 0:
                plot_graph(q.rewards)
    finally:
        # Release the TraCI connection on every exit path, not only when the
        # simulation runs to completion.
        traci.close()
        sys.stdout.flush()