Esempi in Python per QLearningAgent.observe

Linguaggio di programmazione: Python

Spazio dei nomi/nome del pacchetto: qlearning_agent

Classe/tipologia: QLearningAgent

Metodo/funzione: observe

Esempi su hotexamples.com: 2

QLearningAgent.observe in Python: 2 esempi trovati. Questi sono i migliori esempi reali in Python per qlearning_agent.QLearningAgent.observe, estratti da progetti open source. Li puoi valutare, per aiutarci a migliorare la qualità dei nostri esempi.

Metodi utilizzati di frequente

Mostra Nascondi

QLearningAgent(10)

act(2)

observe(2)

save(2)

Q(1)

epsilon(1)

play_move(1)

play_opponent_move(1)

set_q_value(1)

train(1)

traning(1)

Esempio n. 1

Mostra file

File: simulator.py Progetto: tocom242242/mysociety

def main(cfg):
    pygame.init()

    # フォントの作成
    sysfont = pygame.font.SysFont(None, 40)
    screen = pygame.display.set_mode(WINDOW_SIZE)
    pygame.display.set_caption("Grid World")

    done = False

    clock = pygame.time.Clock()

    # grid worldの初期化
    grid_env = GridWorld()  # grid worldの環境の初期化
    ini_state = grid_env.start_pos  # 初期状態（エージェントのスタート地点の位置）
    agent = QLearningAgent(
        epsilon=cfg["agent"]["epsilon"],
        epsilon_decay_rate=cfg["agent"]["epsilon_decay_rate"],
        actions=np.arange(4),
        observation=ini_state)  # Q学習エージェント

    nb_episode = cfg["nb_episode"]  # エピソード数
    save_interval = cfg["save_interval"]
    result_dir = cfg["result_dir"]
    max_step = 1
    rewards = []  # 評価用報酬の保存
    is_end_episode = False  # エージェントがゴールしてるかどうか？

    step = 0
    # time.sleep(30)

    for episode in range(nb_episode):
        print("episode:", episode)
        episode_reward = []  # 1エピソードの累積報酬
        step = 0
        while (is_end_episode is False and step < max_step):  # ゴールするまで続ける
            action = agent.act()  # 行動選択
            state, reward, is_end_episode = grid_env.step(action)
            agent.observe(state, reward)  # 状態と報酬の観測
            episode_reward.append(reward)

            screen.fill(BLACK)
            # grid worldの描画
            draw_grid_world(grid_env.map, screen)
            # テキストを描画したSurfaceを作成
            step_str = sysfont.render("step:{}".format(step), False, WHITE)
            # 位# テキストを描画する
            screen.blit(step_str, (500, 50))
            clock.tick(1)
            step += 1

            # 再描画
            pygame.display.flip()

        rewards.append(np.sum(episode_reward))  # このエピソードの平均報酬を与える
        state = grid_env.reset()  # 初期化
        agent.observe(state)  # エージェントを初期位置に
        is_end_episode = False
        print("step:", step)
        agents = [agent]

        if episode % save_interval == 0:
            save_result(agents, episode, result_dir)

    pygame.quit()

Esempio n. 2

Mostra file

File: run.py Progetto: tocom242242/qlearning

from grid_world import GridWorld

if __name__ == '__main__':
    grid_env = GridWorld() # grid worldの環境の初期化
    ini_state = grid_env.start_pos  # 初期状態（エージェントのスタート地点の位置）
    policy = EpsGreedyQPolicy(epsilon=.01) # 方策の初期化。ここではε-greedy
    agent = QLearningAgent(actions=np.arange(4), observation=ini_state, policy=policy) # Q Learning エージェントの初期化
    nb_episode = 100   #エピソード数
    rewards = []    # 評価用報酬の保存
    is_goal = False # エージェントがゴールしてるかどうか？
    for episode in range(nb_episode):
        episode_reward = [] # 1エピソードの累積報酬
        while(is_goal == False):    # ゴールするまで続ける
            action = agent.act()    # 行動選択
            state, reward, is_goal = grid_env.step(action)
            agent.observe(state, reward)   # 状態と報酬の観測
            episode_reward.append(reward)
        rewards.append(np.sum(episode_reward)) # このエピソードの平均報酬を与える
        state = grid_env.reset()    #  初期化
        agent.observe(state)    # エージェントを初期位置に
        is_goal = False

    # テスト(greedyアクション)
    agent.traning = False
    while(is_goal == False):    # ゴールするまで続ける
        print("(y, x):{}".format(state))
        action = agent.act()    # 行動選択
        print(action)
        state, reward, is_goal = grid_env.step(action)
        agent.observe(state, reward)   # 状態と報酬の観測