def training(**kwargs):
    """Construct and train a DQN agent, persist its weights, and return it.

    Expected kwargs: ``debug`` (bool, sets log verbosity), ``NN_arch``,
    ``max_iters``, ``weights_dir``, ``replay``, ``frame_skipping``,
    ``batch_size``.  Relies on module-level ``env`` and ``LOGGER``.

    Returns:
        The trained DQNAgent instance.
    """
    # Verbosity is driven by the debug flag.
    LOGGER.setLevel(logging.DEBUG if kwargs['debug'] else logging.INFO)

    agent = DQNAgent(
        environment=env,
        action_space=[0, 1, 2, 3, 4, 5, 6, 7],
        NN_arch=kwargs['NN_arch'],
        maxIters=kwargs['max_iters'],
        eta=0.00001,
        epsilon=0.4,
        discount=0.95,
        weights_dir=kwargs['weights_dir'],
        mem_size=10**5,
    )

    # Train until the agent exceeds its iteration budget; the check runs
    # after each learn() call, so at least one learning step always happens.
    while True:
        agent.learn(
            replay=kwargs['replay'],
            frame_skipping=kwargs['frame_skipping'],
            batch_size=kwargs['batch_size'],
        )
        if agent.numIters > agent.maxIters:
            break

    # Persist the learned weights, then hand the agent back to the caller.
    agent.save(agent.save_path % kwargs['max_iters'])
    return agent
# Per-episode bookkeeping (reads outer-scope env, agent, n_games, n_steps,
# best_score, load_checkpoint).
scores, eps_history, steps_array = [], [], []

for episode_idx in range(n_games):
    observation = env.reset()
    score = 0
    # Play one full episode.
    while True:
        chosen_action = agent.choose_action(observation)
        next_observation, reward, done, info = env.step(chosen_action)
        score += reward
        if not load_checkpoint:
            # Training mode: record the transition and take a learning step.
            agent.store_transition(observation, chosen_action, reward,
                                   next_observation, done)
            agent.learn()
        observation = next_observation
        n_steps += 1
        if done:
            break

    scores.append(score)
    steps_array.append(n_steps)

    # Running average over (at most) the last 100 episodes.
    avg_score = np.mean(scores[-100:])
    print('episode: ', episode_idx, 'score: ', score,
          ' average score %.1f' % avg_score, 'best score %.2f' % best_score,
          'epsilon %.2f' % agent.epsilon, 'steps', n_steps)

    # Checkpoint only on improvement, and only when training.
    if avg_score > best_score:
        if not load_checkpoint:
            agent.save_models()
        best_score = avg_score
def dqn_train():
    """Train a DQN agent on PongNoFrameskip-v4 and plot its learning curve.

    Creates the environment and agent, optionally resumes from a saved
    checkpoint (restoring the best score from ``models/best_score.pkl``),
    runs ``n_games`` episodes, checkpoints whenever the 100-episode average
    score improves, and finally writes a learning-curve figure under
    ``plots/``.  Relies on module-level ``make_env``, ``DQNAgent`` and
    ``plot_learning_curve``.
    """
    env = make_env('PongNoFrameskip-v4')

    # Run configuration flags.
    load_checkpoint = False      # resume from saved models / evaluation mode
    save_checkpoint = True       # persist models when avg score improves
    learning_enabled = True      # store transitions and take learning steps
    rendering_enabled = False    # render frames while playing
    n_games = 100

    agent = DQNAgent(gamma=0.99, epsilon=1.0, lr=0.0001,
                     input_dims=(env.observation_space.shape),
                     n_actions=env.action_space.n, mem_size=50000,
                     eps_min=0.1, batch_size=32, replace=1000, eps_dec=1e-5,
                     chkpt_dir='models/', algo='DQNAgent',
                     env_name='PongNoFrameskip-v4')

    if load_checkpoint:
        agent.load_models()
        # NOTE: pickle.load is only safe on trusted local files; this file is
        # produced by this script itself below.
        with open('models/best_score.pkl', 'rb') as file:
            best_score = pickle.load(file)
    else:
        best_score = -np.inf

    fname = agent.algo + '_' + agent.env_name + '_lr' + str(
        agent.lr) + '_' + str(n_games) + 'games'
    figure_file = 'plots/' + fname + '.png'

    n_steps = 0
    scores, eps_history, steps_array = [], [], []

    for i in range(n_games):
        done = False
        observation = env.reset()
        score = 0
        while not done:
            action = agent.choose_action(observation)
            observation_, reward, done, info = env.step(action)
            score += reward

            if learning_enabled:
                # Replay buffer stores the terminal flag as an int mask.
                agent.store_transition(observation, action, reward,
                                       observation_, int(done))
                agent.learn()
            if rendering_enabled:
                env.render()

            observation = observation_
            n_steps += 1

        scores.append(score)
        steps_array.append(n_steps)

        # Running average over (at most) the last 100 episodes.
        avg_score = np.mean(scores[-100:])
        print('episode: ', i, 'score: ', score,
              ' average score %.1f' % avg_score,
              'best score %.2f' % best_score,
              'epsilon %.2f' % agent.epsilon, 'steps', n_steps)

        if avg_score > best_score:
            best_score = avg_score
            if save_checkpoint:
                agent.save_models()
                with open('models/best_score.pkl', 'wb') as file:
                    pickle.dump(best_score, file)

        eps_history.append(agent.epsilon)

        # When evaluating a loaded checkpoint, stop after a fixed step budget.
        if load_checkpoint and n_steps >= 18000:
            break

    # Removed the unused `x = [i + 1 for i in range(len(scores))]` list:
    # the plot's x-axis is steps_array, so `x` was dead code.
    plot_learning_curve(steps_array, scores, eps_history, figure_file)