コード例 #1
0
ファイル: train_dqn.py プロジェクト: wenwenla/DeepRL
def test_ddqn():
    """Evaluate a previously saved DDQN agent and print its mean reward.

    Builds the environment named by ``DDQNArgs().env_name``, restores the
    agent from ``save_dir``, runs 100 test episodes and prints the
    arithmetic mean of the values returned by ``test_one_episode``.
    """
    config = DDQNArgs()
    environment = gym.make(config.env_name)
    agent = DDQNAgent(environment, QNet, SimpleNormalizer, config)
    agent.load(config.save_dir)

    # NOTE(review): the meaning of the positional ``True`` flag is defined by
    # DDQNAgent.test_one_episode (presumably rendering) -- confirm there.
    episode_rewards = []
    for _ in range(100):
        episode_rewards.append(agent.test_one_episode(True))

    print(np.mean(episode_rewards))
コード例 #2
0
ファイル: train_dqn.py プロジェクト: wenwenla/DeepRL
def train_ddqn():
    """Train a DDQN agent, checkpointing whenever the test score improves.

    Runs ``max_ep`` training episodes. Every ``test_interval`` episodes
    (including episode 0) the agent is evaluated with ``test_model``; when
    the result strictly exceeds the best score seen so far, the agent is
    saved to ``save_dir``.
    """
    config = DDQNArgs()
    environment = gym.make(config.env_name)
    agent = DDQNAgent(environment, QNet, SimpleNormalizer, config)

    # Sentinel low enough that the first evaluation is expected to beat it.
    best_score = -1e9
    for episode in range(config.max_ep):
        agent.train_one_episode()

        # Only evaluate on the configured interval.
        if episode % config.test_interval != 0:
            continue

        score = agent.test_model()
        if score > best_score:
            best_score = score
            agent.save(config.save_dir)
コード例 #3
0
def main():
    """Pretrain and train a DDQN agent on permutation sorting, then compare
    its answers with the exact solver on 10 random permutations.

    The NumPy global RNG is seeded first so the sampled permutations are
    reproducible for a fixed sequence of RNG calls.
    """
    np.random.seed(12345678)

    size = 10
    environment = PermutationSorting(size)
    transformer = OneHotStateTransformer(size)
    agent = DDQNAgent(environment, transformer)

    agent.parallel_pretrain(rows=10000, epochs=30)
    # agent.load_pretrain_weights()
    agent.train(episodes=10000, max_steps=250)
    # agent.load_final_weights()

    for _ in range(10):
        permutation = np.random.permutation(size)
        learned = agent.solve(permutation)
        # A fresh exact solver is built for every sample, as in the original.
        optimal = PermutationExactSolver(size).solve(permutation)
        print(permutation, '-', 'RL:', learned, ' Exact:', optimal)
コード例 #4
0
import numpy as np
from ddqn import DDQNAgent
from utils import plot_learning_curve, make_env

# Script entry point: builds the Pong environment and a DDQN agent, optionally
# restores saved models, and prepares the output plot path.
# NOTE(review): only the setup portion is visible here; best_score,
# figure_file and n_steps are assigned but not used in this section --
# presumably consumed by a training loop that follows.
if __name__ == '__main__':
    env = make_env('PongNoFrameskip-v4')
    # Start below any possible score so the first comparison can succeed.
    best_score = -np.inf
    # When True, agent.load_models() is called before anything else runs.
    load_checkpoint = False
    n_games = 100
    # Hyperparameters are passed straight to DDQNAgent; their exact semantics
    # are defined in ddqn.py. The parentheses around
    # env.observation_space.shape are redundant (they do not create a tuple).
    agent = DDQNAgent(gamma=0.99,
                      epsilon=1.0,
                      lr=0.0001,
                      input_dims=(env.observation_space.shape),
                      n_actions=env.action_space.n,
                      mem_size=50000,
                      eps_min=0.1,
                      batch_size=32,
                      replace=10000,
                      eps_dec=1e-5,
                      chkpt_dir='models/',
                      algo='DDQNAgent',
                      env_name='PongNoFrameskip-v4')

    if load_checkpoint:
        agent.load_models()

    # File stem of the form "<algo>_<env_name>_lr<lr>_<n_games>games", built
    # from the agent's attributes and the configured number of games.
    fname = agent.algo + '_' + agent.env_name + '_lr' + str(agent.lr) +'_' \
            + str(n_games) + 'games'
    # Plot path under the relative 'plots/' directory.
    figure_file = 'plots/' + fname + '.png'

    # Counter initialised to zero; its use is outside the visible section.
    n_steps = 0