def test_ddqn():
    """Load a saved DDQN agent and report its mean reward over 100 test episodes."""
    args = DDQNArgs()
    env = gym.make(args.env_name)
    agent = DDQNAgent(env, QNet, SimpleNormalizer, args)
    agent.load(args.save_dir)
    # Run 100 evaluation episodes (True presumably enables rendering/greedy mode
    # in test_one_episode — confirm against the agent implementation).
    episode_rewards = []
    for _ in range(100):
        episode_rewards.append(agent.test_one_episode(True))
    print(np.mean(episode_rewards))
def train_ddqn():
    """Train a DDQN agent, checkpointing whenever the periodic test reward improves."""
    args = DDQNArgs()
    env = gym.make(args.env_name)
    agent = DDQNAgent(env, QNet, SimpleNormalizer, args)
    best_reward = -1e9  # sentinel so the first evaluation always saves
    for episode in range(args.max_ep):
        agent.train_one_episode()
        # Only evaluate every test_interval episodes (episode 0 included).
        if episode % args.test_interval != 0:
            continue
        reward = agent.test_model()
        if reward > best_reward:
            best_reward = reward
            agent.save(args.save_dir)
def main():
    """Pretrain and train a DDQN permutation sorter, then compare it on random
    permutations against the exact solver."""
    np.random.seed(12345678)  # fixed seed for reproducible permutations
    n = 10
    env = PermutationSorting(n)
    transformer = OneHotStateTransformer(n)
    agent = DDQNAgent(env, transformer)
    agent.parallel_pretrain(rows=10000, epochs=30)
    # agent.load_pretrain_weights()
    agent.train(episodes=10000, max_steps=250)
    # agent.load_final_weights()
    # Spot-check: RL answer vs. exact answer on 10 random permutations.
    for _ in range(10):
        perm = np.random.permutation(n)
        rl_answer = agent.solve(perm)
        exact_answer = PermutationExactSolver(n).solve(perm)
        print(perm, '-', 'RL:', rl_answer, ' Exact:', exact_answer)
import numpy as np
from ddqn import DDQNAgent
from utils import plot_learning_curve, make_env

# Script entry point: set up a DDQN agent for Atari Pong.
# NOTE(review): this script appears truncated here — it initializes
# `n_steps`/`figure_file` but the training/game loop is not visible in
# this chunk; confirm the remainder of the file before editing.
if __name__ == '__main__':
    env = make_env('PongNoFrameskip-v4')
    best_score = -np.inf  # so the first completed score becomes the best
    load_checkpoint = False  # set True to resume from saved models
    n_games = 100
    agent = DDQNAgent(gamma=0.99, epsilon=1.0, lr=0.0001,
                      input_dims=(env.observation_space.shape),
                      n_actions=env.action_space.n, mem_size=50000,
                      eps_min=0.1, batch_size=32, replace=10000,
                      eps_dec=1e-5, chkpt_dir='models/', algo='DDQNAgent',
                      env_name='PongNoFrameskip-v4')
    if load_checkpoint:
        agent.load_models()
    # File stem encodes algorithm, environment, learning rate, and game count.
    fname = agent.algo + '_' + agent.env_name + '_lr' + str(agent.lr) +'_' \
        + str(n_games) + 'games'
    figure_file = 'plots/' + fname + '.png'
    n_steps = 0