def get_agent(name, *args, **kwargs):
    if name == 'QLearningAgent':
        import qlearning
        return qlearning.QLearningAgent(*args, **kwargs)
    elif name == 'DQNAgent':
        import dqn
        return dqn.DQNAgent(*args, **kwargs)
    else:
        raise Exception('Invalid agent name.')
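# Example use of the factory above; the keyword arguments are illustrative only,
# since the real QLearningAgent/DQNAgent constructor signatures are not shown here.
agent = get_agent('DQNAgent', state_size=4, action_size=2)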
def make_agent(args, input_shape, num_actions: int, output_dir: str):
    replay_memory = ReplayMemory(max_size=args.replay_buffer_size,
                                 batch_size=args.batch_size)
    exploration_strategy = EpsilonGreedyStrategy(
        epsilon_max=args.epsilon_max,
        epsilon_min=args.epsilon_min,
        epsilon_decay=args.epsilon_decay,
    )
    hyper_parameters = dqn.HyperParameters(args.learning_rate, args.gamma)

    if args.dueling:
        dqn_class = dqn.make_dqn_dueling
    else:
        dqn_class = dqn.make_dqn

    if args.double:
        agent = dqn.DoubleDQNAgent(
            target_dqn=dqn_class(
                input_shape=input_shape,
                hidden_dim=args.hidden_dim,
                num_actions=num_actions,
            ),
            target_update_rate=args.target_update_rate,
            dqn=dqn_class(
                input_shape=input_shape,
                hidden_dim=args.hidden_dim,
                num_actions=num_actions,
            ),
            replay_memory=replay_memory,
            exploration_strategy=exploration_strategy,
            hyper_parameters=hyper_parameters,
            num_actions=num_actions,
            output_dir=output_dir,
        )
    else:
        agent = dqn.DQNAgent(
            dqn=dqn_class(
                input_shape=input_shape,
                hidden_dim=args.hidden_dim,
                num_actions=num_actions,
            ),
            replay_memory=replay_memory,
            exploration_strategy=exploration_strategy,
            hyper_parameters=hyper_parameters,
            num_actions=num_actions,
            output_dir=output_dir,
        )
    return agent
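# Hypothetical sketch of the EpsilonGreedyStrategy consumed by make_agent above.
# The constructor mirrors the call site (epsilon_max, epsilon_min, epsilon_decay);
# the should_explore() method and the multiplicative decay are assumptions, not
# the project's actual implementation.
import random


class EpsilonGreedyStrategy:
    def __init__(self, epsilon_max, epsilon_min, epsilon_decay):
        self.epsilon = epsilon_max
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay

    def should_explore(self):
        # Explore with probability epsilon, then decay epsilon toward its minimum.
        explore = random.random() < self.epsilon
        self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)
        return explore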
import gym
import dqn


def main():
    env = gym.make('CartPole-v1')
    state_size = env.observation_space.shape[0]
    action_size = env.action_space.n
    learning_rate = 1e-3
    model = build_model(state_size, action_size, learning_rate)
    agent = dqn.DQNAgent(model, state_size, action_size)
    agent.fit()
    # Record a video of every evaluation episode.
    env = gym.wrappers.Monitor(env, "./video",
                               video_callable=lambda episode_id: True, force=True)
    for _ in range(10):
        agent.play(env)
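# build_model is not defined in the snippet above; this is a minimal sketch that
# assumes a Keras MLP Q-network with an MSE loss. The layer sizes and optimizer
# are illustrative assumptions, not the original implementation.
from tensorflow import keras


def build_model(state_size, action_size, learning_rate):
    model = keras.Sequential([
        keras.layers.Dense(24, activation='relu', input_shape=(state_size,)),
        keras.layers.Dense(24, activation='relu'),
        keras.layers.Dense(action_size, activation='linear'),
    ])
    model.compile(loss='mse',
                  optimizer=keras.optimizers.Adam(learning_rate=learning_rate))
    return model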
import os
import pickle

import gym
import tensorflow as tf

import dqn
import exp_replay

env = gym.make('CartPole-v0')
exprep = exp_replay.ExpReplay(mem_size=MEM_SIZE, start_mem=START_MEM,
                              state_size=STATE_SIZE, kth=-1, batch_size=BATCH_SIZE)
sess = tf.Session()
with tf.device('/{}:0'.format(DEVICE)):
    agent = dqn.DQNAgent(session=sess, epsilon=EPSILON, epsilon_anneal=EPSILON_DECAY,
                         end_epsilon=END_EPSILON, lr=LEARNING_RATE,
                         gamma=DISCOUNT_FACTOR, state_size=4,
                         action_size=len(ACTIONS), n_hidden_1=10, n_hidden_2=10)
sess.run(tf.global_variables_initializer())
saver = tf.train.Saver()
if os.path.isdir(MODEL_DIR):
    saver.restore(sess, MODEL_PATH)
    agent.epsilon = agent.end_epsilon
    print('restored model')
if TRAIN:
    exprep = pickle.load(open(MEMORY_PATH, "rb"))
    history = [e_length for e_length in train(agent, exprep, env)]
    saver.save(sess, MODEL_PATH)
def __init__(self, state_size, action_size):
    self.agent = D.DQNAgent(state_size, action_size)
    self.N = state_size
    self.reward = []
    # (The opening `if` of this distributional-vs-plain DQN branch is not shown
    # in the fragment.)
    q_net.cuda()
    agent = dqn.DistributionalDQNAgent(q_net, args.double_dqn,
                                       train_env.num_actions, args.num_atoms,
                                       -10, 10)
else:
    if args.dueling:
        q_net_builder = model.build_dueling_network
    else:
        q_net_builder = model.build_basic_network
    q_net = q_net_builder(args.num_frames, args.frame_size, train_env.num_actions,
                          args.noisy_net, args.sigma0, args.net)
    q_net.cuda()
    agent = dqn.DQNAgent(q_net, args.double_dqn, train_env.num_actions)

if args.noisy_net:
    train_policy = GreedyEpsilonPolicy(0, agent)
else:
    train_policy = LinearDecayGreedyEpsilonPolicy(args.train_start_eps,
                                                  args.train_final_eps,
                                                  args.train_eps_num_steps,
                                                  agent)
eval_policy = GreedyEpsilonPolicy(args.eval_eps, agent)

replay_memory = ReplayMemory(args.replay_buffer_size)
replay_memory.burn_in(train_env, agent, args.burn_in_frames)
evaluator = lambda logger: evaluate(eval_env, eval_policy, 10, logger)
train(agent, train_env, train_policy, replay_memory, args.gamma,
        # Fragment from the tail of the training loop: decay epsilon, learn from
        # the replay buffer, and periodically checkpoint the model.
        agent.epsilon_decay()
        print("Current epsilon: {}".format(agent.epsilon))
        agent.learn(buffer=buffer, num_steps=128, batch_size=256)
        if i % 20 == 0 and i > 0:
            saver.save(sess, save_path)
            print("Saved model successfully!")


if __name__ == "__main__":
    env = SnakeEnv(gameSpeed=5, train_model=True)
    save_path = "./snake/model"
    # ob = env.reset()
    # print(ob, type(ob), ob.shape)
    buffer = ReplayBuffer(buffer_size=8192)
    sess = tf.Session()
    agent = dqn.DQNAgent(sess=sess, epsilon=0.9, epsilon_anneal=0.01,
                         end_epsilon=0.1, lr=0.001, gamma=0.9,
                         state_size=3, action_size=4, name_scope="dqn")
    sess.run(tf.global_variables_initializer())
    train(agent=agent, buffer=buffer, env=env, num_episodes=10000,
          max_steps=100, save_path=save_path)
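# Hypothetical minimal ReplayBuffer matching the buffer_size argument used above.
# The add/sample interface is an assumption; the project's actual buffer, and the
# interface expected by agent.learn(), may differ.
import random
from collections import deque


class ReplayBuffer:
    def __init__(self, buffer_size):
        self.buffer = deque(maxlen=buffer_size)

    def add(self, transition):
        # transition is typically (state, action, reward, next_state, done)
        self.buffer.append(transition)

    def sample(self, batch_size):
        return random.sample(self.buffer, min(batch_size, len(self.buffer)))

    def __len__(self):
        return len(self.buffer)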
import dqn as dqn
import gym
import numpy as np

episodes = 500
EPISODES = 100
batch_size = 32
TRAIN_MODE = True
TEST_MODE = False

if __name__ == "__main__":
    env = gym.make('CartPole-v0')
    state_size = env.observation_space.shape[0]
    action_size = env.action_space.n
    print(state_size, action_size)
    agent = dqn.DQNAgent(state_size, action_size)
    if TRAIN_MODE:
        # Iterate the game
        for e in range(EPISODES):
            # Reset state at the beginning of each game
            state = env.reset()
            state = np.reshape(state, [1, 4])
            # time_t represents each frame of the game
            for time_t in range(500):
                # turn this on if you want to render
                # env.render()
                # Decide action
                action = agent.act(state)
                # Advance the game to the next frame based on the action.
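                # Assumed continuation of the loop (not in the original snippet):
                # the standard step/remember/replay cycle. remember(), replay(),
                # and agent.memory are assumptions about the dqn.DQNAgent API.
                next_state, reward, done, _ = env.step(action)
                next_state = np.reshape(next_state, [1, state_size])
                # Store the transition in the agent's replay memory
                agent.remember(state, action, reward, next_state, done)
                state = next_state
                if done:
                    print("episode: {}/{}, score: {}".format(e, EPISODES, time_t))
                    break
            # Train on a random minibatch once enough transitions are stored
            if len(agent.memory) > batch_size:
                agent.replay(batch_size)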