def run(self):
    """
    Apply the training procedure to every environment in ``self.envs``.

    Takes no arguments: a linear exploration schedule and a linear
    learning-rate schedule are built from ``self.config`` for each
    environment. Environments whose position in ``self.envs`` is below
    ``self.config.start_index`` are skipped.
    """
    # initialize
    self.initialize()
    config = self.config

    for env_position, current_env in enumerate(self.envs):
        if index_is_skipped := env_position < self.config.start_index:
            continue

        # schedules are rebuilt for every environment
        exploration = LinearExploration(
            current_env, config.eps_begin, config.eps_end, config.eps_nsteps)
        learning_rate = LinearSchedule(
            config.lr_begin, config.lr_end, config.lr_nsteps)

        # switch the active ops to the ones stored for this environment
        (self.q,
         self.target_q,
         self.update_target_op,
         self.loss,
         self.train_op,
         self.grad_norm) = self.ops[env_position]
        self.sess.run(self.update_target_op)

        self.env = current_env
        self.index = env_position

        # record one game at the beginning
        if self.config.record:
            self.record()

        self.train(exploration, learning_rate)
def _train(opponents, train_from_scratch=False, render=False):
    """Run DQN training episodes on the 'PommeFFACompetition-v0' environment.

    Episodes are played until the global step counter reaches
    ``config.nsteps_train``. An episode is cut short as soon as the
    training agent receives a reward of -1 while the game is still running.

    Args:
        opponents: forwarded unchanged to ``_init_agents``.
        train_from_scratch: forwarded to ``DQNAgent`` as the keyword of the
            same name.
        render: when true, ``env.render()`` is called before every step.
    """
    env = pommerman.make('PommeFFACompetition-v0', [])
    try:
        # Exploration strategy
        exp_schedule = LinearExploration(env, config.eps_begin, config.eps_end,
                                         config.eps_nsteps)

        # Learning rate schedule
        lr_schedule = LinearSchedule(config.lr_begin, config.lr_end,
                                     config.lr_nsteps)

        # Initialize agents.
        # NOTE(review): the positional ``True`` is presumably a "training"
        # flag (``_test`` passes ``False``) -- confirm against DQNAgent.
        dqn_agent = DQNAgent(env, config, exp_schedule, lr_schedule, True,
                             train_from_scratch=train_from_scratch)
        dqn_agent_index = _init_agents(env, exp_schedule, lr_schedule,
                                       opponents, dqn_agent)

        t = 1
        while t < config.nsteps_train:
            state = env.reset()
            done = False
            while not done:
                t += 1
                if render:
                    env.render()
                actions = env.act(state)
                state, reward, done, _ = env.step(actions)
                if reward[dqn_agent_index] == -1 and not done:
                    # Stop the episode when the training agent dies.
                    dqn_agent.episode_end(-1)
                    done = True
    finally:
        # Release the environment even if training raises or is interrupted.
        env.close()
def _test(opponents, match_num=20, render=True):
    """Play evaluation matches with the DQN agent and print its win rate.

    A match ends when the environment reports ``done`` or as soon as the
    DQN agent receives a reward of -1. A match counts as a win when the
    DQN agent's own reward is 1.

    Args:
        opponents: forwarded unchanged to ``_init_agents``.
        match_num: number of matches to play.
        render: when true, ``env.render()`` is called before every step.
    """
    env = pommerman.make('PommeFFACompetition-v0', [])
    try:
        # Exploration strategy: begin/end epsilon are both 0.
        exp_schedule = LinearExploration(env, 0, 0, 1)

        # Learning rate schedule
        lr_schedule = LinearSchedule(config.lr_begin, config.lr_end,
                                     config.lr_nsteps)

        # Initialize agents.
        # NOTE(review): the positional ``False`` is presumably a "training"
        # flag (``_train`` passes ``True``) -- confirm against DQNAgent.
        dqn_agent = DQNAgent(env, config, exp_schedule, lr_schedule, False)
        dqn_agent_index = _init_agents(env, exp_schedule, lr_schedule,
                                       opponents, dqn_agent)

        count = 0
        win = 0
        for _ in range(match_num):
            state = env.reset()
            done = False
            while not done:
                if render:
                    env.render()
                actions = env.act(state)
                state, reward, done, _info = env.step(actions)
                # Score the DQN agent's own slot rather than a fixed index 0,
                # matching the death check below.
                if reward[dqn_agent_index] == 1:
                    win += 1
                    print('win at episode %d' % count)
                if reward[dqn_agent_index] == -1 and not done:
                    # Stop the episode when the testing agent dies.
                    done = True
            count += 1

        # float() keeps true division on Python 2 as well; guard against
        # match_num == 0 so no ZeroDivisionError is raised.
        print(float(win) / count if count else 0.0)
    finally:
        # Release the environment even if a match raises.
        env.close()
##############################################################
#################### YOUR CODE HERE - 8-12 lines #############
# NOTE(review): the statements below are the interior of a method whose
# header is not visible in this view (they read ``self`` and ``scope``);
# re-indent to the enclosing method body when splicing back.
optimizer = tf.train.AdamOptimizer(learning_rate=self.lr)
scope_variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=scope)
grads_and_vars = optimizer.compute_gradients(self.loss, scope_variables)

if self.config.grad_clip:
    # Clip each gradient by norm; leave ``None`` gradients untouched because
    # tf.clip_by_norm cannot take None.
    grads_to_apply = [
        (None if grad is None else tf.clip_by_norm(grad, self.config.clip_val), var)
        for grad, var in grads_and_vars
    ]
else:
    # Without clipping, apply the raw gradients (previously the name passed
    # to apply_gradients was only bound when clipping was on).
    grads_to_apply = grads_and_vars

self.train_op = optimizer.apply_gradients(grads_to_apply)
# The reported norm is taken over the unclipped gradients.
self.grad_norm = tf.global_norm([grad for grad, _ in grads_and_vars])
##############################################################
######################## END YOUR CODE #######################


if __name__ == '__main__':
    env = EnvTest((5, 5, 1))

    # exploration strategy
    exp_schedule = LinearExploration(env, config.eps_begin,
                                     config.eps_end, config.eps_nsteps)

    # learning rate schedule
    lr_schedule = LinearSchedule(config.lr_begin,
                                 config.lr_end, config.lr_nsteps)

    # train model
    model = Linear(env, config)
    model.run(exp_schedule, lr_schedule)
def print_config(config):
    """Print every attribute of ``config`` as ``name = value``, sorted by name.

    Args:
        config: any object supporting ``vars()``; its instance attributes
            are listed after a 'Current config:' header and a blank line.
    """
    # Parenthesised single-argument print gives the same output under
    # Python 2's print statement and Python 3's print function.
    print('Current config:\n')
    for name, value in sorted(vars(config).items()):
        print(name + ' = ' + str(value))


if __name__ == '__main__':
    args = parse_args()
    my_config = modify_config(args)
    print_config(my_config)

    with tf.device('/gpu:' + str(args.gpu)):
        # make env
        env = gym.make(my_config.env_name)
        env = wrap_dqn(env)

        # exploration strategy
        exp_schedule = LinearExploration(env, my_config.eps_begin,
                                         my_config.eps_end,
                                         my_config.eps_nsteps)

        # learning rate schedule
        lr_schedule = LinearSchedule(my_config.lr_begin, my_config.lr_end,
                                     my_config.lr_nsteps)

        # train model
        model = NatureQN(env, my_config)
        model.run(exp_schedule, lr_schedule)