def init(self):
    """Create the environment, session and CPU-pinned agent graph for this gatherer.

    Sets ``env_opts``, ``env``, ``s0``, ``session``, ``agent``,
    ``trainable_vars``, ``accum_vars``, ``assign_op``, ``cur_hidden_state``
    and ``episode`` on ``self``.

    ``accum_vars`` holds one zero-initialised, non-trainable variable per
    trainable variable in the ``gather-<idx>`` scope, and running
    ``assign_op`` copies every ``accum_vars`` entry into the matching
    trainable variable.
    """
    import tensorflow as tf

    self.env_opts = environments.get_env_options(
        self.env_name, self.env_producer.get_use_gpu())
    self.env = self.env_producer.get_new_environment()
    self.s0 = self.env.reset()
    self.session = utils.create_session(self.env_opts, False)

    with tf.device("/cpu:0"), tf.variable_scope("gather-%s" % self.idx):
        policy = get_policy(self.env_opts, self.session)
        self.agent = PPOAgent(
            policy, self.session, "gather-%s" % self.idx, self.env_opts)
        self.trainable_vars = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, "gather-%s" % self.idx)

        # One zero-filled staging variable per trainable variable.
        staging = []
        for var in self.trainable_vars:
            zeros = tf.zeros_like(var.initialized_value())
            staging.append(tf.Variable(zeros, trainable=False))
        self.accum_vars = staging

        # A single grouped op that loads the staged values into the model.
        copy_ops = [
            target.assign(source)
            for target, source in zip(self.trainable_vars, self.accum_vars)
        ]
        self.assign_op = tf.group(copy_ops)

    self.session.run(tf.global_variables_initializer())
    self.cur_hidden_state = self.agent.get_init_hidden_state()
    # Seven parallel slots; the first starts with the initial observation
    # and the sixth with the initial hidden state.
    self.episode = (
        [self.s0], [], [], [], [], [self.cur_hidden_state], []
    )
def start(env):
    """Run the saved policy in ``env`` and write the rendered frames to a GIF.

    Args:
        env: Gym environment id; passed to ``gym.make``.

    The policy is restored from the latest checkpoint under
    ``models/<env id>/``. If no checkpoint exists or restoring fails, the
    variables are freshly initialised instead. Episodes are played until at
    least 1000 environment steps have been taken (the episode in progress is
    always finished), then all frames are saved to ``images/<env id>.gif``.
    """
    env = gym.make(env)
    frames = []
    MASTER_NAME = "master-0"
    IMAGE_PATH = "images/%s.gif" % env.spec.id
    checkpoint_dir = "models/%s/" % env.spec.id

    tf.reset_default_graph()
    with tf.Session() as session:
        with tf.variable_scope(MASTER_NAME):
            env_opts = environments.get_env_options(env, False)
            policy = get_policy(env_opts, session)
            master_agent = PPOAgent(policy, session, MASTER_NAME, env_opts)

        # Kept from the original: building a Saver adds save/restore ops to
        # the graph before the meta graph is imported.
        # NOTE(review): confirm the variables restored through
        # import_meta_graph are the ones master_agent actually uses.
        saver = tf.train.Saver(max_to_keep=1)

        # The restore used to sit outside an empty ``try: pass`` block, so
        # the fallback below could never run and a missing checkpoint
        # crashed on ``None + ".meta"``.
        restored = False
        try:
            checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
            if checkpoint is not None:
                saver = tf.train.import_meta_graph(checkpoint + ".meta")
                saver.restore(session, checkpoint)
                restored = True
        except Exception as err:
            print("Checkpoint restore error: %r" % (err,))
        if not restored:
            print("Failed to restore model, starting from scratch")
            session.run(tf.global_variables_initializer())

        global_step = 0
        while global_step < 1000:
            terminal = False
            s0 = env.reset()
            cum_rew = 0
            cur_hidden_state = master_agent.get_init_hidden_state()
            episode_count = 0
            while not terminal:
                episode_count += 1
                frames.append(env.render(mode='rgb_array'))
                action, h_out = master_agent.get_strict_sample(
                    s0, cur_hidden_state)
                cur_hidden_state = h_out
                s0, r, terminal, _ = env.step(action)
                cum_rew += r
                global_step += 1
            # Steps taken and total reward for the finished episode.
            print(episode_count, cum_rew)

        imageio.mimsave(IMAGE_PATH, frames, duration=1.0 / 60.0)
def init_agent(self):
    """Build this worker's session and agent graph, then enter ``self.run()``.

    Sets ``session``, ``agent``, ``trainable_vars``, ``accum_vars``,
    ``p_opt_vars``, ``v_opt_vars`` and ``assign_op`` on ``self``.

    Each of ``accum_vars``, ``p_opt_vars`` and ``v_opt_vars`` holds
    zero-initialised, non-trainable variables. They mirror, respectively,
    the trainable variables in the ``worker-<idx>`` scope and the variables
    of the agent's ``p_opt`` and ``v_opt`` optimizers. Running
    ``assign_op`` copies all mirrored values into their targets.
    """
    import tensorflow as tf

    def zero_mirrors(variables):
        # One non-trainable, zero-filled variable per entry of `variables`.
        return [
            tf.Variable(tf.zeros_like(var.initialized_value()),
                        trainable=False)
            for var in variables
        ]

    def copy_ops(targets, sources):
        # Assign ops that load each source into the matching target.
        return [dst.assign(src) for dst, src in zip(targets, sources)]

    env_opts = environments.get_env_options(
        self.env_name, self.env_producer.get_use_gpu())
    self.session = utils.create_session(env_opts, True)

    with tf.variable_scope("worker-%s" % self.idx):
        policy = get_policy(env_opts, self.session)
        self.agent = PPOAgent(
            policy, self.session, "worker-%s" % self.idx, env_opts)
        self.trainable_vars = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, "worker-%s" % self.idx)
        self.accum_vars = zero_mirrors(self.trainable_vars)

        policy_opt_vars = self.agent.p_opt.variables()
        value_opt_vars = self.agent.v_opt.variables()
        self.p_opt_vars = zero_mirrors(policy_opt_vars)
        self.v_opt_vars = zero_mirrors(value_opt_vars)

        # Ops are created in the original order: optimizer state first,
        # then the model weights.
        policy_copy = copy_ops(policy_opt_vars, self.p_opt_vars)
        value_copy = copy_ops(value_opt_vars, self.v_opt_vars)
        weight_copy = copy_ops(self.trainable_vars, self.accum_vars)
        self.assign_op = tf.group(weight_copy + policy_copy + value_copy)

    self.session.run(tf.global_variables_initializer())
    self.run()
def start(env, gpu):
    """Prepare the output directories and construct the training master.

    Args:
        env: Gym environment id; passed to ``gym.make``.
        gpu: Forwarded to ``environments.get_env_options`` as its second
            argument.

    Ensures ``logs/`` and ``models/`` exist and removes any previous
    ``logs/<env id>`` directory. It then builds a ``SimpleMaster`` from
    the ``worker_num`` and ``gather_per_worker`` environment options.
    """
    env = gym.make(env)
    env_name = env.spec.id

    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs('logs', exist_ok=True)
    os.makedirs('models', exist_ok=True)

    # Best-effort cleanup of stale logs. ignore_errors keeps this
    # non-fatal without a bare except that would also swallow
    # KeyboardInterrupt.
    shutil.rmtree("logs/" + env_name, ignore_errors=True)

    env_producer = environments.EnvironmentProducer(env.spec.id)
    env_opts = environments.get_env_options(env, gpu)
    worker_num = env_opts["worker_num"]
    gather_per_worker = env_opts["gather_per_worker"]
    master = SimpleMaster(worker_num, gather_per_worker, env_opts, env_producer)
def start(env):
    """Render the saved policy playing ``env`` in an endless loop.

    Args:
        env: Gym environment id; passed to ``gym.make``.

    The policy is restored from the latest checkpoint under
    ``models/<env id>/``. If no checkpoint exists or restoring fails, the
    variables are freshly initialised instead. Episodes are then played and
    rendered forever; after each one the step count and total reward are
    printed. This function does not return.
    """
    env = gym.make(env)
    MASTER_NAME = "master-0"
    checkpoint_dir = "models/%s/" % env.spec.id

    tf.reset_default_graph()
    with tf.Session() as session:
        with tf.variable_scope(MASTER_NAME):
            env_opts = environments.get_env_options(env, False)
            policy = get_policy(env_opts, session)
            master_agent = PPOAgent(policy, session, MASTER_NAME, env_opts)

        # Kept from the original: building a Saver adds save/restore ops to
        # the graph before the meta graph is imported.
        # NOTE(review): confirm the variables restored through
        # import_meta_graph are the ones master_agent actually uses.
        saver = tf.train.Saver(max_to_keep=1)

        # The restore used to sit outside an empty ``try: pass`` block, so
        # the fallback below could never run and a missing checkpoint
        # crashed on ``None + ".meta"``.
        restored = False
        try:
            checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
            if checkpoint is not None:
                saver = tf.train.import_meta_graph(checkpoint + ".meta")
                saver.restore(session, checkpoint)
                restored = True
        except Exception as err:
            print("Checkpoint restore error: %r" % (err,))
        if not restored:
            print("Failed to restore model, starting from scratch")
            session.run(tf.global_variables_initializer())

        while True:
            terminal = False
            s0 = env.reset()
            cum_rew = 0
            cur_hidden_state = master_agent.get_init_hidden_state()
            episode_count = 0
            while not terminal:
                episode_count += 1
                env.render()
                action, h_out = master_agent.get_strict_sample(
                    s0, cur_hidden_state)
                cur_hidden_state = h_out
                s0, r, terminal, _ = env.step(action)
                cum_rew += r
            # Steps taken and total reward for the finished episode.
            print(episode_count, cum_rew)
def start(self):
    """Build the master agent graph, restore state, and run the training loop.

    Sets ``summary_writer``, ``session``, ``agent``, ``trainable_vars``,
    ``accum_vars``, ``p_opt_vars``, ``v_opt_vars``, ``assign_op`` and
    ``saver`` on ``self``.

    Each of ``accum_vars``, ``p_opt_vars`` and ``v_opt_vars`` holds
    zero-initialised, non-trainable variables. They mirror, respectively,
    the trainable variables in the ``master-0`` scope and the variables of
    the agent's ``p_opt`` and ``v_opt`` optimizers. Running ``assign_op``
    copies the mirrored values into their targets.

    After a best-effort restore from ``models/<env_name>/`` the method loops
    forever. Every 10th iteration it saves the model, and every iteration
    it broadcasts then merges weights. This method does not return.
    """
    import tensorflow as tf

    env_opts = environments.get_env_options(
        self.env_name, self.env_producer.get_use_gpu())
    self.summary_writer = tf.summary.FileWriter("logs/%s" % self.env_name)
    self.session = utils.create_session(env_opts, True)

    with tf.variable_scope("master-0"):
        pol = get_policy(env_opts, self.session)
        self.agent = PPOAgent(pol, self.session, "master-0", env_opts)
        self.trainable_vars = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, "master-0")
        self.accum_vars = [
            tf.Variable(tf.zeros_like(tv.initialized_value()),
                        trainable=False)
            for tv in self.trainable_vars
        ]
        p_vars = self.agent.p_opt.variables()
        v_vars = self.agent.v_opt.variables()
        self.p_opt_vars = [
            tf.Variable(tf.zeros_like(tv.initialized_value()),
                        trainable=False)
            for tv in p_vars
        ]
        self.v_opt_vars = [
            tf.Variable(tf.zeros_like(tv.initialized_value()),
                        trainable=False)
            for tv in v_vars
        ]
        p_assign_ops = [
            var.assign(src) for var, src in zip(p_vars, self.p_opt_vars)
        ]
        v_assign_ops = [
            var.assign(src) for var, src in zip(v_vars, self.v_opt_vars)
        ]
        assign_ops = [
            var.assign(src)
            for var, src in zip(self.trainable_vars, self.accum_vars)
        ]
        self.assign_op = tf.group(assign_ops + p_assign_ops + v_assign_ops)

    self.restore_variables()
    self.saver = tf.train.Saver(max_to_keep=1)
    self.session.run(tf.global_variables_initializer())

    # Best-effort restore: any failure leaves the freshly initialised
    # variables in place. ``except Exception`` (not a bare except) lets
    # KeyboardInterrupt/SystemExit propagate.
    restored = False
    try:
        checkpoint = tf.train.latest_checkpoint(
            "models/%s/" % env_opts["env_name"])
        # latest_checkpoint returns None when nothing has been saved yet.
        if checkpoint is not None:
            self.saver = tf.train.import_meta_graph(checkpoint + ".meta")
            self.saver.restore(self.session, checkpoint)
            restored = True
    except Exception:
        pass
    if not restored:
        print("failed to restore model")

    while True:
        if self.iter_count % 10 == 0:
            print("Saving model...")
            self.save_variables()
            self.saver.save(self.session, self.model_path, self.iter_count)
            print("Model saved")
        self.broadcast_weights()
        self.merge_weights()
        self.iter_count += 1