def __init__(self, game, thread_id, optimizer, global_step):
    self.name = "worker_" + str(thread_id)
    self.thread_id = thread_id
    self.model_path = os.path.join(FLAGS.checkpoint_dir, FLAGS.model_name)
    self.optimizer = optimizer
    self.global_episode = global_step
    self.increment_global_episode = self.global_episode.assign_add(1)

    # Per-episode statistics used for TensorBoard summaries.
    self.episode_rewards = []
    # if not FLAGS.train:
    self.episode_optimal_rewards = []
    self.episodes_suboptimal_arms = []
    self.episode_lengths = []
    self.episode_mean_values = []

    self.summary_writer = tf.summary.FileWriter(
        os.path.join(FLAGS.summaries_dir, FLAGS.model_name) + "/worker_" + str(self.thread_id))
    self.summary = tf.Summary()

    # Each worker owns a local copy of the actor-critic network.
    if FLAGS.use_conv:
        self.local_AC = ConvNetwork(self.name, optimizer, self.global_episode)
    else:
        self.local_AC = ACNetwork(self.name, optimizer, self.global_episode)

    # Op that copies the global network parameters into this worker's local network.
    self.update_local_vars = update_target_graph('global', self.name)
    self.env = game
def __init__(self, game, sess, thread_id, nb_actions, optimizer, global_step):
    self.name = "worker_" + str(thread_id)
    self.thread_id = thread_id
    self.model_path = FLAGS.checkpoint_dir
    self.trainer = optimizer
    self.global_episode = global_step
    self.increment_global_episode = self.global_episode.assign_add(1)
    self.episode_rewards = []
    self.episode_lengths = []
    self.episode_mean_values = []
    self.sess = sess
    self.graph = sess.graph

    # self.summary_writer = tf.summary.FileWriter(FLAGS.summaries_dir + "/worker_" + str(self.thread_id), self.graph)
    self.summary_writer = tf.summary.FileWriter(FLAGS.summaries_dir + "/worker_" + str(self.thread_id))
    self.summary = tf.Summary()

    if FLAGS.lstm:
        self.local_AC = ACNetworkLSTM(self.name, nb_actions, optimizer)
    else:
        self.local_AC = ACNetwork(self.name, nb_actions, optimizer)

    self.update_local_ops = update_target_graph('global', self.name)
    self.actions = np.zeros([nb_actions])
    self.env = game
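# The worker constructors above rely on an `update_target_graph` helper that is
# not shown in this section. A minimal sketch of such a helper (an assumption,
# not necessarily the repository's exact implementation): it returns the ops
# that copy the trainable variables under the 'global' scope into the worker's
# local scope, so each worker can sync with the shared network before a rollout.
import tensorflow as tf

def update_target_graph(from_scope, to_scope):
    from_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, from_scope)
    to_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, to_scope)

    op_holder = []
    for from_var, to_var in zip(from_vars, to_vars):
        # Assign each global variable's current value to the matching local variable.
        op_holder.append(to_var.assign(from_var))
    return op_holder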
def run():
    recreate_directory_structure()
    tf.reset_default_graph()
    sess = tf.Session()
    # sess = tf_debug.LocalCLIDebugWrapperSession(sess)
    # sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
    with sess:
        with tf.device("/cpu:0"):
            global_step = tf.Variable(0, dtype=tf.int32, name='global_episodes', trainable=False)
            optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.lr)

            # The global network holds the shared parameters that every agent updates.
            if FLAGS.use_conv:
                global_network = ConvNetwork('global', None)
            else:
                global_network = ACNetwork('global', None)

            # num_agents = multiprocessing.cpu_count()
            num_agents = FLAGS.nb_concurrent
            agents = []
            envs = []
            for i in range(num_agents):
                gym_env = gym.make(FLAGS.game)
                # if FLAGS.monitor:
                #     gym_env = gym.wrappers.Monitor(gym_env, FLAGS.experiments_dir + '/worker_{}'.format(i), force=True)
                envs.append(gym_env)
            for i in range(num_agents):
                agents.append(Agent(envs[i], i, optimizer, global_step))
            saver = tf.train.Saver(max_to_keep=5)

        coord = tf.train.Coordinator()
        if FLAGS.resume:
            ckpt = tf.train.get_checkpoint_state(os.path.join(FLAGS.checkpoint_dir, FLAGS.model_name))
            print("Loading Model from {}".format(ckpt.model_checkpoint_path))
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            sess.run(tf.global_variables_initializer())

        agent_threads = []
        for agent in agents:
            # Bind the agent explicitly so each thread runs its own agent
            # (a bare lambda would late-bind the loop variable).
            thread = threading.Thread(target=agent.play, args=(sess, coord, saver))
            thread.start()
            agent_threads.append(thread)

        # Optionally render the environments while the agent threads train.
        while FLAGS.show_training:
            for env in envs:
                # time.sleep(1)
                # with main_lock:
                env.render()
        coord.join(agent_threads)
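# `run()` above reads its configuration from `FLAGS`. The flag definitions are
# not part of this section; the sketch below shows how the referenced flags
# might be declared with tf.app.flags. Only the flag names come from the code
# above, the default values are assumptions.
import tensorflow as tf

flags = tf.app.flags
flags.DEFINE_string('game', 'CartPole-v0', 'Gym environment id (assumed default)')
flags.DEFINE_string('model_name', 'a3c', 'Subdirectory name for checkpoints and summaries')
flags.DEFINE_string('checkpoint_dir', './models', 'Directory for saved models')
flags.DEFINE_string('summaries_dir', './summaries', 'Directory for TensorBoard summaries')
flags.DEFINE_float('lr', 1e-4, 'Learning rate (assumed default)')
flags.DEFINE_integer('nb_concurrent', 4, 'Number of parallel agent threads')
flags.DEFINE_boolean('use_conv', False, 'Use the convolutional network variant')
flags.DEFINE_boolean('resume', False, 'Restore the latest checkpoint before training')
flags.DEFINE_boolean('show_training', False, 'Render the environments while training')
FLAGS = flags.FLAGS

if __name__ == '__main__':
    run()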
def __init__(self, game, nb_actions, optimizer, global_step):
    self.name = "policy_eval"
    if FLAGS.lstm:
        self.local_AC = ACNetworkLSTM(self.name, nb_actions, optimizer)
    else:
        self.local_AC = ACNetwork(self.name, nb_actions, optimizer)
    self.update_local_ops = update_target_graph('global', self.name)
    self.summary_writer = tf.summary.FileWriter(FLAGS.summaries_dir + "/policy_eval")
    self.env = game
    self.actions = np.zeros([nb_actions])
    self.global_episode = global_step
def run():
    import time

    tf.reset_default_graph()
    sess = tf.Session()
    with sess:
        with tf.device("/cpu:0"):
            global_step = tf.Variable(0, dtype=tf.int32, name='global_episodes', trainable=False)
            optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.lr)
            if FLAGS.use_conv:
                global_network = ConvNetwork('global', None)
            else:
                global_network = ACNetwork('global', None)
            saver = tf.train.Saver(max_to_keep=5)

        if FLAGS.resume:
            ckpt = tf.train.get_checkpoint_state(os.path.join(FLAGS.checkpoint_dir, FLAGS.model_name))
            print("Loading Model from {}".format(ckpt.model_checkpoint_path))
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            sess.run(tf.global_variables_initializer())

        gym_env_monitor = gym.make(FLAGS.game)
        if FLAGS.monitor:
            gym_env_monitor = gym.wrappers.Monitor(
                gym_env_monitor,
                os.path.join(FLAGS.test_experiments_dir, FLAGS.model_name),
                force=True)
        pe = PolicyMonitor(game=gym_env_monitor, optimizer=optimizer, global_step=global_step)

        coord = tf.train.Coordinator()
        # Start a thread for the policy evaluation task.
        monitor_thread = threading.Thread(target=lambda: pe.eval_nb_test_episodes(sess))
        monitor_thread.start()

        # Optionally render the evaluation environment while the monitor runs.
        while FLAGS.show_training:
            time.sleep(1)
            with main_lock:
                gym_env_monitor.render()
        coord.join([monitor_thread])
def run(settings):
    recreate_subdirectory_structure(settings)
    tf.reset_default_graph()
    with tf.device("/cpu:0"):
        global_step = tf.Variable(0, dtype=tf.int32, name='global_episodes', trainable=False)
        optimizer = tf.train.AdamOptimizer(learning_rate=settings["lr"])
        global_network = ACNetwork('global', None)

        num_agents = 1
        agents = []
        envs = []
        for i in range(num_agents):
            if settings["game"] == '11arms':
                this_env = ElevenArms()
            else:
                this_env = TwoArms(settings["game"])
            envs.append(this_env)
        for i in range(num_agents):
            agents.append(Agent(envs[i], i, optimizer, global_step, settings))
        saver = tf.train.Saver(max_to_keep=5)

    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        if FLAGS.resume:
            ckpt = tf.train.get_checkpoint_state(settings["checkpoint_dir"])
            # print("Loading Model from {}".format(ckpt.model_checkpoint_path))
            try:
                saver.restore(sess, ckpt.model_checkpoint_path)
            except Exception as e:
                print(sys.exc_info()[0])
                print(e)
        else:
            sess.run(tf.global_variables_initializer())

        agent_threads = []
        for agent in agents:
            # Pass the agent explicitly to avoid late binding of the loop variable.
            thread = threading.Thread(target=agent.play, args=(sess, coord, saver))
            thread.start()
            agent_threads.append(thread)
        coord.join(agent_threads)
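# The bandit variant of `run` takes a plain `settings` dict instead of FLAGS.
# A hypothetical usage sketch: the keys match the code above, the values are
# only illustrative and not taken from the repository.
settings = {
    "game": "11arms",                       # anything else is passed to TwoArms(settings["game"])
    "lr": 1e-3,                             # learning rate (assumed value)
    "checkpoint_dir": "./models/11arms",    # used for saving/restoring checkpoints
    "summaries_dir": "./summaries/11arms",  # used by each agent's FileWriter
}
run(settings)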
def run():
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.device("/cpu:0"):
            global_step = tf.Variable(0, dtype=tf.int32, name='global_episodes', trainable=False)
            # optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.lr)
            optimizer = tf.train.RMSPropOptimizer(FLAGS.lr, 0.99, 0.0, 1e-6)

            gym_env_monitor = gym.make(FLAGS.game)
            gym_env_monitor.seed(FLAGS.seed)
            gym_env_monitor_wrapper = AtariEnvironment(
                gym_env=gym_env_monitor,
                resized_width=FLAGS.resized_width,
                resized_height=FLAGS.resized_height,
                agent_history_length=FLAGS.agent_history_length)
            nb_actions = len(gym_env_monitor_wrapper.gym_actions)

            if FLAGS.lstm:
                global_network = ACNetworkLSTM('global', nb_actions, None)
            else:
                global_network = ACNetwork('global', nb_actions, None)
            saver = tf.train.Saver(max_to_keep=5)

        if FLAGS.resume:
            ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
            print("Loading Model from {}".format(ckpt.model_checkpoint_path))
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            sess.run(tf.global_variables_initializer())

        pe = PolicyMonitor(
            game=gym_env_monitor_wrapper,
            nb_actions=nb_actions,
            optimizer=optimizer,
            global_step=global_step)
        pe.eval_1000(sess)
def __init__(self, game, thread_id, optimizer, global_step, settings):
    self.name = "agent_" + str(thread_id)
    self.thread_id = thread_id
    self.model_path = settings["checkpoint_dir"]
    self.settings = settings
    self.optimizer = optimizer
    self.global_episode = global_step
    self.increment_global_episode = self.global_episode.assign_add(1)
    self.episode_rewards = []

    # if not FLAGS.train:
    self.episode_regrets = []
    self.episodes_suboptimal_arms = []

    self.episode_lengths = []
    self.episode_mean_values = []
    self.summary_writer = tf.summary.FileWriter(settings["summaries_dir"] + "/agent_" + str(self.thread_id))
    self.summary = tf.Summary()

    self.local_AC = ACNetwork(self.name, optimizer, self.global_episode)
    self.update_local_vars = update_target_graph('global', self.name)
    self.env = game
def run():
    recreate_directory_structure()
    tf.reset_default_graph()
    sess = tf.Session()
    # sess = tf_debug.LocalCLIDebugWrapperSession(sess)
    # sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
    with sess:
        with tf.device("/cpu:0"):
            global_step = tf.Variable(0, dtype=tf.int32, name='global_episodes', trainable=False)
            # optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.lr)
            optimizer = tf.train.RMSPropOptimizer(FLAGS.lr, 0.99, 0.0, 1e-6)

            num_workers = FLAGS.nb_concurrent
            # num_workers = multiprocessing.cpu_count() - 1
            workers = []
            envs = []
            for i in range(num_workers):
                gym_env = gym.make(FLAGS.game)
                if FLAGS.seed:
                    gym_env.seed(FLAGS.seed)
                if FLAGS.monitor:
                    gym_env = gym.wrappers.Monitor(gym_env, FLAGS.experiments_dir + '/worker_{}'.format(i))
                this_env = AtariEnvironment(
                    gym_env=gym_env,
                    resized_width=FLAGS.resized_width,
                    resized_height=FLAGS.resized_height,
                    agent_history_length=FLAGS.agent_history_length)
                envs.append(this_env)
            nb_actions = len(envs[0].gym_actions)

            if FLAGS.lstm:
                global_network = ACNetworkLSTM('global', nb_actions, None)
            else:
                global_network = ACNetwork('global', nb_actions, None)

            for i in range(num_workers):
                workers.append(Worker(envs[i], sess, i, nb_actions, optimizer, global_step))
            saver = tf.train.Saver(max_to_keep=5)

            # gym_env_monitor = gym.make(FLAGS.game)
            # gym_env_monitor.seed(FLAGS.seed)
            # gym_env_monitor_wrapper = AtariEnvironment(gym_env=gym_env_monitor,
            #                                            resized_width=FLAGS.resized_width,
            #                                            resized_height=FLAGS.resized_height,
            #                                            agent_history_length=FLAGS.agent_history_length)
            # nb_actions = len(gym_env_monitor_wrapper.gym_actions)
            # pe = PolicyMonitor(
            #     game=gym_env_monitor_wrapper,
            #     nb_actions=nb_actions,
            #     optimizer=optimizer,
            #     global_step=global_step
            # )

        coord = tf.train.Coordinator()
        if FLAGS.resume:
            ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
            print("Loading Model from {}".format(ckpt.model_checkpoint_path))
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            sess.run(tf.global_variables_initializer())

        worker_threads = []
        for worker in workers:
            # Bind the worker explicitly so each thread runs its own worker
            # (a bare lambda would late-bind the loop variable).
            t = threading.Thread(target=worker.play, args=(coord, saver))
            t.start()
            worker_threads.append(t)

        # Start a thread for the policy eval task
        # monitor_thread = threading.Thread(target=lambda: pe.continuous_eval(FLAGS.eval_every, sess, coord))
        # monitor_thread.start()

        # Optionally render the environments while the worker threads train.
        while FLAGS.show_training:
            for env in envs:
                # time.sleep(1)
                # with main_lock:
                env.env.render()
        coord.join(worker_threads)