def initialize_agents(config):
    """Build the global step, environments, and agent(s) for *config*.

    Returns a single agent for the single-agent tasks ("matrix", "option",
    "play_option", or any non-a3c agent type), and a list of per-thread
    agents when running multi-agent a3c training.

    Args:
        config: experiment configuration; must expose num_agents,
            agent_type, network, and agent.

    Returns:
        A single agent object, or a list of agents (a3c multi-agent case).
    """
    global_step = tf.Variable(0, dtype=tf.int32, name='global_step',
                              trainable=False)
    envs = [_create_environment(config) for _ in range(config.num_agents)]
    action_size = envs[0].action_space.n
    nb_states = envs[0].nb_states

    if config.agent_type == "a3c":
        # Constructed for its side effect: registers the shared "global"
        # network variables that worker agents sync against.
        global_network = config.network("global", config, action_size,
                                        nb_states)

    # "matrix", "option" and "play_option" were three identical branches in
    # the original; they all build one agent on the first environment.
    if FLAGS.task in ("matrix", "option", "play_option"):
        agent = config.agent(envs[0], 0, global_step, config, FLAGS.task)
    else:
        if config.agent_type == "a3c":
            agents = [
                config.agent(envs[i], i, global_step, config, FLAGS.task)
                for i in range(config.num_agents)
            ]
            return agents
        agent = config.agent(envs[0], 0, global_step, config, FLAGS.task)
    return agent
def train(config, env_processes, logdir):
    """Compute a tabular successor-feature (SF) matrix by fixed-point
    iteration and print its singular values.

    Iterates SF[s] <- phi(s) + gamma * SF[s'] (with a uniformly random
    action per state) until the largest per-state change falls to theta.

    Args:
        config: experiment configuration; mutated in place (logdir,
            stage_logdir, network_optimizer are resolved here).
        env_processes: unused here; kept for a uniform train() signature
            across stages.
        logdir: base log directory; a "tabular_sf" stage dir is created
            inside it.
    """
    tf.reset_default_graph()
    sess = tf.Session()
    stage_logdir = os.path.join(logdir, "tabular_sf")
    tf.gfile.MakeDirs(stage_logdir)
    with sess:
        with tf.device("/cpu:0"):
            with config.unlocked:
                config.logdir = logdir
                config.stage_logdir = stage_logdir
                config.network_optimizer = getattr(tf.train,
                                                   config.network_optimizer)
            env = _create_environment(config)
            action_size = env.action_space.n
            nb_states = env.nb_states
            sf = np.eye(nb_states)
            # One-hot state features. Loop-invariant, so build it once —
            # the original rebuilt this matrix on every inner iteration.
            state_features = np.identity(nb_states)
            delta = np.inf
            theta = 1
            while theta < delta:
                delta = 0.0
                for s in range(nb_states):
                    sf_s = sf[s]
                    a = np.random.choice(range(action_size))
                    # NOTE(review): assumes the env exposes a deterministic
                    # (s, a) -> (s', r) model — confirm against the env class.
                    s1, r = env.get_next_state_and_reward(s, a)
                    sf_s1 = state_features[s] + config.discount * sf[s1]
                    delta = max(delta, np.sum(np.abs(sf_s - sf_s1)))
                    sf[s] = sf_s1
            # Renamed from `s` to avoid shadowing the loop variable above.
            u, singular_values, v = np.linalg.svd(sf)
            print(singular_values)
def train(config, env_processes, logdir):
    """Run multi-threaded training: one worker thread per agent.

    Optionally restores a checkpoint from FLAGS.load_from/models, then
    starts each agent's play() loop on its own thread and waits for the
    coordinator to stop them.

    Args:
        config: experiment configuration; mutated in place (logdir and
            network_optimizer are resolved here).
        env_processes: unused here; kept for a uniform train() signature.
        logdir: base log directory.
    """
    tf.reset_default_graph()
    sess = tf.Session()
    with sess:
        with tf.device("/cpu:0"):
            with config.unlocked:
                config.logdir = logdir
                config.network_optimizer = getattr(tf.train,
                                                   config.network_optimizer)
            global_step = tf.Variable(0, dtype=tf.int32, name='global_step',
                                      trainable=False)
            envs = [_create_environment(config)
                    for _ in range(config.num_agents)]
            action_size = envs[0].action_space.n
            # Side effect: creates the shared "global" network variables.
            global_network = config.network("global", config, action_size)
            agents = [config.agent(envs[i], i, global_step, config)
                      for i in range(config.num_agents)]
            saver = loader = utility.define_saver(
                exclude=(r'.*_temporary/.*', ))
            if FLAGS.load_from is not None:
                sess.run(tf.global_variables_initializer())
                ckpt = tf.train.get_checkpoint_state(
                    os.path.join(FLAGS.load_from, "models"))
                print("Loading Model from {}".format(
                    ckpt.model_checkpoint_path))
                loader.restore(sess, ckpt.model_checkpoint_path)
                sess.run(tf.local_variables_initializer())
            else:
                sess.run([
                    tf.global_variables_initializer(),
                    tf.local_variables_initializer()
                ])
            coord = tf.train.Coordinator()
            agent_threads = []
            for agent in agents:
                # BUG FIX: bind `agent` as a default argument. The original
                # `lambda: agent.play(...)` captured the loop variable by
                # reference, so a thread that had not started executing by
                # the next iteration would run the wrong agent.
                thread = threading.Thread(
                    target=(lambda agent=agent: agent.play(sess, coord, saver)))
                thread.start()
                agent_threads.append(thread)
            # BUG FIX: the original `while True:` spun a CPU core forever and
            # made the coord.join below unreachable. Poll the coordinator and
            # back off briefly when not rendering.
            import time
            while not coord.should_stop():
                if FLAGS.show_training:
                    for env in envs:
                        env.render()
                else:
                    time.sleep(0.5)
            coord.join(agent_threads)
def train(config, env_processes, logdir):
    """Plot heatmaps for an option policy from a previously trained model.

    Restores the latest checkpoint from <logdir>/sf_repres/models, loads the
    eigen-decomposition produced by the "sf_matrix" stage, and runs a single
    plotting thread for the option selected by FLAGS.option.

    Args:
        config: experiment configuration; mutated in place (logdir,
            stage_logdir, matrix_stage_logdir, network_optimizer).
        env_processes: unused here; kept for a uniform train() signature.
        logdir: base log directory; a "plot_sf_policy" stage dir is created.
    """
    # Named constant for the value that appeared twice as a bare `5` —
    # presumably the number of options; TODO confirm against the network.
    nb_options = 5
    tf.reset_default_graph()
    sess = tf.Session()
    previous_stage_logdir = os.path.join(logdir, "sf_repres")
    matrix_stage_logdir = os.path.join(logdir, "sf_matrix")
    stage_logdir = os.path.join(logdir, "plot_sf_policy")
    tf.gfile.MakeDirs(stage_logdir)
    with sess:
        with tf.device("/cpu:0"):
            with config.unlocked:
                config.logdir = logdir
                config.stage_logdir = stage_logdir
                config.matrix_stage_logdir = matrix_stage_logdir
                # Renamed from `eval`/`evect`: `eval` shadowed the builtin.
                evals, evects = get_direction(config.matrix_stage_logdir)
                config.network_optimizer = getattr(tf.train,
                                                   config.network_optimizer)
            global_step = tf.Variable(0, dtype=tf.int32, name='global_step',
                                      trainable=False)
            env = _create_environment(config)
            action_size = env.action_space.n
            global_network = config.network("global", config, action_size,
                                            nb_options)
            global_network.option = FLAGS.option
            agent = config.option_agent(env, 0, global_step, config,
                                        FLAGS.option, evals, evects,
                                        FLAGS.flip_eigen, nb_options)
            # One saver serves both roles, matching the sibling train()
            # functions in this file.
            saver = loader = utility.define_saver(
                exclude=(r'.*_temporary/.*', ))
            sess.run(tf.global_variables_initializer())
            ckpt = tf.train.get_checkpoint_state(
                os.path.join(previous_stage_logdir, "models"))
            print("Loading Model from {}".format(ckpt.model_checkpoint_path))
            loader.restore(sess, ckpt.model_checkpoint_path)
            sess.run(tf.local_variables_initializer())
            coord = tf.train.Coordinator()
            agent_threads = []
            thread = threading.Thread(
                target=(lambda: agent.plot_heatmap(sess, coord, saver)))
            thread.start()
            agent_threads.append(thread)
            coord.join(agent_threads)
def train(config, env_processes, logdir):
    """Train linear successor-feature agents, or build the SF matrix.

    When FLAGS.task == "matrix", runs a single agent's build_matrix1 on one
    thread; otherwise launches one play() thread per agent. Optionally
    resumes from a checkpoint under FLAGS.load_from/linear_sf/models.

    Args:
        config: experiment configuration; mutated in place (logdir,
            stage_logdir, network_optimizer).
        env_processes: unused here; kept for a uniform train() signature.
        logdir: base log directory; a "linear_sf" stage dir is created.
    """
    tf.reset_default_graph()
    sess = tf.Session()
    stage_logdir = os.path.join(logdir, "linear_sf")
    tf.gfile.MakeDirs(stage_logdir)
    with sess:
        with tf.device("/cpu:0"):
            with config.unlocked:
                config.logdir = logdir
                config.stage_logdir = stage_logdir
                config.network_optimizer = getattr(tf.train,
                                                   config.network_optimizer)
            global_step = tf.Variable(0, dtype=tf.int32, name='global_step',
                                      trainable=False)
            envs = [_create_environment(config)
                    for _ in range(config.num_agents)]
            action_size = envs[0].action_space.n
            nb_states = envs[0].nb_states
            # Side effect: creates the shared "global" network variables.
            global_network = config.network("global", config, action_size,
                                            nb_states)
            if FLAGS.task == "matrix":
                agent = config.linear_sf_agent(envs[0], 0, global_step,
                                               config)
            else:
                agents = [config.linear_sf_agent(envs[i], i, global_step,
                                                 config)
                          for i in range(config.num_agents)]
            saver = loader = utility.define_saver(
                exclude=(r'.*_temporary/.*', ))
            if FLAGS.resume:
                sess.run(tf.global_variables_initializer())
                # os.path.join is variadic — no need to nest calls.
                ckpt = tf.train.get_checkpoint_state(
                    os.path.join(FLAGS.load_from, "linear_sf", "models"))
                print("Loading Model from {}".format(
                    ckpt.model_checkpoint_path))
                loader.restore(sess, ckpt.model_checkpoint_path)
                sess.run(tf.local_variables_initializer())
            else:
                sess.run([tf.global_variables_initializer(),
                          tf.local_variables_initializer()])
            coord = tf.train.Coordinator()
            agent_threads = []
            if FLAGS.task == "matrix":
                thread = threading.Thread(
                    target=(lambda: agent.build_matrix1(sess, coord, saver)))
                thread.start()
                agent_threads.append(thread)
            else:
                for agent in agents:
                    # BUG FIX: bind `agent` as a default argument. The
                    # original lambda captured the loop variable by
                    # reference, so a slow-starting thread could run the
                    # wrong (typically last) agent.
                    thread = threading.Thread(
                        target=(lambda agent=agent:
                                agent.play(sess, coord, saver)))
                    thread.start()
                    agent_threads.append(thread)
            coord.join(agent_threads)