def test(action_set, level_names):
  """Test."""
  Agent = agent_factory(FLAGS.agent_name)
  level_returns = {level_name: [] for level_name in level_names}
  with tf.Graph().as_default():
    agent = Agent(len(action_set))
    outputs = {}
    for level_name in level_names:
      env = create_environment(level_name, seed=1, is_test=True)
      outputs[level_name] = build_actor(agent, env, level_name, action_set)

    with tf.train.SingularMonitoredSession(
        checkpoint_dir=FLAGS.logdir,
        hooks=[py_process.PyProcessHook()]) as session:
      for level_name in level_names:
        tf.logging.info('Testing level: %s', level_name)
        while True:
          done_v, infos_v = session.run((
              outputs[level_name].env_outputs.done,
              outputs[level_name].env_outputs.info))
          returns = level_returns[level_name]
          # Skip the first transition, then keep only the returns of episodes
          # that finished during this unroll.
          returns.extend(infos_v.episode_return[1:][done_v[1:]])
          if len(returns) >= FLAGS.test_num_episodes:
            tf.logging.info('Mean episode return: %f', np.mean(returns))
            break
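
# The `[1:][done_v[1:]]` indexing in test() above is easy to misread. The toy
# sketch below (illustrative only; not called anywhere) shows the same masking
# on plain numpy arrays: the first transition of the unroll is skipped, and
# boolean indexing then keeps only returns from episodes that finished.
def _done_mask_sketch():
  done_v = np.array([False, True, False, True])
  episode_return = np.array([0.0, 3.0, 1.5, 7.0])
  # Drop index 0, then select entries where the episode ended.
  finished = episode_return[1:][done_v[1:]]
  assert finished.tolist() == [3.0, 7.0]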

def main(args):
    batch_summary = {}
    seeds = range(args.num_repetitions)
    # Use the same output products for every agent.
    outputs_list = [args.output_products]
    parameter_combinations = itertools.product(seeds, args.agent_types,
                                               args.budgets, args.test_ranges,
                                               args.simulation_durations,
                                               outputs_list,
                                               args.return_from_isolations)
    parameter_combinations = clear_redundand_parameter_combinations(
        args, parameter_combinations)
    pbar = tqdm(total=len(parameter_combinations), desc="Completed runs")
    type_bugdet = []
    agents_sum = []
    for (seed, agent_type, budget, test_range, simulation_duration, outputs,
         rfi_raw_name) in parameter_combinations:
        # Randomness initialization.
        numpy.random.seed(seed)
        random.seed(seed)
        add_AT_B = "{}_{}".format(agent_type, budget)
        if add_AT_B not in type_bugdet:
            type_bugdet.append(add_AT_B)
        if agent_type not in agents_sum:
            agents_sum.append(agent_type)
        # Set up paths for output products of the current run.
        run_dirname = '{}__AT_{}__RFI_{}__B_{}__SD_{}__S_{}'.format(
            args.run_dir_name_prefix, agent_type, rfi_raw_name, budget,
            simulation_duration, seed)
        if 'args' in args.output_products:
            safely_dump_dictionary(os.path.join(args.output_dir, run_dirname),
                                   'args.txt', args.__dict__)
        agent = agent_factory(agent_type, rfi_raw_name)
        my_simulator = Epidemic_simulator(args.simulation_inputs_filepath,
                                          args.output_dir, run_dirname)
        run_summary = my_simulator.run_simulation(
            agent=agent,
            budget=budget,
            test_range=test_range,
            simulation_duration=simulation_duration,
            outputs=outputs,
            verbose=False)
        batch_summary[run_dirname] = run_summary
        pbar.update(1)
    batch_summary['type_bugdet'] = type_bugdet
    batch_summary['agents_sum'] = list(agents_sum)
    pbar.close()
    safely_dump_dictionary(args.output_dir,
                           args.run_dir_name_prefix + '_batch_summary.txt',
                           batch_summary, verbose=True)
    get_metrics_graphs(batch_summary, args.output_dir,
                       agent_order=args.agent_types)
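
# `itertools.product` in main() enumerates the full cross product of the sweep
# parameters, one tuple per run, before clear_redundand_parameter_combinations
# filters it. A self-contained toy sketch (made-up values, not the real
# argument lists) of how the run count multiplies:
def _parameter_grid_sketch():
    seeds = range(2)
    agent_types = ['greedy', 'random']
    budgets = [10, 50]
    combos = list(itertools.product(seeds, agent_types, budgets))
    # 2 seeds * 2 agent types * 2 budgets = 8 runs before redundancy filtering.
    assert len(combos) == 8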

def test(action_set, level_names):
  """Test."""
  Agent = agent_factory(FLAGS.agent_name)
  level_returns = {level_name: [] for level_name in level_names}
  with tf.Graph().as_default():
    outputs = {}
    agent = Agent(len(action_set))
    for level_name in level_names:
      env = create_atari_environment(level_name, seed=1, is_test=True)
      outputs[level_name] = build_actor(agent, env, level_name, action_set)

    with tf.train.SingularMonitoredSession(
        checkpoint_dir=FLAGS.logdir,
        hooks=[py_process.PyProcessHook()]) as session:
      for level_name in level_names:
        tf.logging.info('Testing level: %s', level_name)
        while True:
          done_v, infos_v = session.run((
              outputs[level_name].env_outputs.done,
              outputs[level_name].env_outputs.info))
          returns = level_returns[level_name]
          # Skip the first transition, then keep only the returns of episodes
          # that finished during this unroll. Note: a bare `if array:` on a
          # numpy array raises for more than one element, so test `.size`.
          finished_returns = infos_v.episode_return[1:][done_v[1:]]
          if finished_returns.size:
            returns.extend(finished_returns)
            tf.logging.info('Returns so far: %s', returns)
          if len(returns) >= FLAGS.test_num_episodes:
            tf.logging.info('Mean episode return: %f', np.mean(returns))
            break

  no_cap = utilities_atari.compute_human_normalized_score(level_returns,
                                                          per_level_cap=None)
  cap_100 = utilities_atari.compute_human_normalized_score(level_returns,
                                                           per_level_cap=100)
  tf.logging.info('No cap.: %f Cap 100: %f', no_cap, cap_100)
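
# `compute_human_normalized_score` lives in utilities_atari and is not shown
# here. For reference, a common definition for Atari (an assumption about this
# helper, not a transcription of it) rescales the agent's score so that a
# random policy maps to 0 and human performance maps to 100, optionally
# capping each level's contribution:
def _human_normalized_score_sketch(agent_score, random_score, human_score,
                                   per_level_cap=None):
  score = 100.0 * (agent_score - random_score) / (human_score - random_score)
  if per_level_cap is not None:
    score = min(score, per_level_cap)
  return score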
"illness_states_only_breakdown_daily, infection_probability_per_group_daily, " "ill_person_count_per_group_daily.") args = parser.parse_args() # Randomness initialization numpy.random.seed(args.seed) random.seed(args.seed) # Set up paths for output products run_dirname = '{}__AT_{}__RFI_{}__B_{}__SD_{}__S_{}'.format( args.run_dir_name_prefix, args.agent_type, args.return_from_isolations, args.budget, args.simulation_duration, args.seed) if 'args' in args.output_products: safely_dump_dictionary(os.path.join(args.output_dir, run_dirname), 'args.txt', args.__dict__) # Go # batch_summary = {} agent = agent_factory(args.agent_type, args.return_from_isolations) my_simulator = Epidemic_simulator(args.simulation_inputs_filepath, args.output_dir, run_dirname) run_summary = my_simulator.run_simulation( agent=agent, budget=args.budget, test_range=args.test_range, simulation_duration=args.simulation_duration, outputs=args.output_products, verbose=True) # batch_summary[run_dirname] = run_summary

def train(action_set, level_names):
  """Train."""
  if is_single_machine():
    local_job_device = ''
    shared_job_device = ''
    is_actor_fn = lambda i: True
    is_learner = True
    global_variable_device = '/gpu'
    server = tf.train.Server.create_local_server()
    filters = []
  else:
    local_job_device = '/job:%s/task:%d' % (FLAGS.job_name, FLAGS.task)
    shared_job_device = '/job:learner/task:0'
    is_actor_fn = lambda i: FLAGS.job_name == 'actor' and i == FLAGS.task
    is_learner = FLAGS.job_name == 'learner'

    # Placing the variables on the CPU makes it cheaper to send them to all
    # the actors. Continually copying the variables from the GPU is slow.
    global_variable_device = shared_job_device + '/cpu'
    cluster = tf.train.ClusterSpec({
        'actor': ['localhost:%d' % (8001 + i)
                  for i in range(FLAGS.num_actors)],
        'learner': ['localhost:8000']
    })
    server = tf.train.Server(cluster, job_name=FLAGS.job_name,
                             task_index=FLAGS.task)
    filters = [shared_job_device, local_job_device]

  # Only used to find the actor output structure.
  Agent = agent_factory(FLAGS.agent_name)
  with tf.Graph().as_default():
    specific_atari_game = level_names[0]
    env = create_atari_environment(specific_atari_game, seed=1)
    agent = Agent(len(action_set))
    structure = build_actor(agent, env, specific_atari_game, action_set)
    flattened_structure = nest.flatten(structure)
    dtypes = [t.dtype for t in flattened_structure]
    shapes = [t.shape.as_list() for t in flattened_structure]

  with tf.Graph().as_default(), \
       tf.device(local_job_device + '/cpu'), \
       pin_global_variables(global_variable_device):
    tf.set_random_seed(FLAGS.seed)  # Makes initialization deterministic.

    # Create Queue and Agent on the learner.
    with tf.device(shared_job_device):
      queue = tf.FIFOQueue(1, dtypes, shapes, shared_name='buffer')
      agent = Agent(len(action_set))

      if is_single_machine() and 'dynamic_batching' in sys.modules:
        # For single machine training, we use dynamic batching for improved
        # GPU utilization. The semantics of single machine training are
        # slightly different from the distributed setting because within a
        # single unroll of an environment, the actions may be computed using
        # different weights if an update happens within the unroll.
        old_build = agent._build

        @dynamic_batching.batch_fn
        def build(*args):
          with tf.device('/gpu'):
            return old_build(*args)

        tf.logging.info('Using dynamic batching.')
        agent._build = build

    # Build actors and ops to enqueue their output.
    enqueue_ops = []
    for i in range(FLAGS.num_actors):
      if is_actor_fn(i):
        level_name = level_names[i % len(level_names)]
        tf.logging.info('Creating actor %d with level %s', i, level_name)
        env = create_atari_environment(level_name, seed=i + 1)
        actor_output = build_actor(agent, env, level_name, action_set)
        with tf.device(shared_job_device):
          enqueue_ops.append(queue.enqueue(nest.flatten(actor_output)))

    # If running in a single machine setup, run actors with QueueRunners
    # (separate threads).
    if is_learner and enqueue_ops:
      tf.train.add_queue_runner(tf.train.QueueRunner(queue, enqueue_ops))

    # Build learner.
    if is_learner:
      # Create global step, which is the number of environment frames
      # processed.
      global_step = tf.get_variable(
          'num_environment_frames',
          initializer=tf.zeros_initializer(),
          shape=[],
          dtype=tf.int64,
          trainable=False,
          collections=[tf.GraphKeys.GLOBAL_STEP,
                       tf.GraphKeys.GLOBAL_VARIABLES])

      # Create batch (time major) and recreate structure.
      dequeued = queue.dequeue_many(FLAGS.batch_size)
      dequeued = nest.pack_sequence_as(structure, dequeued)

      def make_time_major(s):
        # Swap the leading axes: [batch, time, ...] -> [time, batch, ...].
        # (See the _time_major_sketch() toy example after train().)
        return nest.map_structure(
            lambda t: tf.transpose(t, [1, 0] + list(range(t.shape.ndims))[2:]),
            s)

      dequeued = dequeued._replace(
          env_outputs=make_time_major(dequeued.env_outputs),
          agent_outputs=make_time_major(dequeued.agent_outputs))

      with tf.device('/gpu'):
        # Using StagingArea allows us to prepare the next batch and send it to
        # the GPU while we're performing a training step. This adds up to one
        # step of policy lag.
        flattened_output = nest.flatten(dequeued)
        area = tf.contrib.staging.StagingArea(
            [t.dtype for t in flattened_output],
            [t.shape for t in flattened_output])
        stage_op = area.put(flattened_output)

        # Returns an ActorOutput tuple:
        # (level_name, agent_state, env_outputs, agent_outputs).
        data_from_actors = nest.pack_sequence_as(structure, area.get())
        levels_index = data_from_actors.level_id

        # Unroll agent on sequence, create losses and update ops.
        output = build_learner(agent,
                               data_from_actors.env_outputs,
                               data_from_actors.agent_outputs,
                               global_step=global_step,
                               levels_index=levels_index)

    # Create MonitoredSession (to run the graph, checkpoint and log).
    tf.logging.info('Creating MonitoredSession, is_chief %s', is_learner)
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.3)
    config = tf.ConfigProto(allow_soft_placement=True,
                            device_filters=filters,
                            gpu_options=gpu_options)
    with tf.train.MonitoredTrainingSession(
        server.target,
        is_chief=is_learner,
        checkpoint_dir=FLAGS.logdir,
        save_checkpoint_secs=600,
        save_summaries_secs=30,
        log_step_count_steps=50000,
        config=config,
        hooks=[py_process.PyProcessHook()]) as session:

      if is_learner:
        # Logging.
        level_returns = {level_name: [] for level_name in level_names}
        summary_dir = os.path.join(FLAGS.logdir, "logging")
        summary_writer = tf.summary.FileWriterCache.get(summary_dir)

        # Prepare data for first run.
        session.run_step_fn(
            lambda step_context: step_context.session.run(stage_op))

        # Execute learning and track performance.
        num_env_frames_v = 0
        # Uncomment the block below to print the number of parameters:
# print("total params:", np.sum([np.prod(v.get_shape().as_list()) for v in tf.trainable_variables()])) # vas = tf.trainable_variables() # for elem in vas: # print(elem) # print("Params: ", [v.get_shape().as_list() for v in tf.trainable_variables()]) while num_env_frames_v < FLAGS.total_environment_frames: level_names_v, done_v, infos_v, num_env_frames_v, _ = session.run( (data_from_actors.level_name, ) + output + (stage_op, )) level_names_v = np.repeat([level_names_v], done_v.shape[0], 0) for level_name, episode_return, episode_step, acc_episode_reward, acc_episode_step in zip( level_names_v[done_v], infos_v.episode_return[done_v], infos_v.episode_step[done_v], infos_v.acc_episode_reward[done_v], infos_v.acc_episode_step[done_v]): episode_frames = episode_step * FLAGS.num_action_repeats tf.logging.info( 'Level: %s Episode return: %f Acc return %f after %d frames', level_name, episode_return, acc_episode_reward, num_env_frames_v) summary = tf.summary.Summary() summary.value.add(tag=level_name + '/episode_return', simple_value=episode_return) summary.value.add(tag=level_name + '/episode_frames', simple_value=episode_frames) summary.value.add(tag=level_name + '/acc_episode_return', simple_value=acc_episode_reward) summary.value.add(tag=level_name + '/acc_episode_frames', simple_value=acc_episode_step) summary_writer.add_summary(summary, num_env_frames_v) level_returns[level_name].append(episode_return) current_episode_return_list = min( map(len, level_returns.values())) if FLAGS.multi_task == 1 and current_episode_return_list >= 1: def sum_none(list_): if list_: return sum(list_) else: return None level_returns = { level_name: sum_none(level_returns[level_name]) for level_name in level_names } no_cap = atari_utils.compute_human_normalized_score( level_returns, per_level_cap=None) cap_100 = atari_utils.compute_human_normalized_score( level_returns, per_level_cap=100) summary = tf.summary.Summary() summary.value.add(tag=(level_name + '/training_no_cap'), simple_value=no_cap) summary.value.add(tag=(level_name + '/training_cap_100'), simple_value=cap_100) level_returns = { level_name: [] for level_name in level_names } else: # Execute actors (they just need to enqueue their output). while True: session.run(enqueue_ops)

def train(action_set, level_names):
  """Train."""
  local_job_device = '/job:%s/task:%d' % (FLAGS.job_name, FLAGS.task)
  shared_job_device = '/job:learner/task:0'
  is_actor_fn = lambda i: FLAGS.job_name == 'actor' and i == FLAGS.task
  is_learner = FLAGS.job_name == 'learner'
  actor_hosts = FLAGS.actor_hosts.split(',')
  num_actors = len(actor_hosts)
  learner_host = FLAGS.learner_host.split(',')
  assert len(learner_host) == 1
  if is_learner:
    assert FLAGS.task == 0
    assert has_horovod
    hvd.init()

  # Placing the variables on the CPU makes it cheaper to send them to all the
  # actors. Continually copying the variables from the GPU is slow.
  global_variable_device = shared_job_device + '/cpu'
  filters = [shared_job_device, local_job_device]
  cluster = tf.train.ClusterSpec({
      'actor': actor_hosts,
      'learner': learner_host
  })
  config = tf.ConfigProto(allow_soft_placement=True, device_filters=filters)
  if is_learner:
    config.gpu_options.allow_growth = True
    config.gpu_options.visible_device_list = str(hvd.local_rank())
  server = tf.train.Server(cluster,
                           job_name=FLAGS.job_name,
                           task_index=FLAGS.task,
                           config=config)

  # Only used to find the actor output structure.
  Agent = agent_factory(FLAGS.agent_name)
  with tf.Graph().as_default():
    agent = Agent(len(action_set))
    env = create_environment(level_names[0], seed=1)
    structure = build_actor(agent, env, level_names[0], action_set)
    flattened_structure = nest.flatten(structure)
    dtypes = [t.dtype for t in flattened_structure]
    shapes = [t.shape.as_list() for t in flattened_structure]

  # Build the graph for the actor or the learner.
  with tf.Graph().as_default(), \
       tf.device(local_job_device + '/cpu'), \
       pin_global_variables(global_variable_device):
    tf.set_random_seed(FLAGS.seed)  # Makes initialization deterministic.

    # Create Queue and Agent on the learner.
    with tf.device(shared_job_device):
      queue = tf.FIFOQueue(1, dtypes, shapes, shared_name='buffer')
      agent = Agent(len(action_set))

    # Build actors and ops to enqueue their output.
    enqueue_ops = []
    for i in range(num_actors):
      if is_actor_fn(i):
        level_name = level_names[i % len(level_names)]
        tf.logging.info('Creating actor %d with level %s', i, level_name)
        env = create_environment(level_name, seed=i + 1)
        actor_output = build_actor(agent, env, level_name, action_set)
        with tf.device(shared_job_device):
          enqueue_ops.append(queue.enqueue(nest.flatten(actor_output)))

    # Build learner.
    if is_learner:
      # Create global step, which is the number of environment frames
      # processed.
      g_step = tf.get_variable(
          'num_environment_frames',
          initializer=tf.zeros_initializer(),
          shape=[],
          dtype=tf.int64,
          trainable=False,
          collections=[tf.GraphKeys.GLOBAL_STEP,
                       tf.GraphKeys.GLOBAL_VARIABLES])

      # Create batch (time major) and recreate structure.
      dequeued = queue.dequeue_many(FLAGS.batch_size)
      dequeued = nest.pack_sequence_as(structure, dequeued)

      def make_time_major(s):
        # Swap the leading axes: [batch, time, ...] -> [time, batch, ...].
        return nest.map_structure(
            lambda t: tf.transpose(t, [1, 0] + list(range(t.shape.ndims))[2:]),
            s)

      dequeued = dequeued._replace(
          env_outputs=make_time_major(dequeued.env_outputs),
          agent_outputs=make_time_major(dequeued.agent_outputs))

      with tf.device('/gpu'):
        # Using StagingArea allows us to prepare the next batch and send it to
        # the GPU while we're performing a training step. This adds up to one
        # step of policy lag.
        flattened_output = nest.flatten(dequeued)
        area = tf.contrib.staging.StagingArea(
            [t.dtype for t in flattened_output],
            [t.shape for t in flattened_output])
        stage_op = area.put(flattened_output)
        data_from_actors = nest.pack_sequence_as(structure, area.get())

        # Unroll agent on sequence, create losses and update ops.
        if hasattr(data_from_actors, 'agent_state'):
          agent_state = data_from_actors.agent_state
        else:
          agent_state = agent.initial_state(1)
        output, optimizer = build_learner(
            agent,
            agent_state=agent_state,
            env_outputs=data_from_actors.env_outputs,
            agent_outputs=data_from_actors.agent_outputs,
            g_step=g_step)

    # Create MonitoredSession (to run the graph, checkpoint and log).
    # MonitoredTrainingSession initializes all global variables.
    is_chief = is_learner
    hooks = [py_process.PyProcessHook()]
    if is_learner:
      # For variable initialization across learners.
      hooks.append(hvd.BroadcastGlobalVariablesHook(0))
    tf.logging.info('Creating MonitoredSession, is_chief %s', is_chief)
    if is_learner:
      tf.logging.info('At rank %d', hvd.rank())
    # Rank 0 takes care of checkpoint saving.
    checkpoint_dir = FLAGS.logdir if is_learner and hvd.rank() == 0 else None
    with tf.train.MonitoredTrainingSession(server.target,
                                           is_chief=is_chief,
                                           checkpoint_dir=checkpoint_dir,
                                           save_checkpoint_secs=600,
                                           save_summaries_secs=30,
                                           log_step_count_steps=50000,
                                           config=config,
                                           hooks=hooks) as session:
      if is_learner:
        # TensorBoard logging; only rank 0 writes summaries.
        summary_writer = (tf.summary.FileWriterCache.get(FLAGS.logdir)
                          if hvd.rank() == 0 else None)

        # Prepare data for first run.
        session.run_step_fn(
            lambda step_context: step_context.session.run(stage_op))

        # Execute learning and track performance.
        num_env_frames_v = 0
        while num_env_frames_v < FLAGS.total_environment_frames:
          level_names_v, done_v, infos_v, num_env_frames_v, _ = session.run(
              (data_from_actors.level_name,) + output + (stage_op,))
          level_names_v = np.repeat([level_names_v], done_v.shape[0], 0)

          for level_name, episode_return, episode_step in zip(
              level_names_v[done_v],
              infos_v.episode_return[done_v],
              infos_v.episode_step[done_v]):
            episode_frames = episode_step

            tf.logging.info('learner rank: %d, Env: %s Episode return: %f',
                            hvd.rank(), level_name, episode_return)

            if hvd.rank() == 0:
              summary = tf.summary.Summary()
              summary.value.add(tag=level_name + '/episode_return',
                                simple_value=episode_return)
              summary.value.add(tag=level_name + '/episode_frames',
                                simple_value=episode_frames)
              summary_writer.add_summary(summary, num_env_frames_v)
      else:
        # Execute actors (they just need to enqueue their output).
        while True:
          session.run(enqueue_ops)
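
# The StagingArea pattern used in both train() variants decouples host-to-GPU
# transfer from the training step: `put` stages batch N+1 while the graph
# trains on batch N taken from `get`, at the cost of one step of policy lag.
# Below is a stripped-down sketch of the same pattern; `make_batch_fn` and
# `make_train_op_fn` are hypothetical stand-ins for the real input pipeline
# and learner, not functions from this repo.
def _staging_prefetch_sketch(make_batch_fn, make_train_op_fn, num_steps=100):
  batch = make_batch_fn()  # A single tensor feeding the learner.
  area = tf.contrib.staging.StagingArea([batch.dtype], [batch.shape])
  stage_op = area.put([batch])
  staged = area.get()
  # Be defensive about get() returning a list vs. a single tensor.
  staged_batch = staged[0] if isinstance(staged, list) else staged
  train_op = make_train_op_fn(staged_batch)
  with tf.train.SingularMonitoredSession() as session:
    session.run(stage_op)  # Prime the pipeline with the first batch.
    for _ in range(num_steps):
      # Each step trains on the staged batch and stages the next one.
      session.run([train_op, stage_op])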