Example #1
def test(action_set, level_names):
    """Test."""

    Agent = agent_factory(FLAGS.agent_name)
    level_returns = {level_name: [] for level_name in level_names}
    with tf.Graph().as_default():
        agent = Agent(len(action_set))
        outputs = {}
        for level_name in level_names:
            env = create_environment(level_name, seed=1, is_test=True)
            outputs[level_name] = build_actor(agent, env, level_name,
                                              action_set)

        with tf.train.SingularMonitoredSession(checkpoint_dir=FLAGS.logdir,
                                               hooks=[
                                                   py_process.PyProcessHook()
                                               ]) as session:
            for level_name in level_names:
                tf.logging.info('Testing level: %s', level_name)
                while True:
                    done_v, infos_v = session.run(
                        (outputs[level_name].env_outputs.done,
                         outputs[level_name].env_outputs.info))
                    returns = level_returns[level_name]
                    returns.extend(infos_v.episode_return[1:][done_v[1:]])

                    if len(returns) >= FLAGS.test_num_episodes:
                        tf.logging.info('Mean episode return: %f',
                                        np.mean(returns))
                        break
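
The masking idiom above, infos_v.episode_return[1:][done_v[1:]], keeps only the returns of episodes that finished at each step of the unroll, skipping the first transition. A minimal NumPy sketch of the same idiom, with made-up values:

import numpy as np

# done flags and episode returns for one unroll of length 5
done_v = np.array([False, False, True, False, True])
episode_return = np.array([0.0, 1.0, 5.0, 2.0, 7.0])

# [1:] drops the first step; the boolean mask keeps finished episodes only
finished = episode_return[1:][done_v[1:]]    # -> array([5., 7.])

returns = []
returns.extend(finished)
print(np.mean(returns))                      # -> 6.0
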
Example #2
def main(args):
    batch_summary = {}
    seeds = range(args.num_repetitions)
    # have the same outputs for every agent
    outputs_list = [args.output_products]
    parameter_combinations = itertools.product(seeds, args.agent_types,
                                               args.budgets, args.test_ranges,
                                               args.simulation_durations,
                                               outputs_list,
                                               args.return_from_isolations)
    parameter_combinations = clear_redundand_parameter_combinations(
        args, parameter_combinations)
    pbar = tqdm(total=len(parameter_combinations), desc="Completed runs")
    type_bugdet = []
    agents_sum = []
    for (seed, agent_type, budget, test_range, simulation_duration, outputs,
         rfi_raw_name) in parameter_combinations:

        # Randomness initialization
        numpy.random.seed(seed)
        random.seed(seed)
        add_AT_B = "{}_{}".format(agent_type, budget)
        if add_AT_B not in type_bugdet:
            type_bugdet.append(add_AT_B)
        if agent_type not in agents_sum:
            agents_sum.append(agent_type)
        # Set up paths for output products of the current run
        run_dirname = '{}__AT_{}__RFI_{}__B_{}__SD_{}__S_{}'.format(
            args.run_dir_name_prefix, agent_type, rfi_raw_name, budget,
            simulation_duration, seed)
        if 'args' in args.output_products:
            safely_dump_dictionary(os.path.join(args.output_dir, run_dirname),
                                   'args.txt', args.__dict__)

        agent = agent_factory(agent_type, rfi_raw_name)
        my_simulator = Epidemic_simulator(args.simulation_inputs_filepath,
                                          args.output_dir, run_dirname)
        run_summary = my_simulator.run_simulation(
            agent=agent,
            budget=budget,
            test_range=test_range,
            simulation_duration=simulation_duration,
            outputs=outputs,
            verbose=False)
        batch_summary[run_dirname] = run_summary
        pbar.update(1)
    batch_summary['type_bugdet'] = type_bugdet
    batch_summary['agents_sum'] = list(agents_sum)
    pbar.close()
    safely_dump_dictionary(args.output_dir,
                           args.run_dir_name_prefix + '_batch_summary.txt',
                           batch_summary,
                           verbose=True)
    get_metrics_graphs(batch_summary,
                       args.output_dir,
                       agent_order=args.agent_types)
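
Example #2 sweeps the full Cartesian product of its parameter lists and re-seeds numpy and random at the start of every run, so each combination can be reproduced in isolation. A stripped-down sketch of that pattern; run_once is a hypothetical stand-in for Epidemic_simulator.run_simulation:

import itertools
import random

import numpy

def run_once(seed, agent_type, budget):
    # placeholder for a real simulation run
    return {'seed': seed, 'agent': agent_type, 'budget': budget}

seeds = range(2)
agent_types = ['random', 'greedy']
budgets = [10, 50]

batch_summary = {}
for seed, agent_type, budget in itertools.product(seeds, agent_types, budgets):
    # re-seed so each run is reproducible on its own
    numpy.random.seed(seed)
    random.seed(seed)
    run_name = 'AT_{}__B_{}__S_{}'.format(agent_type, budget, seed)
    batch_summary[run_name] = run_once(seed, agent_type, budget)
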
Example #3
def test(action_set, level_names):
    """Test."""
    Agent = agent_factory(FLAGS.agent_name)
    level_returns = {level_name: [] for level_name in level_names}
    with tf.Graph().as_default():
        outputs = {}
        agent = Agent(len(action_set))
        for level_name in level_names:
            env = create_atari_environment(level_name, seed=1, is_test=True)
            outputs[level_name] = build_actor(agent, env, level_name,
                                              action_set)

        logdir = FLAGS.logdir
        # tf.logging.info("LOGDIR IS: {}".format(logdir))
        with tf.train.SingularMonitoredSession(checkpoint_dir=logdir,
                                               hooks=[
                                                   py_process.PyProcessHook()
                                               ]) as session:
            for level_name in level_names:
                tf.logging.info('Testing level: %s', level_name)
                while True:
                    done_v, infos_v = session.run(
                        (outputs[level_name].env_outputs.done,
                         outputs[level_name].env_outputs.info))
                    returns = level_returns[level_name]
                    new_returns = infos_v.episode_return[1:][done_v[1:]]
                    if new_returns.size:
                        tf.logging.info("Return: {}".format(
                            level_returns[level_name]))
                    returns.extend(new_returns)

                    if len(returns) >= FLAGS.test_num_episodes:
                        tf.logging.info('Mean episode return: %f',
                                        np.mean(returns))
                        break

    no_cap = utilities_atari.compute_human_normalized_score(level_returns,
                                                            per_level_cap=None)
    cap_100 = utilities_atari.compute_human_normalized_score(level_returns,
                                                             per_level_cap=100)
    tf.logging.info('No cap.: %f Cap 100: %f', no_cap, cap_100)
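
For reference, a hedged sketch of what compute_human_normalized_score is assumed to compute: the usual Atari normalization 100 * (agent - random) / (human - random), optionally clipped at per_level_cap and then averaged over levels. The real per-game reference scores live in utilities_atari; the numbers below are placeholders only:

import numpy as np

RANDOM_SCORES = {'breakout': 1.7}    # placeholder reference scores
HUMAN_SCORES = {'breakout': 30.5}

def human_normalized_score(level_returns, per_level_cap=None):
    scores = []
    for level, returns in level_returns.items():
        raw = 100.0 * ((np.mean(returns) - RANDOM_SCORES[level]) /
                       (HUMAN_SCORES[level] - RANDOM_SCORES[level]))
        if per_level_cap is not None:
            raw = min(raw, per_level_cap)
        scores.append(raw)
    return np.mean(scores)
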
Example #4
    "illness_states_only_breakdown_daily, infection_probability_per_group_daily, "
    "ill_person_count_per_group_daily.")

args = parser.parse_args()

# Randomness initialization
numpy.random.seed(args.seed)
random.seed(args.seed)

# Set up paths for output products
run_dirname = '{}__AT_{}__RFI_{}__B_{}__SD_{}__S_{}'.format(
    args.run_dir_name_prefix, args.agent_type, args.return_from_isolations,
    args.budget, args.simulation_duration, args.seed)
if 'args' in args.output_products:
    safely_dump_dictionary(os.path.join(args.output_dir, run_dirname),
                           'args.txt', args.__dict__)

# Go
# batch_summary = {}
agent = agent_factory(args.agent_type, args.return_from_isolations)
my_simulator = Epidemic_simulator(args.simulation_inputs_filepath,
                                  args.output_dir, run_dirname)
run_summary = my_simulator.run_simulation(
    agent=agent,
    budget=args.budget,
    test_range=args.test_range,
    simulation_duration=args.simulation_duration,
    outputs=args.output_products,
    verbose=True)
# batch_summary[run_dirname] = run_summary
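
safely_dump_dictionary is not shown in these snippets; below is a plausible minimal implementation, included only to clarify the call sites above (the project's real helper may differ):

import json
import os

def safely_dump_dictionary(dir_path, filename, dictionary, verbose=False):
    # create the run directory if needed and write the dict as readable text
    os.makedirs(dir_path, exist_ok=True)
    path = os.path.join(dir_path, filename)
    with open(path, 'w') as f:
        json.dump(dictionary, f, indent=2, default=str)
    if verbose:
        print('Wrote {}'.format(path))
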
Example #5
def train(action_set, level_names):
    """Train."""
    if is_single_machine():
        local_job_device = ''
        shared_job_device = ''
        is_actor_fn = lambda i: True
        is_learner = True
        global_variable_device = '/gpu'
        server = tf.train.Server.create_local_server()
        filters = []
    else:
        local_job_device = '/job:%s/task:%d' % (FLAGS.job_name, FLAGS.task)
        shared_job_device = '/job:learner/task:0'
        is_actor_fn = lambda i: FLAGS.job_name == 'actor' and i == FLAGS.task
        is_learner = FLAGS.job_name == 'learner'

        # Placing the variables on the CPU makes them cheaper to send to all
        # the actors. Continually copying the variables from the GPU is slow.
        global_variable_device = shared_job_device + '/cpu'
        cluster = tf.train.ClusterSpec({
            'actor':
            ['localhost:%d' % (8001 + i) for i in range(FLAGS.num_actors)],
            'learner': ['localhost:8000']
        })
        server = tf.train.Server(cluster,
                                 job_name=FLAGS.job_name,
                                 task_index=FLAGS.task)
        filters = [shared_job_device, local_job_device]

    # Only used to find the actor output structure.
    Agent = agent_factory(FLAGS.agent_name)
    with tf.Graph().as_default():
        specific_atari_game = level_names[0]
        env = create_atari_environment(specific_atari_game, seed=1)
        agent = Agent(len(action_set))
        structure = build_actor(agent, env, specific_atari_game, action_set)
        flattened_structure = nest.flatten(structure)
        dtypes = [t.dtype for t in flattened_structure]
        shapes = [t.shape.as_list() for t in flattened_structure]

    with tf.Graph().as_default(), \
         tf.device(local_job_device + '/cpu'), \
         pin_global_variables(global_variable_device):
        tf.set_random_seed(FLAGS.seed)  # Makes initialization deterministic.

        # Create Queue and Agent on the learner.
        with tf.device(shared_job_device):
            queue = tf.FIFOQueue(1, dtypes, shapes, shared_name='buffer')
            agent = Agent(len(action_set))

            if is_single_machine() and 'dynamic_batching' in sys.modules:
                # For single machine training, we use dynamic batching for improved GPU
                # utilization. The semantics of single machine training are slightly
                # different from the distributed setting because within a single unroll
                # of an environment, the actions may be computed using different weights
                # if an update happens within the unroll.
                old_build = agent._build

                @dynamic_batching.batch_fn
                def build(*args):

                    with tf.device('/gpu'):
                        return old_build(*args)

                tf.logging.info('Using dynamic batching.')
                agent._build = build

        # Build actors and ops to enqueue their output.
        enqueue_ops = []
        for i in range(FLAGS.num_actors):
            if is_actor_fn(i):
                level_name = level_names[i % len(level_names)]
                tf.logging.info('Creating actor %d with level %s', i,
                                level_name)
                env = create_atari_environment(level_name, seed=i + 1)
                actor_output = build_actor(agent, env, level_name, action_set)
                with tf.device(shared_job_device):
                    enqueue_ops.append(
                        queue.enqueue(nest.flatten(actor_output)))

        # If running in a single machine setup, run actors with QueueRunners
        # (separate threads).
        if is_learner and enqueue_ops:

            tf.train.add_queue_runner(tf.train.QueueRunner(queue, enqueue_ops))

        # Build learner.
        if is_learner:
            # Create global step, which is the number of environment frames processed.
            global_step = tf.get_variable('num_environment_frames',
                                          initializer=tf.zeros_initializer(),
                                          shape=[],
                                          dtype=tf.int64,
                                          trainable=False,
                                          collections=[
                                              tf.GraphKeys.GLOBAL_STEP,
                                              tf.GraphKeys.GLOBAL_VARIABLES
                                          ])

            # Create batch (time major) and recreate structure.
            dequeued = queue.dequeue_many(FLAGS.batch_size)
            dequeued = nest.pack_sequence_as(structure, dequeued)

            def make_time_major(s):
                return nest.map_structure(
                    lambda t: tf.transpose(t, [1, 0] + list(
                        range(t.shape.ndims))[2:]), s)

            dequeued = dequeued._replace(
                env_outputs=make_time_major(dequeued.env_outputs),
                agent_outputs=make_time_major(dequeued.agent_outputs))

            with tf.device('/gpu'):
                # Using StagingArea allows us to prepare the next batch and send it to
                # the GPU while we're performing a training step. This adds up to 1 step
                # policy lag.
                flattened_output = nest.flatten(dequeued)
                area = tf.contrib.staging.StagingArea(
                    [t.dtype for t in flattened_output],
                    [t.shape for t in flattened_output])
                stage_op = area.put(flattened_output)

                # Returns an ActorOutput tuple -> (level name, agent_state, env_outputs, agent_output)
                data_from_actors = nest.pack_sequence_as(structure, area.get())

                # levels_index = tf.map_fn(lambda y: tf.py_function(lambda x: game_id[x.numpy()], [y], Tout=tf.int32), data_from_actors.level_name, dtype=tf.int32, parallel_iterations=56)
                # levels_index = tf.reshape(levels_index, [FLAGS.batch_size])
                levels_index = data_from_actors.level_id
                # Unroll agent on sequence, create losses and update ops.
                output = build_learner(agent,
                                       data_from_actors.env_outputs,
                                       data_from_actors.agent_outputs,
                                       global_step=global_step,
                                       levels_index=levels_index)

        # Create MonitoredSession (to run the graph, checkpoint and log).
        tf.logging.info('Creating MonitoredSession, is_chief %s', is_learner)
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.3)
        config = tf.ConfigProto(allow_soft_placement=True,
                                device_filters=filters,
                                gpu_options=gpu_options)
        # config.gpu_options.allow_growth = True
        # config.gpu_options.per_process_gpu_memory_fraction = 0.8
        logdir = FLAGS.logdir
        with tf.train.MonitoredTrainingSession(
                server.target,
                is_chief=is_learner,
                checkpoint_dir=logdir,
                save_checkpoint_secs=600,
                save_summaries_secs=30,
                log_step_count_steps=50000,
                config=config,
                hooks=[py_process.PyProcessHook()]) as session:

            if is_learner:
                # Logging.
                level_returns = {level_name: [] for level_name in level_names}
                summary_dir = os.path.join(FLAGS.logdir, "logging")
                summary_writer = tf.summary.FileWriterCache.get(summary_dir)
                # Prepare data for first run.
                session.run_step_fn(
                    lambda step_context: step_context.session.run(stage_op))
                # Execute learning and track performance.
                num_env_frames_v = 0

                # Uncomment these lines to print the number of parameters.
                # print("total params:", np.sum([np.prod(v.get_shape().as_list()) for v in tf.trainable_variables()]))
                # vas = tf.trainable_variables()
                # for elem in vas:
                #   print(elem)
                # print("Params: ", [v.get_shape().as_list() for v in tf.trainable_variables()])

                while num_env_frames_v < FLAGS.total_environment_frames:
                    level_names_v, done_v, infos_v, num_env_frames_v, _ = session.run(
                        (data_from_actors.level_name, ) + output +
                        (stage_op, ))

                    level_names_v = np.repeat([level_names_v], done_v.shape[0],
                                              0)
                    for level_name, episode_return, episode_step, acc_episode_reward, acc_episode_step in zip(
                            level_names_v[done_v],
                            infos_v.episode_return[done_v],
                            infos_v.episode_step[done_v],
                            infos_v.acc_episode_reward[done_v],
                            infos_v.acc_episode_step[done_v]):

                        episode_frames = episode_step * FLAGS.num_action_repeats
                        tf.logging.info(
                            'Level: %s Episode return: %f Acc return %f after %d frames',
                            level_name, episode_return, acc_episode_reward,
                            num_env_frames_v)

                        summary = tf.summary.Summary()
                        summary.value.add(tag=level_name + '/episode_return',
                                          simple_value=episode_return)
                        summary.value.add(tag=level_name + '/episode_frames',
                                          simple_value=episode_frames)
                        summary.value.add(tag=level_name +
                                          '/acc_episode_return',
                                          simple_value=acc_episode_reward)
                        summary.value.add(tag=level_name +
                                          '/acc_episode_frames',
                                          simple_value=acc_episode_step)
                        summary_writer.add_summary(summary, num_env_frames_v)

                        level_returns[level_name].append(episode_return)

                    current_episode_return_list = min(
                        map(len, level_returns.values()))
                    if FLAGS.multi_task == 1 and current_episode_return_list >= 1:

                        def sum_none(list_):
                            if list_:
                                return sum(list_)
                            else:
                                return None

                        level_returns = {
                            level_name: sum_none(level_returns[level_name])
                            for level_name in level_names
                        }

                        no_cap = atari_utils.compute_human_normalized_score(
                            level_returns, per_level_cap=None)
                        cap_100 = atari_utils.compute_human_normalized_score(
                            level_returns, per_level_cap=100)

                        summary = tf.summary.Summary()
                        summary.value.add(tag=(level_name +
                                               '/training_no_cap'),
                                          simple_value=no_cap)
                        summary.value.add(tag=(level_name +
                                               '/training_cap_100'),
                                          simple_value=cap_100)
                        summary_writer.add_summary(summary, num_env_frames_v)

                        level_returns = {
                            level_name: []
                            for level_name in level_names
                        }
            else:
                # Execute actors (they just need to enqueue their output).
                while True:
                    session.run(enqueue_ops)
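
In the learner above, queue.dequeue_many returns batch-major tensors of shape [batch, time, ...] while the loss code expects time-major [time, batch, ...]; make_time_major simply swaps the first two axes of every tensor in the structure. The same permutation in plain NumPy, with an assumed Atari-like shape:

import numpy as np

batch_major = np.zeros((32, 20, 84, 84, 4))        # [batch, time, H, W, C]
perm = [1, 0] + list(range(batch_major.ndim))[2:]  # -> [1, 0, 2, 3, 4]
time_major = np.transpose(batch_major, perm)
print(time_major.shape)                            # -> (20, 32, 84, 84, 4)
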
Example #6
def train(action_set, level_names):
    """Train."""

    local_job_device = '/job:%s/task:%d' % (FLAGS.job_name, FLAGS.task)
    shared_job_device = '/job:learner/task:0'
    is_actor_fn = lambda i: FLAGS.job_name == 'actor' and i == FLAGS.task
    is_learner = FLAGS.job_name == 'learner'
    actor_hosts = FLAGS.actor_hosts.split(',')
    num_actors = len(actor_hosts)
    learner_host = FLAGS.learner_host.split(',')
    assert len(learner_host) == 1
    if is_learner:
        assert FLAGS.task == 0
        assert has_horovod
        hvd.init()

    # Placing the variables on the CPU makes them cheaper to send to all
    # the actors. Continually copying the variables from the GPU is slow.
    global_variable_device = '/job:learner/task:0' + '/cpu'
    filters = [shared_job_device, local_job_device]
    cluster = tf.train.ClusterSpec({
        'actor': actor_hosts,
        'learner': learner_host
    })
    config = tf.ConfigProto(allow_soft_placement=True, device_filters=filters)
    if is_learner:
        config.gpu_options.allow_growth = True
        config.gpu_options.visible_device_list = str(hvd.local_rank())
    server = tf.train.Server(cluster,
                             job_name=FLAGS.job_name,
                             task_index=FLAGS.task,
                             config=config)

    # Only used to find the actor output structure.
    Agent = agent_factory(FLAGS.agent_name)
    with tf.Graph().as_default():
        agent = Agent(len(action_set))
        env = create_environment(level_names[0], seed=1)
        structure = build_actor(agent, env, level_names[0], action_set)
        flattened_structure = nest.flatten(structure)
        dtypes = [t.dtype for t in flattened_structure]
        shapes = [t.shape.as_list() for t in flattened_structure]

    # build graph for actor or learner
    with tf.Graph().as_default(), \
         tf.device(local_job_device + '/cpu'), \
         pin_global_variables(global_variable_device):
        tf.set_random_seed(FLAGS.seed)  # Makes initialization deterministic.

        # Create Queue and Agent on the learner.
        with tf.device(shared_job_device):
            queue = tf.FIFOQueue(1, dtypes, shapes, shared_name='buffer')
            agent = Agent(len(action_set))

        # Build actors and ops to enqueue their output.
        enqueue_ops = []
        for i in range(num_actors):
            if is_actor_fn(i):
                level_name = level_names[i % len(level_names)]
                tf.logging.info('Creating actor %d with level %s', i,
                                level_name)
                env = create_environment(level_name, seed=i + 1)
                actor_output = build_actor(agent, env, level_name, action_set)
                with tf.device(shared_job_device):
                    enqueue_ops.append(
                        queue.enqueue(nest.flatten(actor_output)))

        # Build learner.
        if is_learner:
            # Create global step, which is the number of environment frames
            # processed.
            g_step = tf.get_variable('num_environment_frames',
                                     initializer=tf.zeros_initializer(),
                                     shape=[],
                                     dtype=tf.int64,
                                     trainable=False,
                                     collections=[
                                         tf.GraphKeys.GLOBAL_STEP,
                                         tf.GraphKeys.GLOBAL_VARIABLES
                                     ])
            # Create batch (time major) and recreate structure.
            dequeued = queue.dequeue_many(FLAGS.batch_size)
            dequeued = nest.pack_sequence_as(structure, dequeued)

            def make_time_major(s):
                return nest.map_structure(
                    lambda t: tf.transpose(t, [1, 0] + list(
                        range(t.shape.ndims))[2:]), s)

            dequeued = dequeued._replace(
                env_outputs=make_time_major(dequeued.env_outputs),
                agent_outputs=make_time_major(dequeued.agent_outputs))

            with tf.device("/gpu"):
                # Using StagingArea allows us to prepare the next batch and send it to
                # the GPU while we're performing a training step. This adds up to 1
                # step policy lag.
                flattened_output = nest.flatten(dequeued)
                area = tf.contrib.staging.StagingArea(
                    [t.dtype for t in flattened_output],
                    [t.shape for t in flattened_output])
                stage_op = area.put(flattened_output)
                data_from_actors = nest.pack_sequence_as(structure, area.get())
                # Unroll agent on sequence, create losses and update ops.
                if hasattr(data_from_actors, 'agent_state'):
                    agent_state = data_from_actors.agent_state
                else:
                    agent_state = agent.initial_state(1)
                output, optimizer = build_learner(
                    agent,
                    agent_state=agent_state,
                    env_outputs=data_from_actors.env_outputs,
                    agent_outputs=data_from_actors.agent_outputs,
                    g_step=g_step)

        # Create MonitoredSession (to run the graph, checkpoint and log).
        is_chief = is_learner  # MonitoredTrainingSession inits all global variables
        hooks = [py_process.PyProcessHook()]
        if is_learner:
            # for variable initialization across learners
            hooks.append(hvd.BroadcastGlobalVariablesHook(0))
        tf.logging.info('Creating MonitoredSession, is_chief %s', is_chief)
        if is_learner:
            tf.logging.info('At rank %d', hvd.rank())
        # rank 0 takes care of ckpt saving
        checkpoint_dir = (FLAGS.logdir
                          if is_learner and hvd.rank() == 0 else None)
        with tf.train.MonitoredTrainingSession(server.target,
                                               is_chief=is_chief,
                                               checkpoint_dir=checkpoint_dir,
                                               save_checkpoint_secs=600,
                                               save_summaries_secs=30,
                                               log_step_count_steps=50000,
                                               config=config,
                                               hooks=hooks) as session:

            if is_learner:
                # tb Logging
                summary_writer = (tf.summary.FileWriterCache.get(FLAGS.logdir)
                                  if hvd.rank() == 0 else None)

                # Prepare data for first run.
                session.run_step_fn(
                    lambda step_context: step_context.session.run(stage_op))

                # Execute learning and track performance.
                num_env_frames_v = 0
                while num_env_frames_v < FLAGS.total_environment_frames:
                    level_names_v, done_v, infos_v, num_env_frames_v, _ = session.run(
                        (data_from_actors.level_name, ) + output +
                        (stage_op, ))
                    level_names_v = np.repeat([level_names_v], done_v.shape[0],
                                              0)

                    for level_name, episode_return, episode_step in zip(
                            level_names_v[done_v],
                            infos_v.episode_return[done_v],
                            infos_v.episode_step[done_v]):
                        episode_frames = episode_step

                        tf.logging.info(
                            'learner rank: %d, Env: %s Episode return: %f',
                            hvd.rank(), level_name, episode_return)

                        if hvd.rank() == 0:  # tb Logging
                            summary = tf.summary.Summary()
                            summary.value.add(tag=level_name +
                                              '/episode_return',
                                              simple_value=episode_return)
                            summary.value.add(tag=level_name +
                                              '/episode_frames',
                                              simple_value=episode_frames)
                            summary_writer.add_summary(summary,
                                                       num_env_frames_v)
            else:
                # Execute actors (they just need to enqueue their output).
                while True:
                    session.run(enqueue_ops)
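
Example #6 differs from Example #5 mainly in using Horovod for multi-GPU learners: hvd.init() is called once per learner process, each process is pinned to one GPU via visible_device_list, initial variables are synchronized with BroadcastGlobalVariablesHook(0), and only rank 0 writes checkpoints. A condensed sketch of those pieces with the TF1-style horovod.tensorflow API; the gradient all-reduce presumably happens via hvd.DistributedOptimizer inside build_learner (not shown here), and the path and learning rate below are placeholders:

import tensorflow as tf
import horovod.tensorflow as hvd

hvd.init()
config = tf.ConfigProto(allow_soft_placement=True)
config.gpu_options.allow_growth = True
config.gpu_options.visible_device_list = str(hvd.local_rank())  # one GPU per process

optimizer = hvd.DistributedOptimizer(tf.train.RMSPropOptimizer(1e-4))  # all-reduce grads
hooks = [hvd.BroadcastGlobalVariablesHook(0)]  # sync initial weights from rank 0
checkpoint_dir = '/tmp/logdir' if hvd.rank() == 0 else None  # only rank 0 saves

with tf.train.MonitoredTrainingSession(checkpoint_dir=checkpoint_dir,
                                       hooks=hooks,
                                       config=config) as session:
    pass  # training loop would go here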