Example #1
import functools

import tensorflow as tf

# utility, _create_environment, and _define_loop are helpers defined in the
# surrounding module; they are assumed to be in scope here.
def visualize(logdir, outdir, num_agents, num_episodes,
              checkpoint=None, env_processes=True):
  """Recover checkpoint and render videos from it.

  Args:
    logdir: Logging directory of the trained algorithm.
    outdir: Directory to store rendered videos in.
    num_agents: Number of environments to simulate in parallel.
    num_episodes: Total number of episodes to simulate.
    checkpoint: Checkpoint name to load; defaults to most recent.
    env_processes: Whether to step environments in separate processes.
  """
  config = utility.load_config(logdir)
  with config.unlocked:
    config.network = functools.partial(utility.define_network, config.network, config)
    config.policy_optimizer = getattr(tf.train, config.policy_optimizer)
    config.value_optimizer = getattr(tf.train, config.value_optimizer)
  with tf.device('/cpu:0'):
    batch_env = utility.define_batch_env(lambda: _create_environment(config, outdir), num_agents,
                                         env_processes)
    graph = utility.define_simulation_graph(batch_env, config.algorithm, config)
    total_steps = num_episodes * config.max_length
    loop = _define_loop(graph, total_steps)
  saver = utility.define_saver(exclude=(r'.*_temporary/.*', r'global_step'))
  sess_config = tf.ConfigProto(allow_soft_placement=True)
  sess_config.gpu_options.allow_growth = True
  with tf.Session(config=sess_config) as sess:
    utility.initialize_variables(sess, saver, config.logdir, checkpoint, resume=True)
    for unused_score in loop.run(sess, saver, total_steps):
      pass
  batch_env.close()
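A minimal way to call this might look like the following; the paths and episode count are illustrative assumptions, not values from the snippet above.

if __name__ == '__main__':
  visualize(
      logdir='/path/to/logdir',   # directory written by a previous training run
      outdir='/path/to/videos',   # where rendered episodes are saved
      num_agents=1,               # render one environment at a time
      num_episodes=5,             # total episodes to roll out
      checkpoint=None,            # None restores the most recent checkpoint
      env_processes=True)         # step the environment in a separate process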
Example #2
import functools

import tensorflow as tf

# utility, _create_environment, and _define_loop are helpers defined in the
# surrounding module; they are assumed to be in scope here.
def train(config, env_processes):
  """Training and evaluation entry point yielding scores.

  Resolves some configuration attributes, creates environments, graph, and
  training loop. By default, assigns all operations to the CPU.

  Args:
    config: Object providing configurations via attributes.
    env_processes: Whether to step environments in separate processes.

  Yields:
    Evaluation scores.
  """
  tf.reset_default_graph()
  with config.unlocked:
    config.network = functools.partial(
        utility.define_network, config.network, config)
    config.policy_optimizer = getattr(tf.train, config.policy_optimizer)
    config.value_optimizer = getattr(tf.train, config.value_optimizer)
  if config.update_every % config.num_agents:
    tf.logging.warn('Number of agents should divide episodes per update.')
  with tf.device('/cpu:0'):
    batch_env = utility.define_batch_env(
        lambda: _create_environment(config),
        config.num_agents, env_processes)
    graph = utility.define_simulation_graph(
        batch_env, config.algorithm, config)
    loop = _define_loop(
        graph, config.logdir,
        config.update_every * config.max_length,
        config.eval_episodes * config.max_length)
    total_steps = int(
        config.steps / config.update_every *
        (config.update_every + config.eval_episodes))
  # Exclude episode-related variables, since the Python state of the
  # environments is not checkpointed and new episodes therefore start fresh
  # after resuming.
  saver = utility.define_saver(exclude=(r'.*_temporary/.*',))
  sess_config = tf.ConfigProto(allow_soft_placement=True)
  sess_config.gpu_options.allow_growth = True
  with tf.Session(config=sess_config) as sess:
    utility.initialize_variables(sess, saver, config.logdir)
    for score in loop.run(sess, saver, total_steps):
      yield score
  batch_env.close()
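Since train is a generator, a caller drives the loop by iterating over it. A hedged sketch, assuming a config object obtained elsewhere (for example via utility.load_config):

for score in train(config, env_processes=True):
  tf.logging.info('Evaluation score %f.', score)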
Example #3
import functools

import tensorflow as tf

# utility, _create_environment, and _define_loop are helpers defined in the
# surrounding module; they are assumed to be in scope here.
def train(config, env_processes):
    """Training and evaluation entry point yielding scores.

    Resolves some configuration attributes, creates environments, graph, and
    training loop. By default, assigns all operations to the CPU.

    Args:
      config: Object providing configurations via attributes.
      env_processes: Whether to step environments in separate processes.

    Yields:
      Evaluation scores.
    """
    tf.reset_default_graph()
    with config.unlocked:
        config.network = functools.partial(utility.define_network,
                                           config.network, config)
        config.policy_optimizer = getattr(tf.train, config.policy_optimizer)
        config.value_optimizer = getattr(tf.train, config.value_optimizer)
    if config.update_every % config.num_agents:
        tf.logging.warn(
            'Number of agents should divide episodes per update.')
    with tf.device('/cpu:0'):
        batch_env = utility.define_batch_env(
            lambda: _create_environment(config), config.num_agents,
            env_processes)
        graph = utility.define_simulation_graph(batch_env, config.algorithm,
                                                config)
        loop = _define_loop(graph, config.logdir,
                            config.update_every * config.max_length,
                            config.eval_episodes * config.max_length)
        total_steps = int(config.steps / config.update_every *
                          (config.update_every + config.eval_episodes))
    # Exclude episode-related variables, since the Python state of the
    # environments is not checkpointed and new episodes therefore start fresh
    # after resuming.
    saver = utility.define_saver(exclude=(r'.*_temporary/.*',))
    sess_config = tf.ConfigProto(allow_soft_placement=True)
    sess_config.gpu_options.allow_growth = True
    with tf.Session(config=sess_config) as sess:
        utility.initialize_variables(sess, saver, config.logdir)
        for score in loop.run(sess, saver, total_steps):
            yield score
    batch_env.close()
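The total_steps arithmetic above can be opaque at first glance. A worked example with made-up numbers (steps, update_every, and eval_episodes here are assumptions, not defaults from any config):

steps = 1000000        # training step budget from the config
update_every = 30      # episodes collected per update phase
eval_episodes = 25     # episodes simulated per evaluation phase
# Every update_every training episodes are followed by eval_episodes
# evaluation episodes, so the step budget is scaled accordingly:
total_steps = int(steps / update_every * (update_every + eval_episodes))
print(total_steps)  # 1833333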
Example #4
import functools

import tensorflow as tf

# utility, _create_environment, and _define_loop are helpers defined in the
# surrounding module; they are assumed to be in scope here.
def visualize(logdir,
              outdir,
              num_agents,
              num_episodes,
              checkpoint=None,
              env_processes=True):
    """Recover checkpoint and render videos from it.

    Args:
      logdir: Logging directory of the trained algorithm.
      outdir: Directory to store rendered videos in.
      num_agents: Number of environments to simulate in parallel.
      num_episodes: Total number of episodes to simulate.
      checkpoint: Checkpoint name to load; defaults to most recent.
      env_processes: Whether to step environments in separate processes.
    """
    config = utility.load_config(logdir)
    with config.unlocked:
        config.network = functools.partial(utility.define_network,
                                           config.network, config)
        config.policy_optimizer = getattr(tf.train, config.policy_optimizer)
        config.value_optimizer = getattr(tf.train, config.value_optimizer)
    with tf.device('/cpu:0'):
        batch_env = utility.define_batch_env(
            lambda: _create_environment(config, outdir), num_agents,
            env_processes)
        graph = utility.define_simulation_graph(batch_env, config.algorithm,
                                                config)
        total_steps = num_episodes * config.max_length
        loop = _define_loop(graph, total_steps)
    saver = utility.define_saver(exclude=(r'.*_temporary/.*', r'global_step'))
    sess_config = tf.ConfigProto(allow_soft_placement=True)
    sess_config.gpu_options.allow_growth = True
    with tf.Session(config=sess_config) as sess:
        utility.initialize_variables(sess,
                                     saver,
                                     config.logdir,
                                     checkpoint,
                                     resume=True)
        for unused_score in loop.run(sess, saver, total_steps):
            pass
    batch_env.close()
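All four snippets resolve optimizer classes from string names with getattr. A standalone sketch of that pattern (the optimizer name and learning rate here are assumptions):

import tensorflow as tf

optimizer_name = 'AdamOptimizer'  # as it would appear in the config
optimizer_cls = getattr(tf.train, optimizer_name)
optimizer = optimizer_cls(learning_rate=1e-4)

Storing the class name as a string keeps the config serializable; the class is only looked up once the graph is being built.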