Example #1
def _set_up(self, eval_mode):
    """Sets up the runner by creating and initializing the agent."""
    # Reset the tf default graph to avoid name collisions from previous runs
    # before doing anything else.
    tf.reset_default_graph()
    self._summary_writer = tf.summary.FileWriter(self._output_dir)
    if self._episode_log_file:
        self._episode_writer = tf.io.TFRecordWriter(
            os.path.join(self._output_dir, self._episode_log_file))
    # Set up a session and initialize variables.
    self._sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    self._agent = self._create_agent_fn(
        self._sess,
        self._env,
        summary_writer=self._summary_writer,
        eval_mode=eval_mode)
    # Type check: env/agent must both be multi- or single-user.
    if self._agent.multi_user and not isinstance(
            self._env.environment, environment.MultiUserEnvironment):
        raise ValueError('Multi-user agent requires multi-user environment.')
    if not self._agent.multi_user and isinstance(
            self._env.environment, environment.MultiUserEnvironment):
        raise ValueError('Single-user agent requires single-user environment.')
    self._summary_writer.add_graph(graph=tf.get_default_graph())
    self._sess.run(tf.global_variables_initializer())
    self._sess.run(tf.local_variables_initializer())
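A minimal sketch of the create-agent callable this runner expects. The RandomAgent class is hypothetical; only the (sess, env, summary_writer, eval_mode) calling convention and the multi_user attribute come from the snippet above.

class RandomAgent(object):
    """Hypothetical stand-in agent; only `multi_user` is checked above."""
    multi_user = False  # must agree with the wrapped environment type

    def __init__(self, sess, env, summary_writer=None, eval_mode=False):
        self._sess = sess
        self._env = env
        self.eval_mode = eval_mode

def create_agent(sess, env, summary_writer=None, eval_mode=False):
    # Same keyword signature as the self._create_agent_fn call above.
    return RandomAgent(sess, env, summary_writer=summary_writer,
                       eval_mode=eval_mode)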
Example #2
def main(args):
    run_name = FLAGS.run_name or time.strftime('%Y%m%d-%H%M%S',
                                               time.localtime())
    output_dir = path.join(FLAGS.run_dir, run_name)

    gin.bind_parameter('SC2EnvironmentConfig.map_name', FLAGS.map)

    gin_files = []
    if path.exists(output_dir):
        print('Resuming', output_dir)
        gin_files.append(path.join(output_dir, 'operative_config-0.gin'))

    if FLAGS.gin_file:
        gin_files += FLAGS.gin_file

    gin.parse_config_files_and_bindings(gin_files,
                                        FLAGS.gin_param,
                                        finalize_config=True)

    env = VecEnv(SC2Environment, SC2EnvironmentConfig())
    try:
        agent = A2CAgent(env.spec,
                         callbacks=RewardSummaryHook(
                             summary_output_dir=output_dir,
                             write_summaries_secs=30))
        runner = Runner(env, agent)

        print_parameter_summary()

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = FLAGS.gpu_memory_allow_growth
        if FLAGS.gpu_memory_fraction:
            config.gpu_options.per_process_gpu_memory_fraction = FLAGS.gpu_memory_fraction

        hooks = [gin.tf.GinConfigSaverHook(output_dir)]
        if FLAGS.step_limit:
            hooks.append(tf.train.StopAtStepHook(last_step=FLAGS.step_limit))
            hooks.append(LogProgressHook(FLAGS.step_limit))
        if FLAGS.profile:
            hooks.append(
                tf.train.ProfilerHook(save_secs=60, output_dir=output_dir))
        if FLAGS.debug:
            hooks.append(tf_debug.LocalCLIDebugHook())
        else:
            hooks.append(tf.train.NanTensorHook(agent.loss))
        with tf.train.MonitoredTrainingSession(
                config=config,
                hooks=hooks,
                checkpoint_dir=output_dir,
                save_summaries_secs=30,
                save_checkpoint_secs=FLAGS.save_checkpoint_secs,
                save_checkpoint_steps=FLAGS.save_checkpoint_steps) as sess:
            while not sess.should_stop():

                def step_fn(step_context):
                    runner.train(step_context, 512)

                sess.run_step_fn(step_fn)
    finally:
        env.close()
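The run_step_fn pattern at the end deserves a closer look. Below is a self-contained sketch, assuming TF1-style APIs (tf.compat.v1): step_context.run_with_hooks runs fetches through the session's hooks, and step_context.request_stop makes should_stop return True. The counter variable and the threshold of 5 are invented for illustration.

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

counter = tf.get_variable('counter', shape=[], dtype=tf.int64,
                          initializer=tf.zeros_initializer())
increment = tf.assign_add(counter, 1)

def step_fn(step_context):
    value = step_context.run_with_hooks(increment)
    if value >= 5:
        step_context.request_stop()  # ends the while-loop below

with tf.train.MonitoredTrainingSession() as sess:
    while not sess.should_stop():
        sess.run_step_fn(step_fn)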
Example #3
def _set_up(self, eval_mode):
    """Sets up the runner by creating and initializing the agent."""
    # Reset the tf default graph to avoid name collisions from previous runs
    # before doing anything else.
    tf.reset_default_graph()
    self._summary_writer = tf.summary.FileWriter(self._output_dir)
    if self._episode_log_file:
        self._episode_writer = tf.python_io.TFRecordWriter(
            os.path.join(self._output_dir, self._episode_log_file))
    # Set up a session and initialize variables.
    self._sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    self._agent = self._create_agent_fn(
        self._sess,
        self._env,
        summary_writer=self._summary_writer,
        eval_mode=eval_mode)
    self._summary_writer.add_graph(graph=tf.get_default_graph())
    self._sess.run(tf.global_variables_initializer())
    self._sess.run(tf.local_variables_initializer())
Example #4
def train_q(dataset,
            policy,
            optimizer=None,
            pack_transition_fn=None,
            q_graph_fn=None,
            log_dir=None,
            master='',
            task=0,
            training_steps=None,
            max_training_steps=100000,
            reuse=False,
            init_checkpoint=None,
            update_target_every_n_steps=50,
            log_every_n_steps=None,
            save_checkpoint_steps=500,
            save_summaries_steps=500):
    """Self-contained learning loop for offline Q-learning.

  Code inspired by OpenAI Baselines' deepq.build_train. This function is
  compatible with discrete Q-learning graphs, continuous Q learning graphs, and
  SARSA.

  Args:
    dataset: tf.data.Dataset providing transitions.
    policy: Instance of TFDQNPolicy class that provides functor for building the
      critic function.
    optimizer: Optional instance of an optimizer. If not specified, creates an
      AdamOptimizer using the default constructor.
    pack_transition_fn: Optional function that performs additional processing
      of the transition. This is a convenience method for ad-hoc manipulation of
      transition data passed to the learning function after parsing.
    q_graph_fn: Function used to construct training objectives w.r.t. critic
      outputs.
    log_dir: Where to save model checkpoints and tensorboard summaries.
    master: Optional address of master worker. Specify this when doing
      distributed training.
    task: Optional worker task for distributed training. Defaults to solo master
      task on a single machine.
    training_steps: Optional number of steps to run training before terminating
      early. `max_training_steps` remains unchanged: training terminates after
      `max_training_steps` total steps whether or not `training_steps` is
      specified.
    max_training_steps: maximum number of training iters.
    reuse: If True, reuse existing variables for all declared variables by this
      function.
    init_checkpoint: Optional checkpoint to restore prior to training. If not
      provided, variables are initialized using global_variables_initializer().
    update_target_every_n_steps: How many global steps (training) between
      copying the Q network weights (scope='q_func') to target network
      (scope='target_q_func').
    log_every_n_steps: How many global steps between logging loss tensors.
    save_checkpoint_steps: How many global steps between saving TF variables
      to a checkpoint file.
    save_summaries_steps: How many global steps between saving TF summaries.

  Returns:
    A tuple `(np_step, done)`: the current `global_step` reached after
      training, and a bool indicating whether `global_step` has reached
      `max_training_steps`.

  Raises:
    ValueError: If a batch of transitions is empty or the zeroth element is
      empty, when it's supposed to be of length batch_size.
  """
    data_iterator = dataset.make_one_shot_iterator()

    transition = data_iterator.get_next()
    if pack_transition_fn:
        transition = pack_transition_fn(transition)

    if optimizer is None:
        optimizer = tf.train.AdamOptimizer()

    q_func = policy.get_q_func(is_training=True, reuse=reuse)
    loss, all_summaries = q_graph_fn(q_func, transition)

    q_func_vars = contrib_framework.get_trainable_variables(scope='q_func')
    target_q_func_vars = contrib_framework.get_trainable_variables(
        scope='target_q_func')
    global_step = tf.train.get_or_create_global_step()

    # Only optimize q_func and update its batchnorm params.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope='q_func')
    with tf.control_dependencies(update_ops):
        train_op = optimizer.minimize(loss,
                                      global_step=global_step,
                                      var_list=q_func_vars)

    chief_hooks = []
    hooks = []
    # Save summaries periodically.
    if save_summaries_steps is not None:
        chief_hooks.append(
            tf.train.SummarySaverHook(save_steps=save_summaries_steps,
                                      output_dir=log_dir,
                                      summary_op=all_summaries))

    # Stop after max_training_steps.
    if max_training_steps:
        hooks.append(tf.train.StopAtStepHook(last_step=max_training_steps))

    # Report if loss tensor is NaN.
    hooks.append(tf.train.NanTensorHook(loss))

    if log_every_n_steps is not None:
        tensor_dict = {'global_step': global_step, 'train loss': loss}
        chief_hooks.append(
            tf.train.LoggingTensorHook(tensor_dict,
                                       every_n_iter=log_every_n_steps))

        # Measure training speed (global steps/sec) and save it to summaries.
        chief_hooks.append(
            tf.train.StepCounterHook(every_n_steps=log_every_n_steps,
                                     output_dir=log_dir))

    # If a target network exists, periodically copy the new Q network weights
    # into it (the target network stays frozen between updates). We hack this
    # by abusing a LoggingTensorHook: fetching the logged tensor below triggers
    # the assign ops through its control dependency.
    if target_q_func_vars and update_target_every_n_steps is not None:
        update_target_expr = []
        for var, var_t in zip(sorted(q_func_vars, key=lambda v: v.name),
                              sorted(target_q_func_vars,
                                     key=lambda v: v.name)):
            update_target_expr.append(var_t.assign(var))
        update_target_expr = tf.group(*update_target_expr)

        with tf.control_dependencies([update_target_expr]):
            update_target = tf.constant(0)
        chief_hooks.append(
            tf.train.LoggingTensorHook(
                {'update_target': update_target},
                every_n_iter=update_target_every_n_steps))

    # Save checkpoints periodically; keep all of them (max_to_keep=None).
    saver = tf.train.Saver(max_to_keep=None)
    chief_hooks.append(
        tf.train.CheckpointSaverHook(log_dir,
                                     save_steps=save_checkpoint_steps,
                                     saver=saver,
                                     checkpoint_basename='model.ckpt'))

    # Save our experiment params to checkpoint dir.
    chief_hooks.append(
        gin.tf.GinConfigSaverHook(log_dir, summarize_config=True))

    session_config = tf.ConfigProto(log_device_placement=False)

    init_fn = None
    if init_checkpoint:
        assign_fn = contrib_framework.assign_from_checkpoint_fn(
            init_checkpoint, contrib_framework.get_model_variables())
        init_fn = lambda _, sess: assign_fn(sess)
    scaffold = tf.train.Scaffold(saver=saver, init_fn=init_fn)
    with tf.train.MonitoredTrainingSession(
            master=master,
            is_chief=(task == 0),
            config=session_config,
            checkpoint_dir=log_dir,
            scaffold=scaffold,
            hooks=hooks,
            chief_only_hooks=chief_hooks) as sess:
        np_step = 0
        while not sess.should_stop():
            np_step, _ = sess.run([global_step, train_op])
            if training_steps and np_step % training_steps == 0:
                break
        done = np_step >= max_training_steps
    return np_step, done
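The target-network update trick above (a LoggingTensorHook whose logged tensor carries the assign ops as control dependencies) can be hard to parse in context. Here is a self-contained sketch, assuming TF1 (tf.compat.v1); the variable names and the every_n_iter value are placeholders.

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

with tf.variable_scope('q_func'):
    w = tf.get_variable('w', initializer=1.0)
with tf.variable_scope('target_q_func'):
    w_target = tf.get_variable('w', initializer=0.0)

# Fetching `update_target` forces the assign to run first.
with tf.control_dependencies([w_target.assign(w)]):
    update_target = tf.constant(0)

sync_hook = tf.train.LoggingTensorHook({'update_target': update_target},
                                       every_n_iter=50)
global_step = tf.train.get_or_create_global_step()
train_op = tf.assign_add(global_step, 1)
with tf.train.MonitoredTrainingSession(hooks=[sync_hook]) as sess:
    for _ in range(100):
        sess.run(train_op)  # the hook syncs the weights every 50 steps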
Example #5
    def __init__(self,
                 base_dir,
                 data_load_fn=load_data,
                 checkpoint_file_prefix='ckpt',
                 logging_file_prefix='log',
                 log_every_n=1,
                 num_iterations=200,
                 training_steps=250,
                 batch_size=100,
                 evaluation_inputs=None,
                 evaluation_size=None):
        """Initialize the Runner object in charge of running a full experiment.

    Args:
      base_dir: str, the base directory to host all required sub-directories.
      data_load_fn: function that returns data as a tuple (inputs, outputs).
      checkpoint_file_prefix: str, the prefix to use for checkpoint files.
      logging_file_prefix: str, prefix to use for the log files.
      log_every_n: int, the frequency for writing logs.
      num_iterations: int, the iteration number threshold (must be greater than
        start_iteration).
      training_steps: int, the number of training steps to perform.
      batch_size: int, batch size used for the training.
      evaluation_inputs: tuple of inputs to the generator that can be used
        during qualitative evaluation. If None, inputs set passed above will
        be used.
      evaluation_size: int, the number of images that should be generated
        randomly sampling from the data specified in evaluation_inputs. If
        None, all evaluation_inputs are generated.

    This constructor will take the following actions:
    - Initialize a `tf.Session`.
    - Initialize a logger.
    - Initialize a generator.
    - Reload from the latest checkpoint, if available, and initialize the
      Checkpointer object.
    """
        assert base_dir is not None
        inputs, data_to_generate = data_load_fn()
        assert inputs is None or inputs.shape[0] == data_to_generate.shape[0]
        assert evaluation_inputs is None or \
               evaluation_inputs.shape[1:] == inputs.shape[1:]
        assert evaluation_inputs is not None or evaluation_size is not None, \
               'Either evaluation_inputs or evaluation_size has to be initialised.'

        self._logging_file_prefix = logging_file_prefix
        self._log_every_n = log_every_n
        self._data_to_generate = data_to_generate
        self._inputs = inputs
        self._num_iterations = num_iterations
        self._training_steps = training_steps
        self._batch_size = batch_size
        self._evaluation_inputs = evaluation_inputs
        if self._evaluation_inputs is None:
            self._evaluation_inputs = inputs
        self._evaluation_size = evaluation_size
        self._base_dir = base_dir
        self._create_directories()
        self._summary_writer = tf.summary.FileWriter(self._base_dir)

        config = tf.ConfigProto(allow_soft_placement=True)
        # Allocate only subset of the GPU memory as needed which allows for running
        # multiple workers on the same GPU.
        config.gpu_options.allow_growth = True
        # Set up a session and initialize variables.
        self._sess = tf.Session('', config=config)
        self._generator = create_generator(self._sess,
                                           data_to_generate,
                                           inputs,
                                           summary_writer=self._summary_writer)
        self._summary_writer.add_graph(graph=tf.get_default_graph())
        self._sess.run(tf.global_variables_initializer())

        self._initialize_checkpointer_and_maybe_resume(checkpoint_file_prefix)
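For reference, a hedged sketch of the data_load_fn contract assumed by this constructor: it returns an (inputs, data_to_generate) pair, where inputs may be None or an array aligned with data_to_generate along the first axis, per the asserts above. The shapes below are placeholders, not the real dataset.

import numpy as np

def load_data():
    data_to_generate = np.random.rand(100, 28, 28, 1).astype(np.float32)
    inputs = None  # or an array with inputs.shape[0] == 100
    return inputs, data_to_generate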
Example #6
    def __init__(self,
                 num_actions=None,
                 observation_size=None,
                 num_players=None,
                 gamma=0.99,
                 update_horizon=1,
                 min_replay_history=500,
                 update_period=4,
                 stack_size=1,
                 target_update_period=500,
                 epsilon_fn=linearly_decaying_epsilon,
                 epsilon_train=0.02,
                 epsilon_eval=0.001,
                 epsilon_decay_period=1000,
                 graph_template=dqn_template,
                 tf_device='/cpu:*',
                 use_staging=True,
                 optimizer=tf.train.RMSPropOptimizer(learning_rate=.0025,
                                                     decay=0.95,
                                                     momentum=0.0,
                                                     epsilon=1e-6,
                                                     centered=True)):
        """Initializes the agent and constructs its graph.

    Args:
      num_actions: int, number of actions the agent can take at any state.
      observation_size: int, size of observation vector.
      num_players: int, number of players playing this game.
      gamma: float, discount factor as commonly used in the RL literature.
      update_horizon: int, horizon at which updates are performed, the 'n' in
        n-step update.
      min_replay_history: int, number of stored transitions before training.
      update_period: int, period between DQN updates.
      stack_size: int, number of observations to use as state.
      target_update_period: Update period for the target network.
      epsilon_fn: Function expecting 4 parameters: (decay_period, step,
        warmup_steps, epsilon), and which returns the epsilon value used for
        exploration during training.
      epsilon_train: float, final epsilon for training.
      epsilon_eval: float, epsilon during evaluation.
      epsilon_decay_period: int, number of steps for epsilon to decay.
      graph_template: function for building the neural network graph.
      tf_device: str, Tensorflow device on which to run computations.
      use_staging: bool, when True use a staging area to prefetch the next
        sampling batch.
      optimizer: Optimizer instance used for learning.
    """

        self.partial_reload = False

        tf.logging.info('Creating %s agent with the following parameters:',
                        self.__class__.__name__)
        tf.logging.info('\t gamma: %f', gamma)
        tf.logging.info('\t update_horizon: %f', update_horizon)
        tf.logging.info('\t min_replay_history: %d', min_replay_history)
        tf.logging.info('\t update_period: %d', update_period)
        tf.logging.info('\t target_update_period: %d', target_update_period)
        tf.logging.info('\t epsilon_train: %f', epsilon_train)
        tf.logging.info('\t epsilon_eval: %f', epsilon_eval)
        tf.logging.info('\t epsilon_decay_period: %d', epsilon_decay_period)
        tf.logging.info('\t tf_device: %s', tf_device)
        tf.logging.info('\t use_staging: %s', use_staging)
        tf.logging.info('\t optimizer: %s', optimizer)

        # Global variables.
        self.num_actions = num_actions
        self.observation_size = observation_size
        self.num_players = num_players
        self.gamma = gamma
        self.update_horizon = update_horizon
        self.cumulative_gamma = math.pow(gamma, update_horizon)
        self.min_replay_history = min_replay_history
        self.target_update_period = target_update_period
        self.epsilon_fn = epsilon_fn
        self.epsilon_train = epsilon_train
        self.epsilon_eval = epsilon_eval
        self.epsilon_decay_period = epsilon_decay_period
        self.update_period = update_period
        self.eval_mode = False
        self.training_steps = 0
        self.batch_staged = False
        self.optimizer = optimizer

        with tf.device(tf_device):
            # Calling online_convnet will generate a new graph as defined in
            # graph_template using whatever input is passed, but will always share
            # the same weights.
            online_convnet = tf.make_template('Online', graph_template)
            target_convnet = tf.make_template('Target', graph_template)
            # The state of the agent. The last axis is the number of past observations
            # that make up the state.
            states_shape = (1, observation_size, stack_size)
            self.state = np.zeros(states_shape)
            self.state_ph = tf.placeholder(tf.uint8,
                                           states_shape,
                                           name='state_ph')
            self.legal_actions_ph = tf.placeholder(tf.float32,
                                                   [self.num_actions],
                                                   name='legal_actions_ph')
            self._q = online_convnet(state=self.state_ph,
                                     num_actions=self.num_actions)
            self._replay = self._build_replay_memory(use_staging)
            self._replay_qs = online_convnet(self._replay.states,
                                             self.num_actions)
            self._replay_next_qt = target_convnet(self._replay.next_states,
                                                  self.num_actions)
            self._train_op = self._build_train_op()
            self._sync_qt_ops = self._build_sync_op()

            self._q_argmax = tf.argmax(self._q + self.legal_actions_ph,
                                       axis=1)[0]

        # Set up a session and initialize variables.
        self._sess = tf.Session(
            '', config=tf.ConfigProto(allow_soft_placement=True))
        self._init_op = tf.global_variables_initializer()
        self._sess.run(self._init_op)

        self._saver = tf.train.Saver(max_to_keep=3)

        # This keeps track of the observed transitions during play, for each
        # player.
        self.transitions = [[] for _ in range(num_players)]
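The weight sharing promised by the comment on online_convnet comes from tf.make_template. A minimal standalone sketch, assuming TF1 (tf.compat.v1); the layer function is invented for illustration.

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

def layer(state):
    w = tf.get_variable('w', shape=[4, 2])
    return tf.matmul(state, w)

net = tf.make_template('Online', layer)
q_a = net(tf.zeros([1, 4]))  # creates 'Online/w'
q_b = net(tf.ones([3, 4]))   # reuses 'Online/w'; no new variables
print([v.name for v in tf.global_variables()])  # ['Online/w:0']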
Example #7
    def __init__(self,
                 base_dir,
                 create_agent_fn,
                 create_environment_fn=atari_lib.create_atari_environment,
                 checkpoint_file_prefix='ckpt',
                 logging_file_prefix='log',
                 log_every_n=1,
                 num_iterations=200,
                 training_steps=250000,
                 evaluation_steps=125000,
                 max_steps_per_episode=27000,
                 reward_clipping=(-1, 1)):
        """Initialize the Runner object in charge of running a full experiment.

    Args:
      base_dir: str, the base directory to host all required sub-directories.
      create_agent_fn: A function that takes as args a Tensorflow session and an
        environment, and returns an agent.
      create_environment_fn: A function which receives a problem name and
        creates a Gym environment for that problem (e.g. an Atari 2600 game).
      checkpoint_file_prefix: str, the prefix to use for checkpoint files.
      logging_file_prefix: str, prefix to use for the log files.
      log_every_n: int, the frequency for writing logs.
      num_iterations: int, the iteration number threshold (must be greater than
        start_iteration).
      training_steps: int, the number of training steps to perform.
      evaluation_steps: int, the number of evaluation steps to perform.
      max_steps_per_episode: int, maximum number of steps after which an episode
        terminates.
      reward_clipping: Tuple(int, int), with the minimum and maximum bounds for
        reward at each step. If `None` no clipping is applied.

    This constructor will take the following actions:
    - Initialize an environment.
    - Initialize a `tf.Session`.
    - Initialize a logger.
    - Initialize an agent.
    - Reload from the latest checkpoint, if available, and initialize the
      Checkpointer object.
    """
        assert base_dir is not None
        self._logging_file_prefix = logging_file_prefix
        self._log_every_n = log_every_n
        self._num_iterations = num_iterations
        self._training_steps = training_steps
        self._evaluation_steps = evaluation_steps
        self._max_steps_per_episode = max_steps_per_episode
        self._base_dir = base_dir
        self._create_directories()
        self._summary_writer = tf.summary.FileWriter(self._base_dir)

        self._environment = create_environment_fn()
        # Set up a session and initialize variables.
        config = tf.ConfigProto(allow_soft_placement=True)
        config.gpu_options.allow_growth = True
        self._sess = tf.Session('', config=config)

        self._agent = create_agent_fn(self._sess,
                                      self._environment,
                                      summary_writer=self._summary_writer)
        self._summary_writer.add_graph(graph=tf.get_default_graph())
        self._sess.run(tf.global_variables_initializer())

        self._initialize_checkpointer_and_maybe_resume(checkpoint_file_prefix)
        self._reward_clipping = reward_clipping
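A hedged sketch of how a (min, max) reward_clipping tuple like the one stored above is typically consumed in the run loop; the _clip_reward helper is hypothetical and not part of the Runner shown.

import numpy as np

def _clip_reward(reward, reward_clipping):
    if reward_clipping is None:
        return reward  # no clipping requested
    low, high = reward_clipping
    return np.clip(reward, low, high)

_clip_reward(3.0, (-1, 1))   # -> 1.0
_clip_reward(-0.5, (-1, 1))  # -> -0.5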
Example #8
    def initialize_session(self):
        """Initializes a tf Session."""
        if ENABLE_TF_OPTIMIZATIONS:
            self.sess = tf.Session()
        else:
            rewriter_config = rewriter_config_pb2.RewriterConfig(
                disable_model_pruning=True,
                constant_folding=rewriter_config_pb2.RewriterConfig.OFF,
                arithmetic_optimization=rewriter_config_pb2.RewriterConfig.OFF,
                remapping=rewriter_config_pb2.RewriterConfig.OFF,
                shape_optimization=rewriter_config_pb2.RewriterConfig.OFF,
                dependency_optimization=rewriter_config_pb2.RewriterConfig.OFF,
                function_optimization=rewriter_config_pb2.RewriterConfig.OFF,
                layout_optimizer=rewriter_config_pb2.RewriterConfig.OFF,
                loop_optimization=rewriter_config_pb2.RewriterConfig.OFF,
                memory_optimization=rewriter_config_pb2.RewriterConfig.NO_MEM_OPT)
            graph_options = tf.GraphOptions(rewrite_options=rewriter_config)
            session_config = tf.ConfigProto(graph_options=graph_options)
            self.sess = tf.Session(config=session_config)

        # Restore or initialize the variables.
        self.sess.run(tf.global_variables_initializer())
        self.sess.run(tf.local_variables_initializer())
        if self.learner_config.checkpoint_for_eval:
            # Requested a specific checkpoint.
            self.saver.restore(self.sess,
                               self.learner_config.checkpoint_for_eval)
            tf.logging.info('Restored checkpoint: %s' %
                            self.learner_config.checkpoint_for_eval)
        else:
            # Continue from the latest checkpoint if one exists.
            # This handles fault-tolerance.
            latest_checkpoint = None
            if self.checkpoint_dir is not None:
                latest_checkpoint = tf.train.latest_checkpoint(
                    self.checkpoint_dir)
            if latest_checkpoint:
                self.saver.restore(self.sess, latest_checkpoint)
                tf.logging.info('Restored checkpoint: %s' % latest_checkpoint)
            else:
                tf.logging.info('No previous checkpoint.')
                self.sess.run(tf.global_variables_initializer())
                self.sess.run(tf.local_variables_initializer())

        # For episodic models, potentially use pretrained weights at the start of
        # training. If this happens it will overwrite the embedding weights, while
        # taking care not to restore the Adam parameters.
        if self.learner_config.pretrained_checkpoint and not self.sess.run(
                tf.train.get_global_step()):
            self.saver.restore(self.sess,
                               self.learner_config.pretrained_checkpoint)
            tf.logging.info('Restored checkpoint: %s' %
                            self.learner_config.pretrained_checkpoint)
            # We only want the embedding weights of the checkpoint we just restored.
            # So we re-initialize everything that's not an embedding weight. Also,
            # since this episodic finetuning procedure is a different optimization
            # problem than the original training of the baseline whose embedding
            # weights are re-used, we do not reload ADAM's variables and instead learn
            # them from scratch.
            vars_to_reinit, embedding_var_names, vars_to_reinit_names = [], [], []
            for var in tf.global_variables():
                if (any(keyword in var.name for keyword in EMBEDDING_KEYWORDS)
                        and 'adam' not in var.name.lower()):
                    embedding_var_names.append(var.name)
                    continue
                vars_to_reinit.append(var)
                vars_to_reinit_names.append(var.name)
            tf.logging.info('Initializing all variables except for %s.' %
                            embedding_var_names)
            self.sess.run(tf.variables_initializer(vars_to_reinit))
            tf.logging.info('Re-initialized vars %s.' % vars_to_reinit_names)
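The selective re-initialization at the end of initialize_session is the subtle part: keep only the embedding weights that were just restored, and re-initialize everything else (including Adam slots). A compact standalone sketch, assuming TF1 (tf.compat.v1) and a stand-in EMBEDDING_KEYWORDS list.

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

EMBEDDING_KEYWORDS = ('embedding',)  # stand-in; defined elsewhere in the real code

with tf.variable_scope('embedding'):
    tf.get_variable('w', shape=[3])
with tf.variable_scope('head'):
    tf.get_variable('w', shape=[3])

vars_to_reinit = [
    v for v in tf.global_variables()
    if not (any(k in v.name for k in EMBEDDING_KEYWORDS)
            and 'adam' not in v.name.lower())
]
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Re-initialize only the non-embedding variables, mirroring the loop above.
    sess.run(tf.variables_initializer(vars_to_reinit))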
Example #9
    def __init__(self,
                 base_dir,
                 agent_creator,
                 create_environment_fn=create_atari_environment,
                 game_name=None,
                 checkpoint_file_prefix='ckpt',
                 logging_file_prefix='log',
                 log_every_n=1,
                 num_iterations=200,
                 training_steps=250000,
                 evaluation_steps=125000,
                 max_steps_per_episode=27000):
        """Initialize the Runner object in charge of running a full experiment.

    Args:
      base_dir: str, the base directory to host all required sub-directories.
      agent_creator: A function that takes as args a Tensorflow session and an
        Atari 2600 Gym environment, and returns an agent.
      create_environment_fn: A function which receives a game name and creates
        an Atari 2600 Gym environment.
      game_name: str, name of the Atari 2600 domain to run.
      checkpoint_file_prefix: str, the prefix to use for checkpoint files.
      logging_file_prefix: str, prefix to use for the log files.
      log_every_n: int, the frequency for writing logs.
      num_iterations: int, the iteration number threshold (must be greater than
        start_iteration).
      training_steps: int, the number of training steps to perform.
      evaluation_steps: int, the number of evaluation steps to perform.
      max_steps_per_episode: int, maximum number of steps after which an episode
        terminates.

    This constructor will take the following actions:
    - Initialize an environment.
    - Initialize a `tf.Session`.
    - Initialize a logger.
    - Initialize an agent.
    - Reload from the latest checkpoint, if available, and initialize the
      Checkpointer object.
    """
        assert base_dir is not None
        self._logging_file_prefix = logging_file_prefix
        self._log_every_n = log_every_n
        self._num_iterations = num_iterations
        self._training_steps = training_steps
        self._evaluation_steps = evaluation_steps
        self._max_steps_per_episode = max_steps_per_episode
        self._base_dir = base_dir
        self._create_directories()
        self._summary_writer = tf.summary.FileWriter(self._base_dir)

        self._environment = create_environment_fn()
        # Set up a session and initialize variables.
        self._sess = tf.Session(
            '', config=tf.ConfigProto(allow_soft_placement=True))
        self._agent = agent_creator(self._sess,
                                    self._environment,
                                    summary_writer=self._summary_writer)
        self._summary_writer.add_graph(graph=tf.get_default_graph())
        self._sess.run(tf.global_variables_initializer())

        self._summary_helper = SummaryHelper(self._summary_writer)

        self._initialize_checkpointer_and_maybe_resume(checkpoint_file_prefix)
        self._steps_done = 0

        self._total_timer = None