Example #1
 def _set_up(self, eval_mode):
     """Sets up the runner by creating and initializing the agent."""
     # Reset the tf default graph to avoid name collisions from previous runs
     # before doing anything else.
     tf.reset_default_graph()
     self._summary_writer = tf.summary.FileWriter(self._output_dir)
     if self._episode_log_file:
         self._episode_writer = tf.io.TFRecordWriter(
             os.path.join(self._output_dir, self._episode_log_file))
     # Set up a session and initialize variables.
     self._sess = tf.Session(config=tf.ConfigProto(
         allow_soft_placement=True))
     self._agent = self._create_agent_fn(
         self._sess,
         self._env,
         summary_writer=self._summary_writer,
         eval_mode=eval_mode)
     # type check: env/agent must both be multi- or single-user
     if self._agent.multi_user and not isinstance(
             self._env.environment, environment.MultiUserEnvironment):
         raise ValueError(
             'Multi-user agent requires multi-user environment.')
     if not self._agent.multi_user and isinstance(
             self._env.environment, environment.MultiUserEnvironment):
         raise ValueError(
             'Single-user agent requires single-user environment.')
     self._summary_writer.add_graph(graph=tf.get_default_graph())
     self._sess.run(tf.global_variables_initializer())
     self._sess.run(tf.local_variables_initializer())
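
A minimal sketch of an agent factory compatible with this `_set_up`. Everything here is hypothetical (`RandomAgent` and `create_random_agent_fn` are not part of the original code); it only illustrates the call signature the runner passes to `self._create_agent_fn` and the `multi_user` attribute checked against `environment.MultiUserEnvironment`.

import numpy as np


class RandomAgent(object):
    """Hypothetical single-user agent that picks random slates."""

    multi_user = False  # Checked by _set_up against MultiUserEnvironment.

    def __init__(self, sess, env, summary_writer=None, eval_mode=False):
        self._sess = sess
        self._env = env
        self._summary_writer = summary_writer
        self.eval_mode = eval_mode

    def step(self, reward, observation):
        del reward, observation  # Unused by this toy agent.
        # Return a random slate of two document indices (purely illustrative).
        return np.random.randint(0, 10, size=2)


def create_random_agent_fn(sess, env, summary_writer=None, eval_mode=False):
    """Factory with the signature _set_up expects."""
    return RandomAgent(sess, env, summary_writer=summary_writer,
                       eval_mode=eval_mode)
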
Example #2
 def _set_up(self, eval_mode):
     """Sets up the runner by creating and initializing the agent."""
     # Reset the tf default graph to avoid name collisions from previous runs
     # before doing anything else.
     tf.reset_default_graph()
     self._summary_writer = tf.summary.FileWriter(self._output_dir)
     if self._episode_log_file:
         self._episode_writer = tf.python_io.TFRecordWriter(
             os.path.join(self._output_dir, self._episode_log_file))
     # Set up a session and initialize variables.
     self._sess = tf.Session(config=tf.ConfigProto(
         allow_soft_placement=True))
     self._agent = self._create_agent_fn(
         self._sess,
         self._env,
         summary_writer=self._summary_writer,
         eval_mode=eval_mode)
     self._summary_writer.add_graph(graph=tf.get_default_graph())
     self._sess.run(tf.global_variables_initializer())
     self._sess.run(tf.local_variables_initializer())
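
Example #2 differs from Example #1 only in using the older `tf.python_io.TFRecordWriter` alias and in omitting the multi-/single-user check. A minimal sketch, assuming TensorFlow 2.x is installed, of reproducing the same graph/session set-up through the `tf.compat.v1` shims; the output directory and file name are placeholders.

import os

import tensorflow as tf

tf.compat.v1.disable_eager_execution()  # The runner relies on graph mode.
tf.compat.v1.reset_default_graph()

output_dir = '/tmp/runner_output'  # Placeholder directory.
summary_writer = tf.compat.v1.summary.FileWriter(output_dir)
episode_writer = tf.io.TFRecordWriter(
    os.path.join(output_dir, 'episodes.tfrecord'))  # Placeholder file name.

sess = tf.compat.v1.Session(
    config=tf.compat.v1.ConfigProto(allow_soft_placement=True))
summary_writer.add_graph(graph=tf.compat.v1.get_default_graph())
sess.run(tf.compat.v1.global_variables_initializer())
sess.run(tf.compat.v1.local_variables_initializer())
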
Example #3
    def __init__(self,
                 base_dir,
                 data_load_fn=load_data,
                 checkpoint_file_prefix='ckpt',
                 logging_file_prefix='log',
                 log_every_n=1,
                 num_iterations=200,
                 training_steps=250,
                 batch_size=100,
                 evaluation_inputs=None,
                 evaluation_size=None):
        """Initialize the Runner object in charge of running a full experiment.

    Args:
      base_dir: str, the base directory to host all required sub-directories.
      data_load_fn: function that returns data as a tuple (inputs, outputs).
      checkpoint_file_prefix: str, the prefix to use for checkpoint files.
      logging_file_prefix: str, prefix to use for the log files.
      log_every_n: int, the frequency for writing logs.
      num_iterations: int, the iteration number threshold (must be greater than
        start_iteration).
      training_steps: int, the number of training steps to perform.
      batch_size: int, batch size used for the training.
      evaluation_inputs: tuple of inputs to the generator that can be used
        during qualitative evaluation. If None, the inputs returned by
        data_load_fn are used.
      evaluation_size: int, the number of images to generate by randomly
        sampling from the data specified in evaluation_inputs. If None, images
        are generated for all evaluation_inputs.

    This constructor will take the following actions:
    - Initialize a `tf.Session`.
    - Initialize a logger.
    - Initialize a generator.
    - Reload from the latest checkpoint, if available, and initialize the
      Checkpointer object.
    """
        assert base_dir is not None
        inputs, data_to_generate = data_load_fn()
        assert inputs is None or inputs.shape[0] == data_to_generate.shape[0]
        assert evaluation_inputs is None or \
               evaluation_inputs.shape[1:] == inputs.shape[1:]
        assert evaluation_inputs is not None or evaluation_size is not None, \
               'Either evaluation_inputs or evaluation_size has to be initialised.'

        self._logging_file_prefix = logging_file_prefix
        self._log_every_n = log_every_n
        self._data_to_generate = data_to_generate
        self._inputs = inputs
        self._num_iterations = num_iterations
        self._training_steps = training_steps
        self._batch_size = batch_size
        self._evaluation_inputs = evaluation_inputs
        if self._evaluation_inputs is None:
            self._evaluation_inputs = inputs
        self._evaluation_size = evaluation_size
        self._base_dir = base_dir
        self._create_directories()
        self._summary_writer = tf.summary.FileWriter(self._base_dir)

        config = tf.ConfigProto(allow_soft_placement=True)
        # Allocate GPU memory incrementally (only as needed), which allows
        # running multiple workers on the same GPU.
        config.gpu_options.allow_growth = True
        # Set up a session and initialize variables.
        self._sess = tf.Session('', config=config)
        self._generator = create_generator(self._sess,
                                           data_to_generate,
                                           inputs,
                                           summary_writer=self._summary_writer)
        self._summary_writer.add_graph(graph=tf.get_default_graph())
        self._sess.run(tf.global_variables_initializer())

        self._initialize_checkpointer_and_maybe_resume(checkpoint_file_prefix)
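
A hypothetical usage sketch for this constructor. `load_random_arrays` is an illustrative stand-in for `data_load_fn` (it returns an `(inputs, outputs)` tuple with no conditioning inputs), and the keyword values are arbitrary; the final construction line is commented out because the owning Runner class is not shown here.

import numpy as np


def load_random_arrays():
    """Illustrative data_load_fn: returns (inputs, outputs) arrays."""
    outputs = np.random.rand(1000, 28, 28, 1).astype(np.float32)
    return None, outputs  # No conditioning inputs in this toy case.


runner_kwargs = dict(
    base_dir='/tmp/generator_experiment',  # Placeholder directory.
    data_load_fn=load_random_arrays,
    num_iterations=10,
    training_steps=50,
    batch_size=32,
    # evaluation_inputs defaults to None here, so evaluation_size is required
    # by the assertion in the constructor.
    evaluation_size=16,
)
# runner = Runner(**runner_kwargs)  # `Runner` is the class owning the __init__ above.
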
Example #4
    def __init__(self,
                 base_dir,
                 create_agent_fn,
                 create_environment_fn=atari_lib.create_atari_environment,
                 checkpoint_file_prefix='ckpt',
                 logging_file_prefix='log',
                 log_every_n=1,
                 num_iterations=200,
                 training_steps=250000,
                 evaluation_steps=125000,
                 max_steps_per_episode=27000,
                 reward_clipping=(-1, 1)):
        """Initialize the Runner object in charge of running a full experiment.

    Args:
      base_dir: str, the base directory to host all required sub-directories.
      create_agent_fn: A function that takes as args a TensorFlow session and an
        environment, and returns an agent.
      create_environment_fn: A function which receives a problem name and
        creates a Gym environment for that problem (e.g. an Atari 2600 game).
      checkpoint_file_prefix: str, the prefix to use for checkpoint files.
      logging_file_prefix: str, prefix to use for the log files.
      log_every_n: int, the frequency for writing logs.
      num_iterations: int, the iteration number threshold (must be greater than
        start_iteration).
      training_steps: int, the number of training steps to perform.
      evaluation_steps: int, the number of evaluation steps to perform.
      max_steps_per_episode: int, maximum number of steps after which an episode
        terminates.
      reward_clipping: Tuple(int, int), the minimum and maximum bounds for the
        reward at each step. If `None`, no clipping is applied.

    This constructor will take the following actions:
    - Initialize an environment.
    - Initialize a `tf.Session`.
    - Initialize a logger.
    - Initialize an agent.
    - Reload from the latest checkpoint, if available, and initialize the
      Checkpointer object.
    """
        assert base_dir is not None
        self._logging_file_prefix = logging_file_prefix
        self._log_every_n = log_every_n
        self._num_iterations = num_iterations
        self._training_steps = training_steps
        self._evaluation_steps = evaluation_steps
        self._max_steps_per_episode = max_steps_per_episode
        self._base_dir = base_dir
        self._create_directories()
        self._summary_writer = tf.summary.FileWriter(self._base_dir)

        self._environment = create_environment_fn()
        # Set up a session and initialize variables.
        config = tf.ConfigProto(allow_soft_placement=True)
        config.gpu_options.allow_growth = True
        self._sess = tf.Session('', config=config)

        self._agent = create_agent_fn(self._sess,
                                      self._environment,
                                      summary_writer=self._summary_writer)
        self._summary_writer.add_graph(graph=tf.get_default_graph())
        self._sess.run(tf.global_variables_initializer())

        self._initialize_checkpointer_and_maybe_resume(checkpoint_file_prefix)
        self._reward_clipping = reward_clipping
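
The constructor above only stores `reward_clipping`; the step loop is not shown. A minimal sketch, as an assumption about how such a bound pair is typically applied, of clipping each environment reward.

import numpy as np


def clip_reward(reward, reward_clipping=(-1, 1)):
    """Clips reward to the (min, max) bounds; passes it through when None."""
    if reward_clipping is None:
        return reward
    low, high = reward_clipping
    return float(np.clip(reward, low, high))


assert clip_reward(5.0) == 1.0
assert clip_reward(-3.2) == -1.0
assert clip_reward(0.7, reward_clipping=None) == 0.7
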
Example #5
  def learn_metric(self, verbose=False):
    """Approximate the bisimulation metric by learning.

    Args:
      verbose: bool, whether to print verbose messages.
    """
    summary_writer = tf.summary.FileWriter(self.base_dir)
    global_step = tf.Variable(0, trainable=False)
    inc_global_step_op = tf.assign_add(global_step, 1)
    bisim_horizon = 0.0
    bisim_horizon_discount_value = 1.0
    if self.use_decayed_learning_rate:
      learning_rate = tf.train.exponential_decay(self.starting_learning_rate,
                                                 global_step,
                                                 self.num_iterations,
                                                 self.learning_rate_decay,
                                                 staircase=self.staircase)
    else:
      learning_rate = self.starting_learning_rate
    tf.summary.scalar('Learning/LearningRate', learning_rate)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                       epsilon=self.epsilon)
    train_op = self._build_train_op(optimizer)
    sync_op = self._build_sync_op()
    eval_op = tf.stop_gradient(self._build_eval_metric())
    eval_states = []
    # Build the evaluation tensor.
    for state in range(self.num_states):
      row, col = self.inverse_index_states[state]
      # We make the evaluation states at the center of each grid cell.
      eval_states.append([row + 0.5, col + 0.5])
    eval_states = np.array(eval_states, dtype=np.float64)
    normalized_bisim_metric = (
        self.bisim_metric / np.linalg.norm(self.bisim_metric))
    metric_errors = []
    average_metric_errors = []
    normalized_metric_errors = []
    average_normalized_metric_errors = []
    saver = tf.train.Saver(max_to_keep=3)
    with tf.Session() as sess:
      summary_writer.add_graph(graph=tf.get_default_graph())
      sess.run(tf.global_variables_initializer())
      merged_summaries = tf.summary.merge_all()
      for i in range(self.num_iterations):
        sampled_states = np.random.randint(self.num_states,
                                           size=(self.batch_size,))
        sampled_actions = np.random.randint(4,
                                            size=(self.batch_size,))
        if self.add_noise:
          sampled_noise = np.clip(
              np.random.normal(0, 0.1, size=(self.batch_size, 2)),
              -0.3, 0.3)
        sampled_action_names = [self.actions[x] for x in sampled_actions]
        next_states = [self.next_states[a][s]
                       for s, a in zip(sampled_states, sampled_action_names)]
        rewards = np.array([self.rewards[a][s]
                            for s, a in zip(sampled_states,
                                            sampled_action_names)])
        states = np.array(
            [self.inverse_index_states[x] for x in sampled_states])
        next_states = np.array([self.inverse_index_states[x]
                                for x in next_states])
        states = states.astype(np.float64)
        states += 0.5  # Place points in center of grid.
        next_states = next_states.astype(np.float64)
        next_states += 0.5
        if self.add_noise:
          states += sampled_noise
          next_states += sampled_noise

        _, summary = sess.run(
            [train_op, merged_summaries],
            feed_dict={self.s1_ph: states,
                       self.s2_ph: next_states,
                       self.action_ph: sampled_actions,
                       self.rewards_ph: rewards,
                       self.bisim_horizon_ph: bisim_horizon,
                       self.eval_states_ph: eval_states})
        summary_writer.add_summary(summary, i)
        if self.double_period_halfway and i > self.num_iterations / 2.:
          self.target_update_period *= 2
          self.double_period_halfway = False
        if i % self.target_update_period == 0:
          bisim_horizon = 1.0 - bisim_horizon_discount_value
          bisim_horizon_discount_value *= self.bisim_horizon_discount
          sess.run(sync_op)
        # Now compute difference with exact metric.
        self.learned_distance = sess.run(
            eval_op, feed_dict={self.eval_states_ph: eval_states})
        self.learned_distance = np.reshape(self.learned_distance,
                                           (self.num_states, self.num_states))
        metric_difference = np.max(
            abs(self.learned_distance - self.bisim_metric))
        average_metric_difference = np.mean(
            abs(self.learned_distance - self.bisim_metric))
        normalized_learned_distance = (
            self.learned_distance / np.linalg.norm(self.learned_distance))
        normalized_metric_difference = np.max(
            abs(normalized_learned_distance - normalized_bisim_metric))
        average_normalized_metric_difference = np.mean(
            abs(normalized_learned_distance - normalized_bisim_metric))
        error_summary = tf.Summary(value=[
            tf.Summary.Value(tag='Approx/Error',
                             simple_value=metric_difference),
            tf.Summary.Value(tag='Approx/AvgError',
                             simple_value=average_metric_difference),
            tf.Summary.Value(tag='Approx/NormalizedError',
                             simple_value=normalized_metric_difference),
            tf.Summary.Value(tag='Approx/AvgNormalizedError',
                             simple_value=average_normalized_metric_difference),
        ])
        summary_writer.add_summary(error_summary, i)
        sess.run(inc_global_step_op)
        if i % 100 == 0:
          # Collect statistics every 100 steps.
          metric_errors.append(metric_difference)
          average_metric_errors.append(average_metric_difference)
          normalized_metric_errors.append(normalized_metric_difference)
          average_normalized_metric_errors.append(
              average_normalized_metric_difference)
          saver.save(sess, os.path.join(self.base_dir, 'tf_ckpt'),
                     global_step=i)
        if self.debug and i % 100 == 0:
          self.pretty_print_metric(metric_type='learned')
          print('Iteration: {}'.format(i))
          print('Metric difference: {}'.format(metric_difference))
          print('Normalized metric difference: {}'.format(
              normalized_metric_difference))
      if self.add_noise:
        # Finally, if noise is enabled, draw several samples to estimate the
        # distances between states.
        sampled_distances = {}
        for _ in range(self.total_final_samples):
          eval_states = []
          for state in range(self.num_states):
            row, col = self.inverse_index_states[state]
            # We make the evaluation states at the center of each grid cell.
            eval_states.append([row + 0.5, col + 0.5])
          eval_states = np.array(eval_states, dtype=np.float64)
          eval_noise = np.clip(
              np.random.normal(0, 0.1, size=(self.num_states, 2)),
              -0.3, 0.3)
          eval_states += eval_noise
          distance_samples = sess.run(
              eval_op, feed_dict={self.eval_states_ph: eval_states})
          distance_samples = np.reshape(distance_samples,
                                        (self.num_states, self.num_states))
          for s1 in range(self.num_states):
            for s2 in range(self.num_states):
              sampled_distances[(tuple(eval_states[s1]),
                                 tuple(eval_states[s2]))] = (
                                     distance_samples[s1, s2])
      else:
        # Otherwise we just use the last evaluation metric.
        sampled_distances = self.learned_distance
    learned_statistics = {
        'num_iterations': self.num_iterations,
        'metric_errors': metric_errors,
        'average_metric_errors': average_metric_errors,
        'normalized_metric_errors': normalized_metric_errors,
        'average_normalized_metric_errors': average_normalized_metric_errors,
        'learned_distances': sampled_distances,
    }
    self.statistics['learned'] = learned_statistics
    if verbose:
      self.pretty_print_metric(metric_type='learned')
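
A small sketch of the annealing schedule driven by `bisim_horizon_discount_value` above: on the k-th target-network sync the horizon becomes 1 - bisim_horizon_discount**k, so it rises from 0 toward 1. The value 0.9 is illustrative; the real discount is the class attribute `self.bisim_horizon_discount`.

bisim_horizon_discount = 0.9  # Illustrative value.
bisim_horizon, discount_value = 0.0, 1.0
schedule = []
for _ in range(5):  # Five target-network syncs.
    bisim_horizon = 1.0 - discount_value
    discount_value *= bisim_horizon_discount
    schedule.append(round(bisim_horizon, 4))
print(schedule)  # [0.0, 0.1, 0.19, 0.271, 0.3439]
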
Example #6
def reset():
    # Clear the default TF graph to avoid name collisions between runs.
    tf.reset_default_graph()

    def __init__(self,
                 base_dir,
                 agent_creator,
                 create_environment_fn=create_atari_environment,
                 game_name=None,
                 checkpoint_file_prefix='ckpt',
                 logging_file_prefix='log',
                 log_every_n=1,
                 num_iterations=200,
                 training_steps=250000,
                 evaluation_steps=125000,
                 max_steps_per_episode=27000):
        """Initialize the Runner object in charge of running a full experiment.

    Args:
      base_dir: str, the base directory to host all required sub-directories.
      agent_creator: A function that takes as args a TensorFlow session and an
        Atari 2600 Gym environment, and returns an agent.
      create_environment_fn: A function which receives a game name and creates
        an Atari 2600 Gym environment.
      game_name: str, name of the Atari 2600 domain to run.
      checkpoint_file_prefix: str, the prefix to use for checkpoint files.
      logging_file_prefix: str, prefix to use for the log files.
      log_every_n: int, the frequency for writing logs.
      num_iterations: int, the iteration number threshold (must be greater than
        start_iteration).
      training_steps: int, the number of training steps to perform.
      evaluation_steps: int, the number of evaluation steps to perform.
      max_steps_per_episode: int, maximum number of steps after which an episode
        terminates.

    This constructor will take the following actions:
    - Initialize an environment.
    - Initialize a `tf.Session`.
    - Initialize a logger.
    - Initialize an agent.
    - Reload from the latest checkpoint, if available, and initialize the
      Checkpointer object.
    """
        assert base_dir is not None
        self._logging_file_prefix = logging_file_prefix
        self._log_every_n = log_every_n
        self._num_iterations = num_iterations
        self._training_steps = training_steps
        self._evaluation_steps = evaluation_steps
        self._max_steps_per_episode = max_steps_per_episode
        self._base_dir = base_dir
        self._create_directories()
        self._summary_writer = tf.summary.FileWriter(self._base_dir)

        self._environment = create_environment_fn()
        # Set up a session and initialize variables.
        self._sess = tf.Session(
            '', config=tf.ConfigProto(allow_soft_placement=True))
        self._agent = agent_creator(self._sess,
                                    self._environment,
                                    summary_writer=self._summary_writer)
        self._summary_writer.add_graph(graph=tf.get_default_graph())
        self._sess.run(tf.global_variables_initializer())

        self._summary_helper = SummaryHelper(self._summary_writer)

        self._initialize_checkpointer_and_maybe_resume(checkpoint_file_prefix)
        self._steps_done = 0

        self._total_timer = None