Example 1
0
 def _set_up(self, eval_mode):
     """Creates the TF session, summary writers, and agent for a run.

     Args:
       eval_mode: bool, forwarded to the agent factory to toggle
         evaluation behavior.

     Raises:
       ValueError: if the agent and environment disagree on whether
         they are multi-user.
     """
     # Clear any leftover graph from a previous run before building
     # anything, so op/variable names cannot collide.
     tf.reset_default_graph()
     self._summary_writer = tf.summary.FileWriter(self._output_dir)
     if self._episode_log_file:
         episode_log_path = os.path.join(self._output_dir,
                                         self._episode_log_file)
         self._episode_writer = tf.io.TFRecordWriter(episode_log_path)
     # The session is created first because the agent factory receives it.
     session_config = tf.ConfigProto(allow_soft_placement=True)
     self._sess = tf.Session(config=session_config)
     self._agent = self._create_agent_fn(
         self._sess,
         self._env,
         summary_writer=self._summary_writer,
         eval_mode=eval_mode)
     # Type check: env/agent must both be multi- or single-user.
     env_is_multi_user = isinstance(self._env.environment,
                                    environment.MultiUserEnvironment)
     if self._agent.multi_user and not env_is_multi_user:
         raise ValueError(
             'Multi-user agent requires multi-user environment.')
     if not self._agent.multi_user and env_is_multi_user:
         raise ValueError(
             'Single-user agent requires single-user environment.')
     self._summary_writer.add_graph(graph=tf.get_default_graph())
     self._sess.run(tf.global_variables_initializer())
     self._sess.run(tf.local_variables_initializer())
Example 2
0
    def init_subagents(self,
                       model_fns,
                       obs_specs,
                       act_specs,
                       policy_clses,
                       n_subagents=0,
                       subagent_variable_scopes=None):
        """Builds one isolated TF graph/session per subagent and restores it.

        Args:
          model_fns: list of callables `(obs_spec, act_spec) -> model`,
            one per subagent.
          obs_specs: list of observation specs, one per subagent.
          act_specs: list of action specs, one per subagent.
          policy_clses: list of policy classes, one per subagent.
          n_subagents: int, expected number of subagents; must equal the
            length of every list above.
          subagent_variable_scopes: list of variable-scope names, one per
            subagent; each is also the key into `self.subagent_dirs` and
            into the resulting `self.subagents` dict. Defaults to an
            empty list. (Was a mutable default argument `[]`, which is
            shared across calls in Python — replaced with a None
            sentinel.)
        """
        if subagent_variable_scopes is None:
            subagent_variable_scopes = []
        assert n_subagents == len(model_fns) == len(obs_specs) == len(
            policy_clses
        ) == len(act_specs) == len(
            subagent_variable_scopes
        ), "The number of subagents is not equal to the number of model_fns, or obs_specs, or act_specs"

        self.subagents = {}
        for model_fn, obs_spec, act_spec, policy_cls, subagent_variable_scope in zip(
                model_fns, obs_specs, act_specs, policy_clses,
                subagent_variable_scopes):
            subagent = Subagent()
            subagent_dir = self.subagent_dirs[subagent_variable_scope]

            print(LOGGING_MSG_HEADER, 'resetting tf graph for subagent: ',
                  subagent_variable_scope)
            # Each subagent gets its own graph/session so checkpoints can
            # be restored without name collisions between subagents.
            tf.reset_default_graph()
            subagent.sess_mgr = SessionManager(
                base_path=subagent_dir,
                training_enabled=False,
                model_variable_scope=subagent_variable_scope)
            subagent.sess = subagent.sess_mgr.sess
            subagent.variable_scope = subagent_variable_scope

            with subagent.sess.graph.as_default():
                # Re-enter the session manager's original name scope so the
                # rebuilt variable names line up with the checkpoint.
                with tf.name_scope(
                        subagent.sess_mgr.main_tf_vs.original_name_scope):
                    subagent.model = model_fn(obs_spec, act_spec)
                    # Convention used here: the last model output is the
                    # value head; the rest feed the policy.
                    subagent.value = subagent.model.outputs[-1]
                    subagent.policy = policy_cls(act_spec,
                                                 subagent.model.outputs[:-1])
                    print(LOGGING_MSG_HEADER, subagent.variable_scope,
                          ' model setup successful')

                    subagent.sess_mgr.restore_or_init()
                    print(LOGGING_MSG_HEADER, subagent.variable_scope,
                          ' model restore successful')

            self.subagents[subagent_variable_scope] = subagent

        # Map a stable integer index to each scope name so a subagent can be
        # selected interactively by index.
        self.subagents_idx_key_dict = dict(enumerate(self.subagents.keys()))

        print(LOGGING_MSG_HEADER + "{} subagents are available: {}".format(
            self.n_subagents, self.subagents_idx_key_dict))
        print("type their respective index to select them")
    def unbundle(self, checkpoint_dir, iteration_number, bundle_dictionary):
        """Restores the agent from a checkpoint.

        Restores the agent's Python objects to those specified in
        bundle_dictionary, and restores the TensorFlow objects to those
        specified in the checkpoint_dir. If the checkpoint_dir does not
        exist, will not reset the agent's state.

        Args:
          checkpoint_dir: str, path to the checkpoint saved by `tf.Save`.
          iteration_number: int, checkpoint version.
          bundle_dictionary: Dictionary containing this class's Python
            objects.

        Returns:
          A boolean indicating whether unbundling was successful.
        """
        # (Removed leftover debug print banners; was `== True` comparison.)
        if self.partial_reload:
            try:
                # replay.load() raises NotFoundError if it does not find all
                # the necessary files, in which case we abort the process.
                self._replay.load(checkpoint_dir, iteration_number)
            except tf.errors.NotFoundError:
                return False
        # Restore only attributes this instance already defines, so stray
        # keys in the bundle cannot inject new state.
        for key in self.__dict__:
            if key in bundle_dictionary:
                self.__dict__[key] = bundle_dictionary[key]

        # NOTE(review): resetting the default graph immediately before
        # restoring into self._sess looks suspicious — the saver and session
        # were built against the pre-reset graph. Preserved as-is to avoid a
        # behavior change; confirm whether the reset is actually needed.
        tf.reset_default_graph()
        self._saver.restore(self._sess,
                            tf.train.latest_checkpoint(checkpoint_dir))
        return True
Example 4
0
 def _set_up(self, eval_mode):
     """Sets up the runner by creating and initializing the agent.

     Args:
       eval_mode: bool, forwarded to the agent factory to toggle
         evaluation behavior.
     """
     # Reset the tf default graph to avoid name collisions from previous runs
     # before doing anything else.
     tf.reset_default_graph()
     self._summary_writer = tf.summary.FileWriter(self._output_dir)
     if self._episode_log_file:
         # tf.io.TFRecordWriter replaces the deprecated
         # tf.python_io.TFRecordWriter alias (consistent with the other
         # runner's _set_up in this codebase).
         self._episode_writer = tf.io.TFRecordWriter(
             os.path.join(self._output_dir, self._episode_log_file))
     # Set up a session and initialize variables.
     self._sess = tf.Session(config=tf.ConfigProto(
         allow_soft_placement=True))
     self._agent = self._create_agent_fn(
         self._sess,
         self._env,
         summary_writer=self._summary_writer,
         eval_mode=eval_mode)
     self._summary_writer.add_graph(graph=tf.get_default_graph())
     self._sess.run(tf.global_variables_initializer())
     self._sess.run(tf.local_variables_initializer())
Example 5
0
def setup_and_teardown():
    """Yield-style fixture: run the test body, then clear the TF graph.

    Resetting the default graph after the yield prevents op/variable
    names from one test leaking into the next.
    """
    yield
    tf.reset_default_graph()
Example 6
0
def reset():
    """Clears the default TensorFlow graph.

    The original wrapped the call in `with tf.get_default_graph() as g:`,
    which is broken twice over: `tf.Graph` is not a context manager (only
    the object returned by `Graph.as_default()` is), and TensorFlow
    documents that `reset_default_graph` must not be called while a graph
    is installed as default inside a `with g.as_default()` block — doing
    so results in undefined behavior. Calling the reset directly is the
    correct form.
    """
    tf.reset_default_graph()
Example 7
0
def run_experiment(agent,
                   environment,
                   start_iteration,
                   obs_stacker,
                   experiment_logger,
                   experiment_checkpointer,
                   checkpoint_dir,
                   num_iterations=200,
                   training_steps=5000,
                   logging_file_prefix='log',
                   log_every_n=100,
                   checkpoint_every_n=1):
    """Runs a full experiment, spread over multiple iterations.

    Each iteration runs `training_steps` of training via
    `run_one_iteration`, logs the resulting statistics, checkpoints the
    agent, and emits an AverageReturn summary keyed by environment steps.

    Args:
      agent: the agent to train.
      environment: the environment to train in.
      start_iteration: int, first iteration index (resume point).
      obs_stacker: observation stacker passed through to the iteration.
      experiment_logger: logger used by `log_experiment`.
      experiment_checkpointer: checkpointer used by `checkpoint_experiment`.
      checkpoint_dir: str, directory for checkpoints and summaries.
      num_iterations: int, total number of iterations (exclusive bound).
      training_steps: int, training steps per iteration.
      logging_file_prefix: str, prefix for log files.
      log_every_n: int, log cadence in iterations.
      checkpoint_every_n: int, checkpoint cadence in iterations.
    """
    tf.logging.info('Beginning training...')
    if num_iterations <= start_iteration:
        # Message fixed to match the `<=` condition above.
        tf.logging.warning('num_iterations (%d) <= start_iteration (%d)',
                           num_iterations, start_iteration)
        return

    # NOTE(review): `sess` is never used below (the code that consumed it is
    # gone) and resetting the default graph here happens after the agent was
    # built. Both are preserved as-is to avoid a behavior change — confirm
    # whether they can be removed.
    tf.reset_default_graph()
    sess = tf.Session()

    # Create the summary writer once; the original re-created (and never
    # closed) a FileWriter every iteration, leaking file handles and
    # spawning a new event file per iteration.
    summary_writer = tf.summary.FileWriter(checkpoint_dir + '/summary/')

    for iteration in range(start_iteration, num_iterations):
        statistics = run_one_iteration(agent,
                                       environment,
                                       obs_stacker,
                                       iteration,
                                       training_steps,
                                       observers=None)
        log_experiment(experiment_logger, iteration, statistics,
                       logging_file_prefix, log_every_n)
        checkpoint_experiment(experiment_checkpointer, agent,
                              experiment_logger, iteration, checkpoint_dir,
                              checkpoint_every_n)
        # Summary protos are graph-independent, so this works regardless of
        # the default-graph state.
        summary = tf.Summary()
        summary.value.add(tag='AverageReturn/EnvironmentSteps',
                          simple_value=statistics['average_return'][0])
        summary_writer.add_summary(summary, statistics['env_steps'][0])
        summary_writer.flush()

    summary_writer.close()