def _set_up(self, eval_mode):
    """Sets up the runner by creating and initializing the agent."""
    # Reset the tf default graph to avoid name collisions from previous runs
    # before doing anything else.
    tf.reset_default_graph()
    self._summary_writer = tf.summary.FileWriter(self._output_dir)
    if self._episode_log_file:
        self._episode_writer = tf.io.TFRecordWriter(
            os.path.join(self._output_dir, self._episode_log_file))
    # Set up a session and initialize variables.
    self._sess = tf.Session(
        config=tf.ConfigProto(allow_soft_placement=True))
    self._agent = self._create_agent_fn(
        self._sess,
        self._env,
        summary_writer=self._summary_writer,
        eval_mode=eval_mode)
    # Type check: env and agent must both be multi-user or both single-user.
    if self._agent.multi_user and not isinstance(
            self._env.environment, environment.MultiUserEnvironment):
        raise ValueError('Multi-user agent requires multi-user environment.')
    if not self._agent.multi_user and isinstance(
            self._env.environment, environment.MultiUserEnvironment):
        raise ValueError('Single-user agent requires single-user environment.')
    self._summary_writer.add_graph(graph=tf.get_default_graph())
    self._sess.run(tf.global_variables_initializer())
    self._sess.run(tf.local_variables_initializer())
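# A minimal sketch of a callable that _set_up() above could receive as
# self._create_agent_fn. The names `RandomAgent` and `create_random_agent`,
# and the use of `env.action_space`, are assumptions introduced only to
# illustrate the expected signature (sess, env, summary_writer=...,
# eval_mode=...) and the `multi_user` attribute checked during setup.
class RandomAgent(object):
    """Toy single-user agent used only for illustration."""

    multi_user = False

    def __init__(self, sess, action_space, summary_writer=None, eval_mode=False):
        self._sess = sess
        self._action_space = action_space
        self._summary_writer = summary_writer
        self.eval_mode = eval_mode

    def step(self, reward, observation):
        del reward, observation  # Unused by the random policy.
        return self._action_space.sample()


def create_random_agent(sess, env, summary_writer=None, eval_mode=False):
    return RandomAgent(sess, env.action_space,
                       summary_writer=summary_writer, eval_mode=eval_mode)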
def init_subagents(self,
                   model_fns,
                   obs_specs,
                   act_specs,
                   policy_clses,
                   n_subagents=0,
                   subagent_variable_scopes=[]):
    assert n_subagents == len(model_fns) == len(obs_specs) == len(
        policy_clses) == len(act_specs) == len(subagent_variable_scopes), (
            "n_subagents must match the number of model_fns, obs_specs, "
            "act_specs, policy_clses and subagent_variable_scopes")

    self.subagents = {}
    for model_fn, obs_spec, act_spec, policy_cls, subagent_variable_scope in zip(
            model_fns, obs_specs, act_specs, policy_clses,
            subagent_variable_scopes):
        subagent = Subagent()
        subagent_dir = self.subagent_dirs[subagent_variable_scope]

        print(LOGGING_MSG_HEADER, 'resetting tf graph for subagent:',
              subagent_variable_scope)
        tf.reset_default_graph()

        subagent.sess_mgr = SessionManager(
            base_path=subagent_dir,
            training_enabled=False,
            model_variable_scope=subagent_variable_scope)
        subagent.sess = subagent.sess_mgr.sess
        subagent.variable_scope = subagent_variable_scope

        with subagent.sess.graph.as_default():
            with tf.name_scope(subagent.sess_mgr.main_tf_vs.original_name_scope):
                subagent.model = model_fn(obs_spec, act_spec)
                subagent.value = subagent.model.outputs[-1]
                subagent.policy = policy_cls(act_spec, subagent.model.outputs[:-1])
                print(LOGGING_MSG_HEADER, subagent.variable_scope,
                      'model setup successful')

                subagent.sess_mgr.restore_or_init()
                print(LOGGING_MSG_HEADER, subagent.variable_scope,
                      'model restore successful')

        self.subagents[subagent_variable_scope] = subagent

    self.subagents_idx_key_dict = {
        idx: scope for idx, scope in enumerate(self.subagents.keys())
    }
    print(LOGGING_MSG_HEADER + "{} subagents are available: {}".format(
        self.n_subagents, self.subagents_idx_key_dict))
    print("type their respective index to select them")
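# Hedged sketch: a helper that resolves the printed index back to a subagent,
# matching the "type their respective index to select them" prompt above.
# `select_subagent` is a hypothetical method name, not part of the original
# class; it only shows how subagents_idx_key_dict is meant to be used.
def select_subagent(self, idx):
    """Returns the Subagent registered under the idx-th variable scope."""
    scope = self.subagents_idx_key_dict[idx]
    return self.subagents[scope]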
def unbundle(self, checkpoint_dir, iteration_number, bundle_dictionary):
    """Restores the agent from a checkpoint.

    Restores the agent's Python objects to those specified in
    bundle_dictionary, and restores the TensorFlow objects to those specified
    in the checkpoint_dir. If the checkpoint_dir does not exist, will not
    reset the agent's state.

    Args:
      checkpoint_dir: str, path to the checkpoint saved by `tf.train.Saver`.
      iteration_number: int, checkpoint version.
      bundle_dictionary: dict, containing this class's Python objects.

    Returns:
      A boolean indicating whether unbundling was successful.
    """
    print("=========================")
    print("ENTERED UNBUNDLE FUNCTION")
    print("=========================")
    print(self.partial_reload)
    if self.partial_reload:
        print(f"partial reload = {self.partial_reload}")
        print("Entered partial reload branch")
        try:
            # replay.load() raises a NotFoundError if it does not find all the
            # necessary files, in which case we should abort the process.
            self._replay.load(checkpoint_dir, iteration_number)
            print("Replay memory loading successful")
        except tf.errors.NotFoundError:
            return False
    else:
        print("Didn't enter partial reload branch")
        for key in self.__dict__:
            if key in bundle_dictionary:
                self.__dict__[key] = bundle_dictionary[key]
        print("==============================")
        print("Done loading bundle dictionary")
        print("==============================")

    tf.reset_default_graph()
    self._saver.restore(self._sess,
                        tf.train.latest_checkpoint(checkpoint_dir))
    print("Saver restored latest checkpoint")
    return True
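# Hedged usage sketch: how a runner might call unbundle() when resuming from a
# checkpoint. The `load_bundle` helper and the pickle file layout are
# assumptions for illustration; they are not defined in the snippet above.
import os
import pickle


def load_bundle(checkpoint_dir, iteration_number):
    # Assumed layout: one pickled dict of Python-side state per iteration.
    path = os.path.join(checkpoint_dir, f"bundle.{iteration_number}.pkl")
    with open(path, "rb") as f:
        return pickle.load(f)


def resume_agent(agent, checkpoint_dir, iteration_number):
    bundle = load_bundle(checkpoint_dir, iteration_number)
    if not agent.unbundle(checkpoint_dir, iteration_number, bundle):
        raise RuntimeError("Could not restore agent from %s" % checkpoint_dir)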
def _set_up(self, eval_mode):
    """Sets up the runner by creating and initializing the agent."""
    # Reset the tf default graph to avoid name collisions from previous runs
    # before doing anything else.
    tf.reset_default_graph()
    self._summary_writer = tf.summary.FileWriter(self._output_dir)
    if self._episode_log_file:
        self._episode_writer = tf.python_io.TFRecordWriter(
            os.path.join(self._output_dir, self._episode_log_file))
    # Set up a session and initialize variables.
    self._sess = tf.Session(
        config=tf.ConfigProto(allow_soft_placement=True))
    self._agent = self._create_agent_fn(
        self._sess,
        self._env,
        summary_writer=self._summary_writer,
        eval_mode=eval_mode)
    self._summary_writer.add_graph(graph=tf.get_default_graph())
    self._sess.run(tf.global_variables_initializer())
    self._sess.run(tf.local_variables_initializer())
def setup_and_teardown():
    yield
    # Teardown: clear the default graph after the test body has run.
    tf.reset_default_graph()
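# Hedged sketch: registering a generator like the one above as an autouse
# pytest fixture so every test in the module runs against a fresh default
# graph (TF1-style API assumed). The fixture name `fresh_graph` and the
# sample test are assumptions for illustration.
import pytest
import tensorflow as tf


@pytest.fixture(autouse=True)
def fresh_graph():
    yield
    tf.reset_default_graph()


def test_graph_starts_empty(fresh_graph):
    # With the fixture active, ops from previous tests do not leak into
    # this test's default graph.
    assert not tf.get_default_graph().get_operations()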
def reset():
    # Note: tf.Graph objects are not context managers themselves
    # (graph.as_default() is), and tf.reset_default_graph() raises an
    # AssertionError when called inside a nested graph context, so the
    # reset must happen at the top level.
    tf.reset_default_graph()
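# Hedged alternative sketch: instead of mutating the process-wide default
# graph, build into a fresh tf.Graph scoped with as_default(). The `build_fn`
# parameter is a hypothetical callable used only for illustration.
def build_in_fresh_graph(build_fn):
    graph = tf.Graph()
    with graph.as_default():
        build_fn()
    return graph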
def run_experiment(agent,
                   environment,
                   start_iteration,
                   obs_stacker,
                   experiment_logger,
                   experiment_checkpointer,
                   checkpoint_dir,
                   num_iterations=200,
                   training_steps=5000,
                   logging_file_prefix='log',
                   log_every_n=100,
                   checkpoint_every_n=1):
    """Runs a full experiment, spread over multiple iterations."""
    tf.logging.info('Beginning training...')
    if num_iterations <= start_iteration:
        tf.logging.warning('num_iterations (%d) < start_iteration (%d)',
                           num_iterations, start_iteration)
        return

    # run_one_episode() updates the metrics; metrics compute tf.summaries.
    # -----------
    # train_summary_writer = tf.compat.v2.summary.create_file_writer(
    #     checkpoint_dir + '_tensorboard/', flush_millis=1000)
    # train_summary_writer.set_as_default()
    # metric_avg_return = AverageReturnMetric()
    # env_steps = EnvironmentSteps()
    # observers = [metric_avg_return]
    # global_step = tf.compat.v1.train.get_or_create_global_step()
    # # write graph to disk
    # with tf.compat.v2.summary.record_if(lambda: tf.math.equal(global_step % 5, 0)):
    #     summary_avg_return = tf.identity(
    #         metric_avg_return.tf_summaries(train_step=global_step))
    # with tf.Session() as sess:
    #     initialize_uninitialized_variables(sess)
    #     sess.run(train_summary_writer.init())
    # -----------

    tf.reset_default_graph()
    sess = tf.Session()

    for iteration in range(start_iteration, num_iterations):
        # -----------
        # global_step_val = sess.run(global_step)
        # -----------
        # start_time = time.time()
        statistics = run_one_iteration(agent, environment, obs_stacker,
                                       iteration, training_steps,
                                       observers=None)
        # tf.logging.info('Iteration %d took %d seconds', iteration,
        #                 time.time() - start_time)
        # start_time = time.time()
        log_experiment(experiment_logger, iteration, statistics,
                       logging_file_prefix, log_every_n)
        # tf.logging.info('Logging iteration %d took %d seconds', iteration,
        #                 time.time() - start_time)
        # start_time = time.time()
        checkpoint_experiment(experiment_checkpointer, agent, experiment_logger,
                              iteration, checkpoint_dir, checkpoint_every_n)

        summary_writer = tf.summary.FileWriter(checkpoint_dir + '/summary/')
        summary = tf.Summary()
        summary.value.add(tag='AverageReturn/EnvironmentSteps',
                          simple_value=statistics['average_return'][0])
        summary_writer.add_summary(summary, statistics['env_steps'][0])
        summary_writer.flush()
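# Hedged refactoring sketch: run_experiment() above recreates a FileWriter on
# every iteration; creating it once before the loop and reusing it avoids
# reopening the event file each time. `write_average_return` is a hypothetical
# helper name introduced only for illustration.
def write_average_return(summary_writer, statistics):
    summary = tf.Summary()
    summary.value.add(tag='AverageReturn/EnvironmentSteps',
                      simple_value=statistics['average_return'][0])
    summary_writer.add_summary(summary, statistics['env_steps'][0])
    summary_writer.flush()


# Assumed usage: build the writer once, then call the helper per iteration.
# summary_writer = tf.summary.FileWriter(checkpoint_dir + '/summary/')
# for iteration in range(start_iteration, num_iterations):
#     ...
#     write_average_return(summary_writer, statistics)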