Example #1
# Requires `import pathlib`, `import tensorflow as tf`, and
# `from tensorflow.python.profiler import option_builder` at module level.
def add_profile(self, epoch, logs, run_metadata: tf.RunMetadata,
                profiler: tf.profiler.Profiler,
                profile_writer: tf.summary.FileWriter, save_path: str):
    # Make sure the timeline output directory exists.
    timeline_path = f"{save_path}/timeline"
    pathlib.Path(timeline_path).mkdir(exist_ok=True, parents=True)
    # Register this epoch's run metadata with the profiler.
    profiler.add_step(epoch, run_meta=run_metadata)
    # Profile time and memory for this step and emit a Chrome timeline file.
    opts = (option_builder.ProfileOptionBuilder(
        option_builder.ProfileOptionBuilder.time_and_memory()).with_step(
            epoch).with_timeline_output(f"{timeline_path}/step").build())
    profiler.profile_graph(options=opts)
    # Attach the run metadata to TensorBoard under a per-epoch tag.
    profile_writer.add_run_metadata(run_metadata, f"step{epoch}")
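
For context, here is a minimal caller-side sketch of how the arguments above might be produced in TF 1.x graph mode. The toy variable, the ./logs directory, and the single traced step are assumptions for illustration, not taken from the original snippet.

import tensorflow as tf

# Tiny graph so the traced run has something to execute (illustrative only).
x = tf.Variable(3.0)
train_op = tf.assign_add(x, 1.0)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    profiler = tf.profiler.Profiler(sess.graph)
    profile_writer = tf.summary.FileWriter("./logs", sess.graph)

    # Collect a full trace (timings, memory) for one run of the op.
    run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    run_metadata = tf.RunMetadata()
    sess.run(train_op, options=run_options, run_metadata=run_metadata)

    # The resulting run_metadata, profiler, and writer are what
    # add_profile() above expects to receive for a given epoch.
    profiler.add_step(0, run_meta=run_metadata)
    profile_writer.add_run_metadata(run_metadata, "step0")
    profile_writer.close()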
Example #2
  # Requires `import numpy as np` and `import tensorflow as tf` at module level.
  def _train_step(self, obs, states, rewards, masks, actions, values, update,
                  writer: tf.summary.FileWriter = None, features=None, rewards_bonuses=None):
    """
    applies a training step to the model

    :param obs: ([float]) The input observations
    :param states: ([float]) The states (used for recurrent policies)
    :param rewards: ([float]) The rewards from the environment
    :param masks: ([bool]) Whether or not the episode is over (used for recurrent policies)
    :param actions: ([float]) The actions taken
    :param values: ([float]) The logits values
    :param update: (int) the current step iteration
    :param writer: (TensorFlow Summary.writer) the writer for tensorboard
    :return: (float, float, float) policy loss, value loss, policy entropy
    """
    advs = rewards - values
    cur_lr = None
    # Advance the learning rate schedule once per sample in the batch.
    for _ in range(len(obs)):
      cur_lr = self.learning_rate_schedule.value()
    assert cur_lr is not None, "Error: the observation input array cannot be empty"

    # Intrinsic reward bonuses are only applied when successor features are enabled.
    rewards_bonuses = rewards_bonuses if self.use_sf else np.zeros_like(rewards_bonuses)
    td_map = {self.train_model.obs_ph: obs, self.actions_ph: actions, self.advs_ph: advs,
              self.rewards_ph: rewards + rewards_bonuses, self.learning_rate_ph: cur_lr,
              self.successor_feature_ph: features}
    if states is not None:
      td_map[self.train_model.states_ph] = states
      td_map[self.train_model.masks_ph] = masks

    if writer is not None:
      # run loss backprop with summary, but once every 10 runs save the metadata (memory, compute time, ...)
      if (1 + update) % 10 == 0:
        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_metadata = tf.RunMetadata()
        summary, policy_loss, value_loss, policy_entropy, _, sf_loss = self.sess.run(
          [self.summary, self.pg_loss, self.vf_loss, self.entropy, self.apply_backprop,
           self.sf_loss], td_map, options=run_options, run_metadata=run_metadata)
        writer.add_run_metadata(run_metadata, 'step%d' % (update * (self.n_batch + 1)))
      else:
        summary, policy_loss, value_loss, policy_entropy, _, sf_loss = self.sess.run(
          [self.summary, self.pg_loss, self.vf_loss, self.entropy, self.apply_backprop,
           self.sf_loss], td_map)
      writer.add_summary(summary, update * (self.n_batch + 1))

    else:
      policy_loss, value_loss, policy_entropy, _, sf_loss = self.sess.run(
        [self.pg_loss, self.vf_loss, self.entropy, self.apply_backprop, self.sf_loss], td_map)

    return policy_loss, value_loss, policy_entropy, sf_loss
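
The tracing logic above can be isolated from the A2C specifics. Below is a stripped-down sketch of the same pattern, assuming plain TF 1.x graph mode; the toy loss, the feed values, and the ./logs directory are illustrative, not part of the original model, while the 10-step trace interval mirrors the snippet.

import tensorflow as tf

x = tf.placeholder(tf.float32, shape=[None])
loss = tf.reduce_mean(tf.square(x))
summary_op = tf.summary.scalar("loss", loss)

with tf.Session() as sess:
    writer = tf.summary.FileWriter("./logs", sess.graph)
    for update in range(30):
        feed = {x: [1.0, 2.0, 3.0]}
        if (1 + update) % 10 == 0:
            # Every 10th update, also trace compute time and memory usage.
            run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            run_metadata = tf.RunMetadata()
            summary, _ = sess.run([summary_op, loss], feed,
                                  options=run_options, run_metadata=run_metadata)
            writer.add_run_metadata(run_metadata, 'step%d' % update)
        else:
            summary, _ = sess.run([summary_op, loss], feed)
        writer.add_summary(summary, update)
    writer.close()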
Example #3
from __future__ import annotations

import logging
import os

import tensorflow as tf
from tensorflow.python.client import timeline

logger = logging.getLogger(__name__)


# TrainConfig is a project-specific config type; only its `logdir` field is used here.
def log_trace(cfg: TrainConfig, step: int, opts,
              summary_writer: tf.summary.FileWriter):
    logger.debug("Starting trace logging")
    # Convert the step stats collected in run_metadata into a Chrome trace.
    fetched_timeline = timeline.Timeline(opts['run_metadata'].step_stats)
    chrome_trace = fetched_timeline.generate_chrome_trace_format(
        show_memory=True)
    with open(os.path.join(cfg.logdir, f'timeline_{step:05}.json'), 'w') as f:
        f.write(chrome_trace)
    # Also attach the run metadata to TensorBoard for this step.
    summary_writer.add_run_metadata(opts['run_metadata'],
                                    f"step_{step:05}",
                                    global_step=step)
    logger.info(
        f"Saved trace metadata both to timeline_{step:05}.json and step_{step:05} in tensorboard"
    )
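
A hedged usage sketch follows, showing how an opts dict with a 'run_metadata' entry might be produced before calling log_trace() above. The tiny graph, the SimpleNamespace standing in for TrainConfig, and the ./logs directory are assumptions for illustration only.

import os
from types import SimpleNamespace

import tensorflow as tf

# Stand-in for the project's TrainConfig; only logdir is needed here.
cfg = SimpleNamespace(logdir="./logs")
os.makedirs(cfg.logdir, exist_ok=True)

x = tf.Variable(1.0)
step_op = tf.assign_add(x, 1.0)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    writer = tf.summary.FileWriter(cfg.logdir, sess.graph)

    # Run one step with full tracing enabled.
    run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    run_metadata = tf.RunMetadata()
    sess.run(step_op, options=run_options, run_metadata=run_metadata)

    # Writes timeline_00000.json (viewable in chrome://tracing) and attaches
    # the same metadata to TensorBoard under the tag step_00000.
    log_trace(cfg, 0, {'run_metadata': run_metadata}, writer)
    writer.close()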