def test_list_directory(tmp_path: pathlib.Path) -> None:
    base_path = tensorboard.get_base_path({"base_path": BASE_PATH}, manager=True)
    sync_path = tensorboard.get_sync_path(test_util.get_dummy_env())
    manager = tensorboard.SharedFSTensorboardManager(str(tmp_path), base_path, sync_path)
    full_event_path = BASE_PATH.joinpath("tensorboard", "events.out.tfevents.example")

    assert set(manager.list_tfevents()) == {full_event_path}
def __init__(self) -> None: """ TorchWriter uses pytorch file writers and summary operations to write out tfevent files containing scalar batch metrics. """ super().__init__() self.writer: Any = SummaryWriter(log_dir=tensorboard.get_base_path( {})) # type: ignore
def move_tensorboard_files(event_dir: pathlib.Path) -> None:
    """
    Given a TensorFlow Estimator model directory, find all nested TensorBoard files
    and move them to the TensorBoard log directory.
    """
    tensorboard_dir = tensorboard.get_base_path({})
    tensorboard_files = tensorboard.util.find_tb_files(event_dir)

    for file in tensorboard_files:
        file.rename(tensorboard_dir.joinpath(file.name))
def __init__(self, *args: Any, **kwargs: Any):
    self.workload_end_count = 0

    user_log_dir = kwargs.pop("log_dir", None)
    if user_log_dir is not None:
        logging.warning(
            f"arg log_dir={user_log_dir} to det.keras.callbacks.TensorBoard will be ignored"
        )
    log_dir = str(tensorboard.get_base_path({}).resolve())

    tf.keras.callbacks.TensorBoard.__init__(self, log_dir=log_dir, *args, **kwargs)
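# Usage sketch, assuming the callback above is exposed as det.keras.callbacks.TensorBoard
# (the warning message above refers to that path). The model and training data are
# placeholders; the point is that no log_dir is passed, since Determined supplies it.
def _fit_with_tensorboard_example(model: tf.keras.Model, x: Any, y: Any) -> None:
    model.fit(x, y, epochs=1, callbacks=[TensorBoard()])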
def move_tf_events(event_dir: pathlib.Path) -> None:
    """
    Given a TensorFlow Estimator model directory, find all nested tfevents files and
    move them to the TensorBoard log directory.

    For the most part, we expect only one tfevents file in the root_dir tree. This
    recursive search for tfevents is an extra measure to make sure we do not miss
    any events.
    """
    tensorboard_dir = tensorboard.get_base_path({})
    for event_file in event_dir.rglob("*tfevents*"):
        event_file.rename(tensorboard_dir.joinpath(event_file.name))
def set_profiler(self, *args: List[str], **kwargs: Any) -> None:
    """
    Sets a torch profiler instance on the trial context to be called in
    _pytorch_trial when training.
    """
    self.profiler = torch.profiler.profile(
        on_trace_ready=torch.profiler.tensorboard_trace_handler(str(get_base_path({}))),
        *args,
        **kwargs,
    )
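# Usage sketch for set_profiler: the arguments are forwarded directly to
# torch.profiler.profile, so callers can pass the standard PyTorch profiler options
# (activities, schedule, and so on). The `context` parameter name is an assumption
# about where this method lives; the torch.profiler calls follow PyTorch's API.
def _enable_profiling_example(context: Any) -> None:
    context.set_profiler(
        activities=[
            torch.profiler.ProfilerActivity.CPU,
            torch.profiler.ProfilerActivity.CUDA,
        ],
        schedule=torch.profiler.schedule(wait=1, warmup=1, active=3),
    )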
def __init__(self) -> None:
    super().__init__()
    self.writer = EventFileWriter(
        logdir=str(tensorboard.get_base_path({})), filename_suffix=None
    )
    self.createSummary = tf.Summary

    # _seen_summary_tags is vendored from TensorFlow:
    # tensorflow/python/summary/writer/writer.py
    #
    # This set contains tags of Summary Values that have been encountered
    # already. The motivation here is that the SummaryWriter only keeps the
    # metadata property (which is a SummaryMetadata proto) of the first Summary
    # Value encountered for each tag. The SummaryWriter strips away the
    # SummaryMetadata for all subsequent Summary Values with tags seen
    # previously. This saves space.
    self._seen_summary_tags: Set[str] = set()
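# Sketch of how _seen_summary_tags would be consulted when a Summary is written,
# mirroring the vendored TensorFlow behavior described in the comment above. The
# function name is illustrative and not part of the class shown here.
def _strip_repeated_metadata_example(writer: Any, summary: Any) -> None:
    for value in summary.value:
        if not value.metadata.ByteSize():
            # No metadata attached to this value, so there is nothing to strip.
            continue
        if value.tag in writer._seen_summary_tags:
            # Metadata for this tag was already recorded once; drop it to save space.
            value.ClearField("metadata")
        else:
            writer._seen_summary_tags.add(value.tag)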
def test_list_tb_files(tmp_path: pathlib.Path) -> None:
    env = get_dummy_env()
    base_path = get_base_path({"base_path": BASE_PATH}, manager=True)
    sync_path = get_sync_path(env.det_cluster_id, env.det_experiment_id, env.det_trial_id)
    manager = SharedFSTensorboardManager(str(tmp_path), base_path, sync_path)

    test_files = [
        "79375caf89e9.kernel_stats.pb",
        "79375caf89e9.memory_profile.json.gz",
        "events.out.tfevents.example",
    ]
    test_filepaths = [BASE_PATH.joinpath("tensorboard", test_file) for test_file in test_files]
    tb_files = manager.list_tb_files(0)

    assert set(test_filepaths) == set(tb_files)
def __init__(self, estimator_trial_controller: "EstimatorTrialController") -> None:
    self.batches_processed_in_step = 0
    self.estimator_trial_controller = estimator_trial_controller

    # step_metrics keeps track of the metrics associated with a step (see
    # DeterminedControlCallback). It is cleared in between training steps.
    self.step_metrics = []  # type: List[Dict[str, Any]]
    self.num_batches = None  # type: Optional[int]

    self._global_step_of_last_checkpoint = None  # type: Optional[int]
    self._session = None  # type: Optional[tf.Session]
    self._current_global_step = None  # type: Optional[int]
    self._saver = None  # type: Optional[tf.train.Saver]
    self._writer = tf.compat.v1.summary.FileWriter(tensorboard.get_base_path({}))

    # Store the response_func for train_for_step workloads while we do the training.
    self.train_response_func = None  # type: Optional[workload.ResponseFunc]
def __new__(cls, *args: Any, **kwargs: Any) -> callbacks.TFEventWriter:
    fixed_parameters = ["logdir", "split_files"]
    for param in fixed_parameters:
        if param in kwargs:
            logging.warning(f"parameter {param} to TFEventWriter will be ignored")
            kwargs.pop(param)

    # Tensorpack's TFEventWriter requires that the logdir is created before
    # the TFEventWriter is created.
    base_path = tensorboard.get_base_path({})
    base_path.mkdir(parents=True, exist_ok=True)
    kwargs["logdir"] = str(base_path)

    # split_files forces the TFEventWriter to start a new log file after
    # flushing tf events to disk. This creates distinct files that
    # TensorboardManagers expect for syncing tf events to persistent storage.
    kwargs["split_files"] = True

    return callbacks.TFEventWriter(*args, **kwargs)
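# Usage sketch: constructing the wrapper above (referred to as TFEventWriter here,
# which is an assumption about the surrounding class name). Any user-supplied logdir
# or split_files value is dropped with a warning, because Determined fixes both
# before delegating to tensorpack's callbacks.TFEventWriter.
def _make_event_writer_example() -> callbacks.TFEventWriter:
    # The logdir below is ignored; the writer always points at the Determined base path.
    return TFEventWriter(logdir="/tmp/my-logs")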
def __init__(self, *args: Any, **kwargs: Any):
    log_dir = str(tensorboard.get_base_path({}).resolve())
    super().__init__(log_dir=log_dir, *args, **kwargs)
def __init__(self) -> None:
    super().__init__()
    self.writer: Any = SummaryWriter(log_dir=tensorboard.get_base_path({}))  # type: ignore