def test_get_time_since_start(self):
    timer = Timer()
    time.sleep(2)
    expected = 2
    self.assertEqual(expected, int(timer.get_time_since_start().split("s")[0]))
def test_reset(self):
    timer = Timer()
    time.sleep(2)
    timer.reset()
    expected = 0
    self.assertEqual(int(timer.get_current().split("ms")[0]), expected)
def setup_output_folder(folder_only: bool = False):
    """Sets up and returns the output file where the logs will be placed
    based on the configuration passed. Usually "save_dir/logs/log_<timestamp>.txt".
    If env.log_dir is passed, logs will be directly saved in this folder.

    Args:
        folder_only (bool, optional): If only the folder should be returned
            and not the file. Defaults to False.

    Returns:
        str: folder or file path depending on folder_only flag
    """
    save_dir = get_multimodelity_env(key="save_dir")
    time_format = "%Y_%m_%dT%H_%M_%S"
    log_filename = "train_"
    log_filename += Timer().get_time_hhmmss(None, format=time_format)
    log_filename += ".log"

    log_folder = os.path.join(save_dir, "logs")

    env_log_dir = get_multimodelity_env(key="log_dir")
    if env_log_dir:
        log_folder = env_log_dir

    if not PathManager.exists(log_folder):
        PathManager.mkdirs(log_folder)

    if folder_only:
        return log_folder

    log_filename = os.path.join(log_folder, log_filename)

    return log_filename
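# Usage sketch (assumption, not part of the source): setup_output_folder() returns
# a log file path such as "save_dir/logs/train_<timestamp>.log", which can then be
# attached to a standard Python logger. The helper name and formatter below are
# hypothetical and only illustrate one way the returned path might be consumed.
import logging


def attach_file_handler(logger_name="multimodelity"):
    log_file = setup_output_folder()                  # full log file path
    log_dir = setup_output_folder(folder_only=True)   # just the log folder
    handler = logging.FileHandler(log_file)
    handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(message)s"))
    logging.getLogger(logger_name).addHandler(handler)
    return log_file, log_dir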
class TrainerProfilingMixin(ABC):
    profiler: Type[Timer] = Timer()

    def profile(self, text: str) -> None:
        if self.training_config.logger_level != "debug":
            return
        logging.debug(f"{text}: {self.profiler.get_time_since_start()}")
        self.profiler.reset()
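# Illustrative sketch (assumption): a trainer mixing in TrainerProfilingMixin calls
# profile() after each phase it wants to time. profile() logs the elapsed time since
# the last reset and then resets the profiler, so consecutive calls time consecutive
# phases. The trainer class and config attribute access here are hypothetical.
class DummyTrainer(TrainerProfilingMixin):
    def __init__(self, training_config):
        # training_config is assumed to expose a logger_level attribute
        self.training_config = training_config

    def train_step(self, batch, model, optimizer):
        output = model(batch)
        self.profile("Forward time")
        output["loss"].backward()
        optimizer.step()
        optimizer.zero_grad()
        self.profile("Backward time")
        return output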
class TensorboardLogger:
    def __init__(self, log_folder="./logs", iteration=0):
        # This would handle warning of missing tensorboard
        from torch.utils.tensorboard import SummaryWriter

        self.summary_writer = None
        self._is_master = is_master()
        self.timer = Timer()
        self.log_folder = log_folder
        self.time_format = "%Y-%m-%dT%H:%M:%S"

        if self._is_master:
            current_time = self.timer.get_time_hhmmss(None, format=self.time_format)
            tensorboard_folder = os.path.join(
                self.log_folder, f"tensorboard_{current_time}"
            )
            self.summary_writer = SummaryWriter(tensorboard_folder)

    def __del__(self):
        if getattr(self, "summary_writer", None) is not None:
            self.summary_writer.close()

    def _should_log_tensorboard(self):
        if self.summary_writer is None or not self._is_master:
            return False
        else:
            return True

    def add_scalar(self, key, value, iteration):
        if not self._should_log_tensorboard():
            return

        self.summary_writer.add_scalar(key, value, iteration)

    def add_scalars(self, scalar_dict, iteration):
        if not self._should_log_tensorboard():
            return

        for key, val in scalar_dict.items():
            self.summary_writer.add_scalar(key, val, iteration)

    def add_histogram_for_model(self, model, iteration):
        if not self._should_log_tensorboard():
            return

        for name, param in model.named_parameters():
            np_param = param.clone().cpu().data.numpy()
            self.summary_writer.add_histogram(name, np_param, iteration)
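# Minimal usage sketch (assumption): the logger is driven from a training loop.
# Only the master process creates a SummaryWriter; on other ranks
# _should_log_tensorboard() short-circuits every call, so the same code can run
# unconditionally everywhere. The scalar names and placeholder values are made up.
tb_logger = TensorboardLogger(log_folder="./save/logs", iteration=0)
for iteration in range(100):
    loss = 1.0 / (iteration + 1)  # placeholder scalar for illustration
    tb_logger.add_scalar("train/loss", loss, iteration)
    tb_logger.add_scalars({"train/lr": 1e-4, "train/grad_norm": 0.5}, iteration)
# Parameter histograms can be written periodically from a real model:
# tb_logger.add_histogram_for_model(model, iteration)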
def test_get_current(self):
    timer = Timer()
    expected = 0
    self.assertEqual(int(timer.get_current().split("ms")[0]), expected)
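# The tests above pin down the Timer interface they assume: get_current() returns a
# string beginning with elapsed milliseconds followed by "ms", get_time_since_start()
# returns a string beginning with elapsed seconds followed by "s", reset() restarts
# the clock, and get_time_hhmmss(None, format=...) formats the current wall-clock
# time. A minimal sketch of such a class (an assumption, not the project's actual
# implementation):
import time
from datetime import datetime


class MinimalTimer:
    def __init__(self):
        self.start = time.time()

    def reset(self):
        self.start = time.time()

    def get_current(self):
        return f"{int((time.time() - self.start) * 1000)}ms"

    def get_time_since_start(self):
        return f"{int(time.time() - self.start)}s"

    def get_time_hhmmss(self, gap=None, format="%Y-%m-%dT%H:%M:%S"):
        return datetime.now().strftime(format)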
class TestReporter(Dataset):
    def __init__(self, multi_task_instance):
        self.test_task = multi_task_instance
        self.task_type = multi_task_instance.dataset_type
        self.config = registry.get("config")
        self.report = []
        self.timer = Timer()
        self.training_config = self.config.training
        self.num_workers = self.training_config.num_workers
        self.batch_size = self.training_config.batch_size
        self.report_folder_arg = get_multimodelity_env(key="report_dir")
        self.experiment_name = self.training_config.experiment_name

        self.datasets = []

        for dataset in self.test_task.get_datasets():
            self.datasets.append(dataset)

        self.current_dataset_idx = -1
        self.current_dataset = self.datasets[self.current_dataset_idx]

        self.save_dir = get_multimodelity_env(key="save_dir")
        self.report_folder = ckpt_name_from_core_args(self.config)
        self.report_folder += foldername_from_config_override(self.config)

        self.report_folder = os.path.join(self.save_dir, self.report_folder)
        self.report_folder = os.path.join(self.report_folder, "reports")

        if self.report_folder_arg:
            self.report_folder = self.report_folder_arg

        PathManager.mkdirs(self.report_folder)

    def next_dataset(self):
        if self.current_dataset_idx >= 0:
            self.flush_report()

        self.current_dataset_idx += 1

        if self.current_dataset_idx == len(self.datasets):
            return False
        else:
            self.current_dataset = self.datasets[self.current_dataset_idx]
            logger.info(f"Predicting for {self.current_dataset.dataset_name}")
            return True

    def flush_report(self):
        if not is_master():
            return

        name = self.current_dataset.dataset_name
        time_format = "%Y-%m-%dT%H:%M:%S"
        time = self.timer.get_time_hhmmss(None, format=time_format)

        filename = name + "_"

        if len(self.experiment_name) > 0:
            filename += self.experiment_name + "_"

        filename += self.task_type + "_"
        filename += time

        if self.config.evaluation.predict_file_format == "csv":
            filepath = os.path.join(self.report_folder, filename + ".csv")
            self.csv_dump(filepath)
        else:
            filepath = os.path.join(self.report_folder, filename + ".json")
            self.json_dump(filepath)

        logger.info(f"Wrote predictions for {name} to {os.path.abspath(filepath)}")

        self.report = []

    def csv_dump(self, filepath):
        with PathManager.open(filepath, "w") as f:
            title = self.report[0].keys()
            cw = csv.DictWriter(f, title, delimiter=",", quoting=csv.QUOTE_MINIMAL)
            cw.writeheader()
            cw.writerows(self.report)

    def json_dump(self, filepath):
        with PathManager.open(filepath, "w") as f:
            json.dump(self.report, f)

    def get_dataloader(self):
        dataloader, _ = build_dataloader_and_sampler(
            self.current_dataset, self.training_config
        )
        return dataloader

    def prepare_batch(self, batch):
        if hasattr(self.current_dataset, "prepare_batch"):
            batch = self.current_dataset.prepare_batch(batch)
        return batch

    def __len__(self):
        return len(self.current_dataset)

    def __getitem__(self, idx):
        return self.current_dataset[idx]

    def add_to_report(self, report, model):
        keys = ["id", "question_id", "image_id", "context_tokens", "captions", "scores"]
        for key in keys:
            report = self.reshape_and_gather(report, key)

        if not is_master():
            return

        results = self.current_dataset.format_for_prediction(report)

        if hasattr(model, "format_for_prediction"):
            results = model.format_for_prediction(results, report)
        elif hasattr(model.module, "format_for_prediction"):
            results = model.module.format_for_prediction(results, report)

        self.report = self.report + results

    def reshape_and_gather(self, report, key):
        if key in report:
            num_dims = report[key].dim()
            if num_dims == 1:
                report[key] = gather_tensor(report[key]).view(-1)
            elif num_dims >= 2:
                # Collect dims other than batch
                other_dims = report[key].size()[1:]
                report[key] = gather_tensor(report[key]).view(-1, *other_dims)

        return report
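# Sketch of the prediction loop TestReporter appears designed for (an assumption
# based on its public API, not code from the source): iterate datasets with
# next_dataset(), run the model over each dataloader, collect predictions with
# add_to_report(), and rely on next_dataset() plus a final flush_report() to write
# the CSV or JSON files. The `model` call and the dict-like report of tensors it
# is assumed to return are hypothetical.
import torch


def run_prediction(reporter, model):
    model.eval()
    with torch.no_grad():
        while reporter.next_dataset():
            dataloader = reporter.get_dataloader()
            for batch in dataloader:
                prepared = reporter.prepare_batch(batch)
                # Assumed: the model returns a dict-like report of batched tensors
                report = model(prepared)
                reporter.add_to_report(report, model)
    reporter.flush_report()  # write out the last dataset's predictions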