def save_exam(exam: dict, experiment_dir: str):
    """
    Saves an exam's saliency animations as GIFs and writes a JSON summary of
    the exam to "{experiment_dir}/exams/{exam_id}/exam.json".
    """
    exam_dir = os.path.join(experiment_dir, "exams", exam["exam_id"])
    ensure_dir_exists(exam_dir)
    task = None
    for saliency_map in exam.get("saliency_maps", []):
        ani = saliency_map["ani"]
        task = saliency_map["ani_task"]
        ani_file_name = f"saliency_ani_{task}.gif"
        ani.save(os.path.join(exam_dir, ani_file_name), writer="imagemagick")
        # replace the animation object with its file name so the exam stays JSON-serializable
        saliency_map["ani"] = ani_file_name
        # TODO: save saliency plot
    with open(os.path.join(exam_dir, "exam.json"), "w") as f:
        exam_summary = {
            "exam_id": exam["exam_id"],
            "patient_id": exam["patient_id"],
            "task": task,
            "targets": (None if task is None else [
                float(prob)
                for prob in exam["targets"][task].data.numpy()
            ]),
            "target_prob": exam["target_probs"],
        }
        json.dump(exam_summary, f, indent=4)
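# A hedged sketch of the exam dict save_exam expects, with keys inferred from
# the function body (tensor and animation values shown schematically; ids and
# task name are hypothetical):
#
# exam = {
#     "exam_id": "exam_0001",
#     "patient_id": "patient_42",
#     "targets": {"abnormality": torch.tensor([0.0, 1.0])},
#     "target_probs": [0.13, 0.87],
#     "saliency_maps": [{"ani": animation, "ani_task": "abnormality"}],
# }
# save_exam(exam, experiment_dir="experiments/19-01-01_00-00_demo")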
def _save_weights(self, name="last"):
    """
    Saves the model weights to the remote model directory and symlinks them
    into the experiment directory.
    """
    ensure_dir_exists(self.remote_model_dir)
    remote_weights_path = os.path.join(
        self.remote_model_dir, f"{self.experiment_t}_{name}_weights")
    self.model.save_weights(remote_weights_path)
    self.model_dir = os.path.join(self.dir, name)
    ensure_dir_exists(self.model_dir)
    link_weights_path = os.path.join(self.model_dir, "weights.link")
    self._link_model(remote_weights_path, link_weights_path)
def evaluate(self, eval_split="valid"):
    """
    Scores the model on the given split and saves the resulting metrics.
    """
    metrics = self.model.score(self.dataloaders[eval_split],
                               **self.evaluate_args)
    if self.model_dir:
        self._save_metrics(self.model_dir, metrics, eval_split)
    else:
        metrics_path = os.path.join(self.dir, "best")
        ensure_dir_exists(metrics_path)
        self._save_metrics(metrics_path, metrics, eval_split)
def _save_model(self, name="last"):
    """
    Saves the model to the remote model directory and symlinks it into the
    experiment directory.

    Args:
        name (str) checkpoint name, e.g. "last" or "best".
    """
    remote_model_path = os.path.join(self.remote_model_dir,
                                     f"{self.experiment_t}_{name}_model")
    self.model.save(remote_model_path)
    self.model_dir = os.path.join(self.dir, name)
    ensure_dir_exists(self.model_dir)
    link_model_path = os.path.join(self.model_dir, "model.link")
    self._link_model(remote_model_path, link_model_path)
def _save_epoch(self, epoch_num, train_metrics, valid_metrics, name="last"):
    """
    Saves the train and valid metrics for the most recent training epoch.
    """
    save_dir = os.path.join(self.dir, name)
    ensure_dir_exists(save_dir)
    logging.info("Saving checkpoint...")
    # export tensorboard scalars recorded for the most recent training epoch
    self.writer.export_scalars_to_json(get_latest_file(self.log_dir))
    self._save_metrics(save_dir, train_metrics, "train")
    self._save_metrics(save_dir, valid_metrics, "valid")
def create(group_dir, copy_dir, name, message):
    """
    Creates a new process directory with a params.json file and notebook.ipynb.

    Every process belongs to a group (a directory containing a set of related
    processes). When create is called on a group (specified by group_dir) and
    no copy_dir is given, the new process is created with the same parameters
    as the most recently created experiment in the group.

    args:
        group_dir (str) directory of the group the process belongs to
        copy_dir (str) directory whose params.json is copied; if None, the
            most recent experiment in the group is used
        name (str) name of the new process, prefixed with a timestamp
        message (str) notes written into the process notebook
    """
    ensure_dir_exists(group_dir)
    if copy_dir is None:
        # take params from the most recent experiment, excluding dirs with a leading '_'
        copy_dirs = [
            curr_dir for curr_dir in os.listdir(group_dir)
            if not curr_dir.startswith('_')
        ]
        if not copy_dirs:
            copy_dir = "experiments/_default"
        else:
            copy_dir = os.path.join(group_dir, sorted(copy_dirs)[-1])
    name = f"{strftime('%y-%m-%d_%H-%M', localtime())}_{name}"
    process_dir = os.path.join(group_dir, name)
    ensure_dir_exists(process_dir)
    copyfile(src=os.path.join(copy_dir, "params.json"),
             dst=os.path.join(process_dir, "params.json"))
    create_notebook(process_dir, notes=message)
    print(f"Created process at '{process_dir}' with parameters from '{copy_dir}'")
    print("Open process notebook with:")
    print(f"jupyter notebook {process_dir}/notebook.ipynb --no-browser --port=8200")
    print("Run the process with:")
    print(f"run {process_dir}")
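# Example invocation (group path, process name, and message are illustrative):
#
# create(group_dir="experiments/tuning",
#        copy_dir=None,  # falls back to the most recent experiment in the group
#        name="baseline",
#        message="Baseline run with default params.")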
def initialize_hdf5(self, mode="read"):
    """
    If an HDF5 file does not exist, creates a new one and initializes the base
    groups ["/exams/"]. If it does exist, it is simply opened for reading and
    writing. The path of the dataset is assumed to be
    "{data_dir}/datasets/{dataset_name}.hdf5".

    args:
        mode (str) "read" or "write"
    """
    dataset_dir = os.path.join(self.data_dir, "datasets")
    ensure_dir_exists(dataset_dir)
    dataset_path = os.path.join(dataset_dir, f"{self.dataset_name}.hdf5")
    exists = os.path.exists(dataset_path)
    if mode == "read":
        try:
            self.file = h5py.File(dataset_path, "r")
        except OSError:
            raise Exception(
                f"HDF5 file ({dataset_path}) must exist when opening in read mode.")
    elif mode == "write":
        if not exists:
            logging.info(f"Creating new HDF5 file: {self.dataset_name}")
        logging.info(
            f"Opening HDF5 file for reading and writing: {self.dataset_name}")
        self.file = h5py.File(dataset_path, "a")
        if "exams" not in self.file:
            self.file.create_group("exams")
    else:
        raise Exception("HDF5 mode not recognized.")
    try:
        self.exams = self.file["exams"]
    except KeyError:
        raise Exception('HDF5 file does not have an "exams" group.')
    self.mode = mode
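# A minimal usage sketch, assuming an instance with data_dir and dataset_name
# already set (the exam id and array shape are hypothetical):
#
# dataset.initialize_hdf5(mode="write")
# exam_group = dataset.exams.create_group("exam_0001")  # one group per exam
# exam_group.create_dataset("images", data=np.zeros((2, 64, 64)))
# dataset.file.close()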
def _create_experiments(self, params):
    """
    Expands params and creates an experiment directory, params.json, and
    notebook for each expanded parameter setting.
    """
    logging.info("Expanding params")
    params = self._expand_params(params)

    logging.info("Creating experiment directories")
    self.candidates_dir = os.path.join(self.dir, "candidates")
    self.experiments = {}
    for idx, curr_params in enumerate(params):
        experiment_dir = os.path.join(self.candidates_dir, f"exp_{idx}")
        self.experiments[experiment_dir] = {
            "last_metrics": None,
            "best_metrics": None,
            "params": curr_params,
        }
        ensure_dir_exists(experiment_dir)
        with open(os.path.join(experiment_dir, "params.json"), "w") as f:
            json.dump(curr_params, f, indent=4)
        create_notebook(experiment_dir,
                        notes=f"Created as part of tuner {self.dir}")
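# A hedged sketch of how _create_experiments might be driven; this assumes
# _expand_params turns list-valued entries into a cross product of settings
# (parameter names and values here are illustrative, not from any real run):
#
# tuner._create_experiments({"lr": [1e-3, 1e-4], "dropout": 0.5})
#
# would create candidates/exp_0 and candidates/exp_1 under the tuner
# directory, each with its own params.json and notebook.ipynb.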
def __init__(
        self,
        dir,
        dataset_class="BinaryDataset",
        dataset_args={},
        dataloader_configs=[],
        train_args={},
        evaluate_args={},
        model_class="BaseModel",
        model_args={},
        task_configs=None,
        default_task_config={},
        primary_metric="roc_auc",
        reload_weights="best",
        cuda=True,
        devices=[0],
        seed=123,
        remote_model_dir="/data4/data/fdg-pet-ct/models",
):
    """
    Initializes the Trainer subclass of Process.
    """
    super().__init__(dir)

    # load instance variables
    self.train_args = train_args
    self.evaluate_args = evaluate_args

    # set the model_dir
    self.model_dir = None
    self.remote_model_dir = remote_model_dir

    # set random seed for reproducible experiments
    self.cuda = cuda
    self.device = devices[0]
    self.devices = devices
    torch.manual_seed(seed)
    if cuda:
        torch.cuda.manual_seed(seed)
    np.random.seed(seed)

    # distribute shared default params to each task config
    if task_configs is not None:
        new_task_configs = []
        for task_config in task_configs:
            new_task_config = default_task_config.copy()
            new_task_config.update(task_config)
            new_task_configs.append(new_task_config)
        task_configs = new_task_configs
        # copy before mutating so the caller's (or shared default) dicts are untouched
        model_args = dict(model_args)
        dataset_args = dict(dataset_args)
        model_args["task_configs"] = task_configs
        dataset_args["task_configs"] = task_configs
        self.primary_task = task_configs[0]["task"]
    self.primary_metric = primary_metric

    # load dataloaders
    self._build_dataloaders(dataset_class, dataset_args, dataloader_configs)

    logging.info("Building model")
    self._build_model(model_class, model_args, reload_weights=reload_weights)

    # records epoch data in csv
    self.train_history = TrainHistory(self.dir)

    # creates log dir
    self.log_dir = os.path.join(self.dir, "logs")
    ensure_dir_exists(self.log_dir)
    self.writer = SummaryWriter(log_dir=self.log_dir)

    # timestamp acts as checkpoint name
    experiment_time = str(time()).replace(".", "_")
    self.experiment_t = f"{uuid4()}-time{experiment_time}"

    logging.info("-" * 30)
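# A hedged construction sketch (argument values are illustrative; the task
# name, loss_weight key, and directory path are hypothetical, not project
# defaults):
#
# trainer = Trainer(
#     dir="experiments/19-01-01_00-00_demo",
#     dataset_class="BinaryDataset",
#     task_configs=[{"task": "abnormality"}],
#     default_task_config={"loss_weight": 1.0},
#     cuda=torch.cuda.is_available(),
#     devices=[0],
# )
# trainer.evaluate(eval_split="valid")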