def test_construct_filename(self):
    '''
    Verify that FileUtils.construct_filename() builds the expected
    abbreviated filename from a dict of run properties, honoring
    the prefix/suffix args and suppressing the date component.
    '''
    run_props = {'net_name': 'resnet18',
                 'min_epochs': '3',
                 'max_epochs': '6',
                 'batch_size': '2',
                 'num_folds': '3',
                 'seed': '42',
                 'kernel_size': '7',
                 'sample_width': '400',
                 'sample_height': '400',
                 'lr': '0.01',
                 'to_grayscale': 'False'
                 }
    generated = FileUtils.construct_filename(run_props,
                                             prefix='model',
                                             suffix='.pth',
                                             incl_date=False)
    self.assertEqual(generated,
                     'model_net_resnet18_bs_2_folds_3_ks_7_lr_0.01_gray_False.pth')
def create_csv_writer(self, raw_data_dir):
    '''
    Create a csv_writer that will fill a csv file during
    training/validation as follows:

        epoch train_preds train_labels val_preds val_labels

    Cols after the integer 'epoch' col will each be an array of ints:

        train_preds   train_lbls   val_preds   val_lbls
        2,"[2,5,1,2,3]","[2,6,1,2,1]","[1,2]", "[1,3]"

    If raw_data_dir is provided as a str, it is taken as the
    directory where csv file with predictions and labels are to
    be written. The dir is created if necessary.

    If the arg is instead set to True, a dir 'runs_raw_results' is
    created under this script's directory if it does not exist.
    Then a subdirectory is created for this run, using the hparam
    settings to build a file name. The dir is created if needed.
    Result ex.:

          <script_dir>
                runs_raw_results
                    Run_lr_0.001_br_32
                        run_2021_05_ ... _lr_0.001_br_32.csv

    Then file name is created, again from the run hparam settings.
    If this file exists, user is asked whether to remove or append.

    The inst var self.csv_writer is initialized to:
       o None if csv file exists, but is not to be overwritten
         nor appended-to
       o A file descriptor for a file open for either
         'write' or 'append'.

    :param raw_data_dir: If simply True, create dir and file names
        from hparams, and create as needed. If a string, it is
        assumed to be the directory where a .csv file is to be
        created. If None, self.csv_writer is set to None.
    :type raw_data_dir: {None | True | str}
    :return: CSV writer ready for action. Set either to write a
        fresh file, or append to an existing file. Unless file
        exists, and user decided not to overwrite
    :rtype: {None | csv.writer}
    '''
    # BUGFIX: check for None *before* creating any directories;
    # the original created the default root dir even when no
    # csv output was requested at all:
    if raw_data_dir is None:
        return None

    # Ensure the csv file root dir exists:
    if type(raw_data_dir) == str:
        raw_data_root = raw_data_dir
    else:
        raw_data_root = os.path.join(self.curr_dir, 'runs_raw_results')

    if not os.path.exists(raw_data_root):
        os.mkdir(raw_data_root)

    # Create a raw-dir subdirectory for this run,
    # named from the hparam settings:
    csv_subdir_name = FileUtils.construct_filename(self.config.Training,
                                                   prefix='Run',
                                                   incl_date=True)
    # BUGFIX: the original os.makedirs(csv_subdir_name) created the
    # run subdir relative to the CWD (not under raw_data_root), and
    # then wrote the csv file directly into raw_data_root — contrary
    # to the layout documented above. Also crashed if the dir existed:
    csv_subdir = os.path.join(raw_data_root, csv_subdir_name)
    os.makedirs(csv_subdir, exist_ok=True)

    # Create a csv file name:
    csv_file_nm = FileUtils.construct_filename(self.config.Training,
                                               prefix='run',
                                               suffix='.csv',
                                               incl_date=True)
    csv_path = os.path.join(csv_subdir, csv_file_nm)

    # Decide the open mode; BUGFIX: the original left 'mode'
    # unbound (UnboundLocalError) when the file did not exist:
    mode = 'w'
    if os.path.exists(csv_path):
        do_overwrite = FileUtils.user_confirm(
            f"File {csv_path} exists; overwrite?", default='N')
        if not do_overwrite:
            do_append = FileUtils.user_confirm(f"Append instead?",
                                               default='N')
            if not do_append:
                # Neither overwrite nor append: no writer at all.
                return None
            mode = 'a'

    csv_writer = CSVWriterCloseable(csv_path,
                                    mode=mode,
                                    delimiter=',')
    header = ['epoch',
              'train_preds', 'train_labels',
              'val_preds', 'val_labels'
              ]
    csv_writer.writerow(header)
    return csv_writer
def save_model(self, model, epoch):
    '''
    Saves and retains trained models on disk.

    Within a subdir the method maintains a queue of
    files of len self.history_len:

            fname_1_ep_0.pth
            fname_2_ep_1.pth
                 ...
            fname_<history_len>.pth

    where ep_<n> is the epoch during training
    where the model of that moment is being saved.

    When self.history_len model files are already present,
    removes the oldest.

    Assumptions:
        o self.fname_els_dict contains prop/value pairs
          for use in FileUtils.construct_filename()
              {'bs' : 32,
               'lr' : 0.001,
                  ...
              }
        o self.model_fnames is a deque the size of which
          indicates how many models to save before
          discarding the oldest one as new ones are added
          (assumed to be created with maxlen == self.history_len
          so that appendleft() evicts the oldest entry —
          TODO confirm against the constructor)

    :param model: model to save
    :type model: nn.module
    :param epoch: the epoch that created the model
    :type epoch: int
    '''
    deque_len = len(self.model_fnames)
    if deque_len >= self.history_len:
        # Pushing a new model fname to the
        # front will pop the oldest from the
        # end. That file needs to be deleted:
        oldest_model_path = self.model_fnames[-1]
    else:
        # No file will need to be deleted.
        # Still filling our allotment:
        oldest_model_path = None

    model_fname = FileUtils.construct_filename(self.fname_els_dict,
                                               prefix='mod',
                                               suffix=f"_ep{epoch}.pth",
                                               incl_date=True)
    model_path = os.path.join(self.run_subdir, model_fname)

    # As recommended by pytorch, save the
    # state_dict for portability:
    torch.save(model.state_dict(), model_path)

    self.model_fnames.appendleft(model_path)

    if oldest_model_path is not None:
        try:
            os.remove(oldest_model_path)
        except Exception as e:
            # Best-effort cleanup: a leftover snapshot is not fatal.
            # BUGFIX: Logger.warn() is deprecated; use warning():
            self.log.warning(f"Could not remove old model: {repr(e)}")
def _construct_run_subdir(self, config, num_classes, model_root):
    '''
    Constructs a directory name composed of elements specified
    in utility.py's FileUtils file/config info dicts. Ensures
    that <model_root>/subdir_name does not exist. If it does,
    keeps adding '_r<n>' to the end of the dir name.

    Final str will look like this:

    model_2021-03-23T15_38_39_net_resnet18_pre_True_frz_6_bs_2_folds_5_opt_SGD_ks_7_lr_0.01_gray_False

    Details will depend on the passed in configuration.

    Instance var fname_els_dict will contain all run
    attr/values needed for calls to FileUtils.construct_filename()

    :param config: run configuration
    :type config: NeuralNetConfig
    :param num_classes: number of target classes
    :type num_classes: int
    :param model_root: full path to dir where the
        subdir is to be created
    :type model_root: str
    :return: unique subdir name of self.model_root,
        which has been created
    :rtype: str
    '''
    # Gather run-property/value pairs from the config
    # for inclusion in the dir name. Each property's
    # declared type selects the configparser accessor
    # that converts it:
    fname_els_dict = {}
    section_dict = config.Training
    type_getters = {int: section_dict.getint,
                    str: section_dict.get,
                    float: section_dict.getfloat,
                    bool: section_dict.getboolean
                    }
    for el_name, el_abbr in FileUtils.fname_long_2_short.items():
        el_type = FileUtils.fname_el_types[el_abbr]
        getter = type_getters.get(el_type)
        if getter is not None:
            fname_els_dict[el_name] = getter(el_name)
        elif callable(el_type):
            # A lambda or func. Apply it to the raw string value:
            fname_els_dict[el_name] = el_type(section_dict[el_name])

    fname_els_dict['num_classes'] = num_classes

    # Retain for later calls to FileUtils.construct_filename():
    self.fname_els_dict = fname_els_dict

    # Subdir name without leading path:
    dir_basename = FileUtils.construct_filename(fname_els_dict,
                                                prefix='models',
                                                suffix=None,
                                                incl_date=True)
    final_dir_path = os.path.join(model_root, dir_basename)

    # Disambiguate by appending '_r<n>' until
    # the path does not yet exist:
    run_number = 1
    while os.path.exists(final_dir_path):
        final_dir_path = os.path.join(model_root,
                                      f"{dir_basename}_r{run_number}")
        run_number += 1

    os.makedirs(final_dir_path)
    return final_dir_path
def prep_model_inference(self, model_path):
    '''
    1. Parses model_path into its components, and creates a dict:
       self.model_props, which contains the network type, grayscale
       or not, whether pretrained, etc.
    2. Creates self.csv_writer to write results measures into csv files.
       The destination file is determined as follows:
           <script_dir>/runs_raw_inferences/inf_csv_results_<datetime>/<model-props-derived-fname>.csv
    3. Creates self.writer(), a tensorboard writer with destination dir:
           <script_dir>/runs_inferences/inf_results_<datetime>
    4. Creates an ImageFolder classed dataset to self.samples_path
    5. Creates a shuffling DataLoader
    6. Initializes self.num_classes and self.class_names
    7. Creates self.model from the passed-in model_path name

    :param model_path: path to model that will be used for
        inference by this instance of Inferencer
    :type model_path: str
    '''
    model_fname = os.path.basename(model_path)

    # Extract model properties
    # from the model filename:
    self.model_props = FileUtils.parse_filename(model_fname)

    # Timestamped destination dir for the raw-results csv:
    csv_results_root = os.path.join(self.curr_dir, 'runs_raw_inferences')
    ts = FileUtils.file_timestamp()
    self.csv_dir = os.path.join(csv_results_root, f"inf_csv_results_{ts}")
    os.makedirs(self.csv_dir, exist_ok=True)

    csv_file_nm = FileUtils.construct_filename(self.model_props,
                                               prefix='inf',
                                               suffix='.csv',
                                               incl_date=True)
    csv_path = os.path.join(self.csv_dir, csv_file_nm)
    self.csv_writer = CSVWriterCloseable(csv_path)

    # Separate timestamp for the tensorboard dir so it
    # reflects its own creation time:
    ts = FileUtils.file_timestamp()
    tensorboard_root = os.path.join(self.curr_dir, 'runs_inferences')
    tensorboard_dest = os.path.join(tensorboard_root,
                                    f"inf_results_{ts}")
    os.makedirs(tensorboard_dest, exist_ok=True)
    self.writer = SummaryWriterPlus(log_dir=tensorboard_dest)

    # NOTE(review): parse_filename() may yield 'to_grayscale'
    # as a string ('True'/'False') rather than a bool — confirm
    # that SingleRootImageDataset and NetUtils.get_net expect that.
    dataset = SingleRootImageDataset(
        self.samples_path,
        to_grayscale=self.model_props['to_grayscale'])

    # Make results reproducible:
    Utils.set_seed(42)
    self.loader = DataLoader(dataset,
                             batch_size=self.batch_size,
                             shuffle=True,
                             drop_last=True)
    self.class_names = dataset.class_names()
    self.num_classes = len(self.class_names)

    # Get the right type of model.
    # Don't bother getting it pretrained,
    # or freezing it, b/c we will overwrite
    # the weights:
    self.model = NetUtils.get_net(
        self.model_props['net_name'],
        num_classes=self.num_classes,
        pretrained=False,
        freeze=0,
        to_grayscale=self.model_props['to_grayscale'])

    self.log.info(f"Tensorboard info written to {tensorboard_dest}")
    self.log.info(f"Result measurement CSV file(s) written to {csv_path}")