def create_criteria_from_cfg(self, cfg=None, **module_kwargs):
    cfg = _prepare_cfg(cfg, CRITERIA_KEYS)
    if not cfg.criteria:
        return
    bd.print_separator()
    bd.log('Building criteria from cfg')
    ret = bd.State({'weights': {}})
    w_strs, mod_strs = [], []
    # Normalize kwargs keys for case-insensitive matching by criterion name
    module_kwargs = {k.lower(): v for k, v in module_kwargs.items()}
    for name in cfg.criteria:
        kwargs = {}
        if 'all' in module_kwargs:
            kwargs.update(module_kwargs['all'])
        if name.lower() in module_kwargs:
            kwargs.update(module_kwargs[name.lower()])
        module = bd.magic_module([name, {'kwargs': kwargs}])
        with cfg.group_fallback():
            weight = cfg.g[name].get('criterion_weight')
        mod_strs.append(f'\t{module}')
        w_strs.append(f'\t{name}={weight}')
        ret[name] = module
        ret.weights[name] = weight
    bd.write('Criteria:\n' + '\n'.join(mod_strs))
    bd.write('Weights:\n' + '\n'.join(w_strs))
    bd.print_separator()
    return ret

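# Call sketch (hypothetical cfg contents and engine instance): per-criterion
# kwargs are matched case-insensitively against the names in cfg.criteria,
# with the special 'all' entry applied to every criterion first.
criteria = engine.create_criteria_from_cfg(
    cfg,
    all={'reduction': 'mean'},
    L1Loss={'reduction': 'sum'},  # overrides 'all' for the L1Loss criterion only
)
# A weighted total loss would then be, e.g.:
#     loss = sum(criteria.weights[n] * criteria[n](prediction, target)
#                for n in cfg.criteria)
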
def freeze_stats(module, name=None):
    if not _has_frozen_stats(module):
        altname = '.' if name is None else f': {name}'
        bd.log(f'Freezing batchnorm running stats{altname}')
        BatchNormFreezer.assign(module, 'forward')
        BatchNormFreezer.assign(module, 'extra_repr')
        setattr(module, 'frozen_stats', True)

def unfreeze_stats(module, name=None):
    if _has_frozen_stats(module):
        name = '.' if name is None else f': {name}'
        bd.log(f'Unfreezing batchnorm module{name}')
        BatchNormFreezer.unassign(module, 'forward')
        BatchNormFreezer.unassign(module, 'extra_repr')
        setattr(module, 'frozen_stats', False)

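# Minimal usage sketch (hypothetical model): freeze and later unfreeze the
# running statistics of every BatchNorm module in a network.
import torch.nn as nn

model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.BatchNorm2d(8), nn.ReLU())
for name, m in model.named_modules():
    if isinstance(m, nn.modules.batchnorm._BatchNorm):
        freeze_stats(m, name=name)  # running_mean/var stop updating in train()
# ... fine-tune on new data ...
for name, m in model.named_modules():
    if isinstance(m, nn.modules.batchnorm._BatchNorm):
        unfreeze_stats(m, name=name)
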
def guard(signum, frame):
    bd.write()
    bd.log(
        'Received Interrupt in guarded section. '
        'Program will terminate when section is done. '
        'To terminate immediately use SIGKILL.'
    )
    if reason is not None:
        bd.write(f'Reason: {reason}')
    _SIG.sig = True

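# Registration sketch (assumed context): guard is a signal handler that
# closes over `reason` and `_SIG` from its enclosing scope. Wiring it up
# around a critical section would look roughly like this:
import signal

previous = signal.signal(signal.SIGINT, guard)  # defer Ctrl-C during section
try:
    pass  # ... critical section (e.g. checkpoint writing) ...
finally:
    signal.signal(signal.SIGINT, previous)  # restore prior handling
    # if _SIG.sig was set, the program can now terminate cleanly
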
def schedule_step(metric=None):
    current_lrs = [group['lr'] for group in optimizer.param_groups]
    schedule_fn(metric)
    new_lrs = [group['lr'] for group in optimizer.param_groups]
    for i, (current_lr, new_lr) in enumerate(zip(current_lrs, new_lrs)):
        if new_lr != current_lr:
            bd.log(
                f'Learning rate changed from {current_lr:.2e} '
                f'to {new_lr:.2e} (param_group {i})'
            )

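# Sketch of the enclosing scope (assumed): schedule_step reads `optimizer`
# and `schedule_fn` from its surrounding scope, so it logs whenever the
# scheduler actually changes a learning rate. A plausible setup with
# standard PyTorch pieces:
import torch

params = [torch.nn.Parameter(torch.zeros(1))]
optimizer = torch.optim.SGD(params, lr=1e-2)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=2)
schedule_fn = scheduler.step

for epoch in range(10):
    val_loss = 1.0  # placeholder validation metric
    schedule_step(val_loss)  # logs only when a param_group's lr changes
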
def setup_devices_from_cfg(self, cfg=None):
    cfg = _prepare_cfg(cfg, DEVICE_KEYS)
    self.set_device(cfg.device)
    device = self.devices.default
    if device.type == 'cuda':
        bd.log(f'Setting default cuda device: {device}')
        torch.cuda.set_device(device)
    bd.log(f'Setting cudnn_benchmark={cfg.cudnn_benchmark}')
    torch.backends.cudnn.benchmark = cfg.cudnn_benchmark

def __init__(self, *paths, recurse=False):
    bd.log('Launching Matlab Engine...')
    main_path = bd.main_file_path()
    start = time.time()
    self.engine = matlab.engine.start_matlab(f'-sd {main_path}')
    end = time.time()
    bd.log(f'Matlab launch done. Time taken: {end - start:.2f}s.')
    # Add the current path to access .m functions defined here
    self.add_path(os.path.dirname(os.path.abspath(__file__)), recurse=True)
    self.add_path(*paths, recurse=recurse)

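# Usage sketch (wrapper class name assumed, as only __init__ is shown here):
# launch the engine and call an .m function living on one of the added paths.
eng = MatlabEngineWrapper('/path/to/mfiles', recurse=True)  # hypothetical name
result = eng.engine.my_function(1.0, nargout=1)  # my_function.m is assumed
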
def attach_data(self, mode, force=False):
    if force or (f'data.{mode}' not in self):
        bd.log(f'Attaching {mode} dataset.')
        fn_name = f'setup_{mode}_data'
        if not hasattr(self, fn_name):
            bd.warn(
                f'Could not find setup function for {mode} dataset. '
                'Will not attach to engine.'
            )
            return
        data_fn = getattr(self, fn_name)
        self.data[mode] = data_fn()

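# Convention sketch (assumed): attach_data looks up a setup_<mode>_data
# method by name, so an engine subclass only needs to define one per split.
class MyEngine(Engine):  # 'Engine' base class name is assumed
    def setup_train_data(self):
        return build_train_loader()  # hypothetical helper

engine = MyEngine()
engine.attach_data('train')  # calls setup_train_data()
engine.attach_data('val')    # warns: no setup_val_data defined
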
def determine_channel_sizes(module_list, x_in=None):
    if x_in is None:
        x_in = torch.ones(1, 3, 256, 256)
    nf = []
    bd.print_separator()
    bd.log('Calculating channel sizes.')
    for module in module_list:
        x_in = module(x_in)
        nf.append(x_in.shape[1])
    bd.log(f'Sizes are {nf}')
    bd.print_separator()
    return nf

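# Minimal example (hypothetical modules): probe the per-stage channel counts
# of a small conv stack using the default 1x3x256x256 dummy input.
import torch.nn as nn

stages = nn.ModuleList([
    nn.Conv2d(3, 16, 3, padding=1),
    nn.Conv2d(16, 32, 3, padding=1),
    nn.Conv2d(32, 64, 3, padding=1),
])
sizes = determine_channel_sizes(stages)  # -> [16, 32, 64]
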
def __call__(self, model):
    if not isinstance(model, nn.Module):
        raise TypeError(
            'Initializer expected nn.Module as model '
            f'but got {torch.typename(model)}'
        )
    bd.print_separator()
    bd.log(f'Initializing {model.__class__.__name__} with {self}')
    for mname, module in model.named_modules():
        for pname, parameter in module._parameters.items():
            mod_type = torch.typename(module).split('.')[-1]
            if self._filter(module, mname, parameter, pname):
                bd.log(f'Initializing "{pname}" in type {mod_type} module: {mname}')
                yield parameter, (module, mname, pname)
    bd.print_separator()

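# Consumption sketch (assumed pattern): __call__ yields (parameter, context)
# pairs for everything passing self._filter, so a caller applies the actual
# initialization in a loop with standard nn.init routines:
import torch.nn as nn

def apply_xavier(initializer, model):
    for parameter, (module, mname, pname) in initializer(model):
        if parameter is not None and parameter.dim() > 1:
            nn.init.xavier_uniform_(parameter)
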
def torchvision_dataset(
    torchvision_dataset,
    data_root_path,
    download=True,
    transform=None,
    target_transform=None,
    train=True,
    torchvision_as_is=True,
):
    """Creates a dataset from torchvision, configured using Command Line Arguments.

    Args:
        transform (callable, optional): A function that transforms an image
            (default None).
        target_transform (callable, optional): A function that transforms a
            label (default None).
        train (bool, optional): Training set or validation - if applicable
            (default True).

    Relevant Command Line Arguments:

        - **dataset**: `--data`, `--torchvision_dataset`.

    Warning:
        Unlike the torchvision datasets, this function returns a dataset that
        uses NumPy arrays instead of PIL images.
    """
    if torchvision_dataset is None:
        raise RuntimeError('Argument torchvision_dataset was not specified.')
    dset_str = torchvision_dataset.lower()
    bd.log(f'Using {dset_str} dataset from torchvision.')
    if dset_str in _dsets:
        TVDataset = getattr(datasets, _dsets[dset_str])
        if dset_str in _NEED_GETITEM:
            TVDataset.__getitem__ = _custom_get_item
        ret_dataset = TVDataset(
            data_root_path,
            train=train,
            download=download,
            transform=transform,
            target_transform=target_transform,
        )
        if dset_str in _NEED_UNSQUEEZING:
            ret_dataset.data = ret_dataset.data.unsqueeze(3).numpy()
            ret_dataset.targets = ret_dataset.targets.numpy()
    else:
        raise NotImplementedError(f'{torchvision_dataset} dataset not implemented.')
    return ret_dataset

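# Hypothetical call, assuming 'mnist' is a key in _dsets: the returned
# dataset exposes .data and .targets as NumPy arrays (not PIL images),
# per the docstring warning.
train_set = torchvision_dataset('mnist', './data', train=True, download=True)
first_image, first_label = train_set.data[0], train_set.targets[0]
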
def __call__(self, cmd, should_raise=True, log=False):
    if log:
        bd.log(f'git {cmd}')
    # shell=True so we can cd into the repo directory before invoking git.
    # Note: subprocess.run only raises CalledProcessError when check=True,
    # so the failure check is done explicitly on the returncode instead.
    proc = subprocess.run(
        f'cd {self.directory}; git {cmd}',
        shell=True,
        capture_output=True,
        text=True,
    )
    success = proc.returncode == 0
    if not success and should_raise:
        raise subprocess.CalledProcessError(
            proc.returncode, cmd, output=proc.stdout, stderr=proc.stderr
        )
    return proc, success

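# Usage sketch: the wrapper is an instance __call__, so given an instance
# (constructor name assumed here) commands run inside self.directory.
repo = GitRunner('/path/to/repo')  # hypothetical class name
proc, ok = repo('status --porcelain', should_raise=False)
if ok and not proc.stdout.strip():
    bd.log('Working tree clean.')
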
def pretrained_resnet_layers(
    network_name,
    num_pretrained_layers=6,
    freeze_bn_running_stats=False,
    split_before_relus=False,
):
    if (num_pretrained_layers < 0) or (num_pretrained_layers > 6):
        raise ValueError('Expected num_pretrained_layers to be in the range [0,6]')
    avail = tv.models.resnet.__all__
    if network_name not in avail:
        raise ValueError(f'Expected name to be one of {avail}, got "{network_name}"')
    bd.print_separator()
    bd.log(f'Fetching (pretrained) {network_name}.')
    bd.log(f'Number of pretrained layers: {num_pretrained_layers}.')
    model = tv.models.__dict__[network_name](pretrained=True)
    modules = [
        [model.conv1, model.bn1, model.relu],
        [model.maxpool, model.layer1],
        [model.layer2],
        [model.layer3],
        [model.layer4],
        [model.avgpool, nn.Flatten(1), model.fc],
    ]
    for i, module_list in enumerate(modules, 1):
        if i > num_pretrained_layers:
            break
        for mod in module_list:
            bd.set_pretrained(mod)
            if freeze_bn_running_stats:
                bd.log(f'Freezing batchnorm for module {torch.typename(mod)}.')
                bd.freeze_bn_running_stats(mod)
    if split_before_relus:
        # Rebind forward so each final block returns pre-relu activations;
        # the relu is then applied at the start of the next group instead.
        for m in [model.layer1, model.layer2, model.layer3, model.layer4]:
            block = m[-1]
            if isinstance(block, BasicBlock):
                block.forward = types.MethodType(forward_no_relu_basic, block)
            elif isinstance(block, Bottleneck):
                block.forward = types.MethodType(forward_no_relu_bottleneck, block)
            else:
                raise RuntimeError(
                    'Attempted to split before relu from module type: '
                    f'{torch.typename(block)}'
                )
        modules = [
            [model.conv1, model.bn1],
            [model.relu, model.maxpool, model.layer1],
            [model.relu, model.layer2],
            [model.relu, model.layer3],
            [model.relu, model.layer4],
            [model.relu, model.avgpool, nn.Flatten(1), model.fc],
        ]
    return modules

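# Example (hedged): fetch a resnet18 with the first two module groups marked
# pretrained, then stitch the groups back into a single nn.Sequential.
import torch.nn as nn

groups = pretrained_resnet_layers('resnet18', num_pretrained_layers=2)
backbone = nn.Sequential(*[m for group in groups for m in group])
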
def freeze(module_or_param, value=True):
    if not isinstance(value, bool):
        raise RuntimeError(f'bd.freeze expected value to be bool, got: {type(value)}')
    if isinstance(module_or_param, nn.Module):
        for name, m in module_or_param.named_modules():
            if name:
                bd.log(f'Setting {name} to frozen={value}.')
            else:
                bd.log(f'Setting module to frozen={value}.')
            setattr(m, 'is_frozen', value)
            for name, p in m.named_parameters(recurse=False):
                setattr(p, 'is_frozen', value)
                bd.log(f'Setting {name} requires_grad={not value}.')
                p.requires_grad_(not value)
    else:
        # module_or_param is a single parameter; mirror the module branch
        setattr(module_or_param, 'is_frozen', value)
        bd.log(f'Setting parameter requires_grad={not value}.')
        module_or_param.requires_grad_(not value)
    return module_or_param

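# Minimal sketch: freeze an encoder while leaving a head trainable, then
# keep only trainable parameters in the optimizer.
import torch
import torch.nn as nn

encoder = nn.Linear(8, 8)
head = nn.Linear(8, 2)
freeze(encoder)  # sets is_frozen=True and requires_grad=False throughout
trainable = [
    p
    for p in list(encoder.parameters()) + list(head.parameters())
    if p.requires_grad
]
optimizer = torch.optim.Adam(trainable, lr=1e-3)
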
def setup(self, *cfg_files, extra=None, use_sysargv=True):
    if not self._prv['done_setup']:
        bd.log('Processing configuration')
        arglist = []
        # if use_sysargv is bd.Null:
        #     use_sysargv = self._prv['is_core_config']
        if not isinstance(use_sysargv, bool):
            raise RuntimeError(
                f'use_sysargv expected a bool value. Got {type(use_sysargv)}'
            )
        if use_sysargv:
            arglist += sys.argv[1:]
        if cfg_files:
            cfg_files = [bd.process_path(f) for f in cfg_files]
            arglist += cfg_files
        if extra is not None:
            arglist += extra
        self._update_data_from_parsed(self._parse(arglist))
        all_groups = self._prv['all_groups']
        if all_groups:
            bd.log(f'Groups defined: {self._prv["all_groups"]}')
        # Register automatic arguments
        self._prv['data']['time_configured'] = _create_datum(
            time.strftime("%Y/%m/%d %H:%M:%S")
        )
        self._prv['data']['process_id'] = _create_datum(_PROCESS_ID)
        self._prv['data']['session_path'] = _create_datum(None)
        # Leave this here, as the Logger functions called later on (in the
        # subprocess) and accessing cfg.project_path and cfg.session_name
        # depend on correctly identifying if _prv['done_setup'] is True or False
        self._prv['done_setup'] = True
        # # If using logger, notify with session_id. This is to change the ID
        # # from the execution_id to the session_path
        # if bd.BoardomLogger._started:
        #     # CFG needs to be sent first (lmdb requires session_path)
        #     bd.BoardomLogger()._send_cfg_full()
        #     bd.BoardomLogger()._start_lmdb()
        bd.log('Config done.')
    elif cfg_files:
        raise RuntimeError(
            'Could not setup from config files as bd.setup() was already called.'
        )
    return self

def _create_session(cfg, session_name=None):
    cfg.setup()
    if not cfg._prv['has_core_config']:
        raise RuntimeError('Can not create_session without core configuration')
    session = Session()
    # Configure session name
    if isinstance(session_name, str):
        _set(cfg, 'session_name', session_name)
    elif isinstance(session_name, Callable):
        session_name = session_name(cfg)
        _set(cfg, 'session_name', session_name)
    elif session_name is None:
        session_name = _get(cfg, 'session_name')
    else:
        raise RuntimeError(
            f'Unknown type for session_name parameter: {type(session_name)}'
        )
    bd.log(f'Creating {session_name} session.')
    project_path = bd.process_path(_get(cfg, 'project_path'), create=True)
    bd.log(f'Project path: {project_path}.')
    session_path = os.path.join(project_path, session_name)
    bd.make_dir(session_path)
    boardom_path = bd.make_dir(os.path.join(session_path, '.boardom'))
    session_file = os.path.join(boardom_path, BD_FILENAME)
    # TODO: Improve Management of Session Files
    # -- Maybe use a single file?
    # -- Maybe add information
    if not os.path.exists(session_file):
        with open(session_file, 'w') as f:
            f.write('42')
    # Maybe create log
    create_log = _get(cfg, 'log_stdout')
    if create_log:
        log_name = f'{session_name}_{_PROCESS_ID}.log'
        logdir = os.path.join(session_path, 'log')
        logdir = bd.process_path(logdir, create=True)
        logfile = os.path.join(logdir, log_name)
        logfile = bd.number_file_if_exists(logfile)
        bd.log(f'Creating log file at {logfile}')
        session.stream_replicator = bd.replicate_std_stream(logfile, 'stdout')
    # Maybe copy config files
    cfg_files = cfg._prv['cfg_files']
    copy_config_files = _get(cfg, 'copy_config_files')
    if copy_config_files:
        for i, filename in enumerate(cfg_files):
            config_path = os.path.join(session_path, 'cfg')
            bd.make_dir(config_path)
            if i == 0:
                bd.log(f'Copying configuration files to {config_path}')
            fname, ext = os.path.splitext(filename)
            copied_config_filename = f'{fname}_{_PROCESS_ID}{ext}'
            bd.copy_file_to_dir(
                filename,
                config_path,
                number=True,
                new_name=copied_config_filename,
            )
    # Maybe save full config
    save_full_config = _get(cfg, 'save_full_config')
    if save_full_config:
        config_path = os.path.join(session_path, 'cfg')
        bd.make_dir(config_path)
        config_file = os.path.join(config_path, f'full_cfg_{_PROCESS_ID}.bd')
        config_file = bd.number_file_if_exists(config_file)
        bd.log(f'Saving full configuration at: {config_file}')

        # Makes an entry for the saved settings file
        def _make_entry(key, val):
            if any(isinstance(val, x) for x in [list, tuple]):
                val = ' '.join([str(x) for x in val])
            return f'{key} {str(val)}'

        args_to_print = [_make_entry(key, val) for key, val in cfg.__dict__.items()]
        args_to_print.sort()
        bd.write_string_to_file('\n'.join(args_to_print), config_file)
    autocommit = _get(cfg, 'autocommit')
    only_run_same_hash = _get(cfg, 'only_run_same_hash')
    _, _, autohash = maybe_autocommit(autocommit, only_run_same_hash, session_path)
    pid_fname = f'process.{_PROCESS_ID}'
    if autohash is not None:
        pid_fname += f'.{autohash}'
    # process_dir = bd.make_dir(os.path.join(boardom_path, 'processes'))
    # process_id_file = os.path.join(process_dir, pid_fname)
    #
    # if os.path.exists(process_id_file):
    #     raise RuntimeError(
    #         'Process File Already Exists?!? That is unlucky. Please run again..'
    #         f'\n id: {process_id_file}'
    #     )
    # else:
    #     with open(process_id_file, 'w') as f:
    #         f.write('42')
    if _get(cfg, 'print_cfg'):
        bd.write('-' * 80)
        bd.write(cfg)
        bd.write('-' * 80)
    cfg._prv['data']['session_path'] = _create_datum(session_path)
    return session