def main(args_parsed):
    o_path = Path(args_parsed.output)
    o_path.mkdir(parents=True, exist_ok=True)

    res = dict()
    res["name"] = args_parsed.name
    res["createTime"] = int(time.time())
    if args_parsed.describe is not None:
        res["description"] = args_parsed.describe

    mods = list()
    for l_mod in args_parsed.model:
        mod_cont = dict()
        conf = l_mod[0]
        path = l_mod[1]
        # if not ensure_exist(conf) or not ensure_exist(path):
        if not ensure_exist(conf):
            continue
        try:
            cf_content = read_json(conf)
            for sec in ("name", "arch", "metrics", "tester"):
                mod_cont[sec] = cf_content[sec]
            mod_cont["path"] = os.path.basename(path)  # copy model & rename
            mods.append(mod_cont)
        except Exception as e:
            print(str(e))
            continue

    res["models"] = mods
    write_json(res, o_path / (args_parsed.name + ".json"))
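# `main` above (and the config parsers below) rely on read_json / write_json
# helpers, plus ensure_exist. A minimal sketch of what they might look like,
# assuming pytorch-template-style JSON I/O; the actual implementations in the
# source repo may differ:
import json
from collections import OrderedDict
from pathlib import Path


def read_json(fname):
    # parse a JSON file, preserving key order
    with Path(fname).open('rt') as handle:
        return json.load(handle, object_hook=OrderedDict)


def write_json(content, fname):
    # pretty-print a dict to a JSON file
    with Path(fname).open('wt') as handle:
        json.dump(content, handle, indent=4, sort_keys=False)


def ensure_exist(fname):
    # report missing files instead of raising, so callers can skip them
    exists = Path(fname).exists()
    if not exists:
        print(f"{fname} does not exist, skipping")
    return exists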
def __init__(self, args, resume=None, run_id=None):
    """
    Class to parse the configuration json file. Handles hyperparameters for
    training, initialization of modules, checkpoint saving and logging.

    :param args: Parsed command-line arguments (an argparse Namespace);
        its attributes are copied into the config dict.
    :param resume: String, path to the checkpoint being loaded.
    :param run_id: Unique identifier for training processes. Used to save
        checkpoints and training log. Timestamp is used as default.
    """
    # build the config dict from the parsed cli arguments
    self._config = {}
    for key in vars(args):
        self._config[key] = getattr(args, key)

    # set save_dir where trained model and log will be saved.
    save_dir = Path(self.config['trainer_save_dir'])
    exper_name = self.config['exper_name']
    if run_id is None:  # use timestamp as default run-id
        run_id = datetime.now().strftime(r'%m%d_%H%M%S')
    self._save_dir = save_dir / 'models' / exper_name / run_id
    self._log_dir = save_dir / 'log' / exper_name / run_id

    # make directory for saving checkpoints and log.
    exist_ok = run_id == ''
    self.save_dir.mkdir(parents=True, exist_ok=exist_ok)
    self.log_dir.mkdir(parents=True, exist_ok=exist_ok)

    # save updated config file to the checkpoint dir
    write_json(self.config, self.save_dir / 'config.json')
def __init__(self, args, options='', timestamp=True):
    # parse default and custom cli options
    for opt in options:
        args.add_argument(*opt.flags, default=None, type=opt.type)
    args = args.parse_args()

    self.resume = None
    self.cfg_fname = None  # set below from --resume or --config
    if args.device:
        os.environ["CUDA_VISIBLE_DEVICES"] = args.device
    if args.resume:
        self.resume = Path(args.resume)
        self.cfg_fname = self.resume.parent / 'config.json'
    if args.config:
        self.cfg_fname = Path(args.config)
    msg_no_cfg = ("Configuration file needs to be specified. "
                  "Add '-c config.json', for example.")
    assert self.cfg_fname is not None, msg_no_cfg

    # load config file and apply custom cli options
    config = read_json(self.cfg_fname)
    self.__config = _update_config(config, options, args)
    self.__raw = copy.deepcopy(self.__config)

    # set save_dir where trained model and log will be saved.
    save_dir = Path(
        parse_value(self.config['trainer']['extra_args']['save_dir']))
    timestamp = datetime.now().strftime(r'%m%d_%H%M%S') if timestamp else ''
    exper_name = self.config['name']
    self.__save_dir = save_dir / 'models' / exper_name / timestamp
    self.__log_dir = save_dir / 'log' / exper_name / timestamp
    self.save_dir.mkdir(parents=True, exist_ok=True)
    self.log_dir.mkdir(parents=True, exist_ok=True)

    # save updated config file to the checkpoint dir (rank 0 only, to avoid
    # concurrent writes in distributed training)
    if get_global_rank() == 0:
        write_json(self.config, self.save_dir / 'config.json')

    # configure logging module
    setup_logging(self.log_dir)
    self.log_levels = {
        0: logging.WARNING,
        1: logging.INFO,
        2: logging.DEBUG
    }
    logger = self.get_logger('config')
    logger.info(f"Experiment name: {exper_name}")
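# The `options` argument above is iterated for `.flags` and `.type`, matching
# the CustomArgs namedtuple convention of pytorch-template-style projects. A
# hypothetical usage sketch (the flag names and `target` keychains are
# assumptions, not taken from the source):
import argparse
import collections

CustomArgs = collections.namedtuple('CustomArgs', 'flags type target')

parser = argparse.ArgumentParser()
parser.add_argument('-c', '--config', default=None, type=str)
parser.add_argument('-r', '--resume', default=None, type=str)
parser.add_argument('-d', '--device', default=None, type=str)
options = [
    CustomArgs(['--lr', '--learning_rate'], type=float,
               target='optimizer;args;lr'),
    CustomArgs(['--bs', '--batch_size'], type=int,
               target='data_loader;args;batch_size'),
]
# config = ConfigParser(parser, options)  # assuming the enclosing class name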
def __init__(self, config, resume=None, modification=None, run_id=None,
             mode='train'):
    """
    Class to parse the configuration json file. Handles hyperparameters for
    training, initialization of modules, checkpoint saving and logging.

    :param config: Dict containing configurations, hyperparameters for
        training; contents of the `config.json` file, for example.
    :param resume: String, path to the checkpoint being loaded.
    :param modification: Dict keychain:value, specifying position values to
        be replaced from the config dict.
    :param run_id: Unique identifier for training processes. Used to save
        checkpoints and training log. Timestamp is used as default.
    :param mode: Only 'train' creates the save directory and writes the
        config file back out; other modes skip both.
    """
    # load config file and apply modification
    self._config = _update_config(config, modification)
    self.resume = resume

    # set save_dir where trained model and log will be saved.
    save_dir = Path(self.config['trainer']['save_dir'])
    exper_name = self.config['name']
    if run_id is None:  # use timestamp as default run-id
        run_id = datetime.now().strftime(r'%m%d_%H%M%S')
    self._save_dir = save_dir / 'models' / exper_name / run_id
    # self._log_dir = save_dir / 'log' / exper_name / run_id
    self._log_dir = self._save_dir

    # make directory for saving checkpoints and log.
    if mode == 'train':
        exist_ok = run_id == ''
        self.save_dir.mkdir(parents=True, exist_ok=exist_ok)
        # self.log_dir.mkdir(parents=True, exist_ok=exist_ok)

    # save updated config file to the checkpoint dir
    if mode == 'train':
        write_json(self.config, self.save_dir / 'config.json')

    # configure logging module
    setup_logging(self.log_dir)
    self.log_levels = {
        0: logging.WARNING,
        1: logging.INFO,
        2: logging.DEBUG
    }
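# The variant above calls _update_config(config, modification) with two
# arguments, unlike the (config, options, args) form used elsewhere in this
# section. A minimal sketch of the two-argument, pytorch-template-style
# implementation; the ';'-separated keychain syntax is an assumption:
from functools import reduce
import operator


def _update_config(config, modification):
    # apply a flat dict of 'a;b;c' keychain -> value overrides to the config
    if modification is None:
        return config
    for keychain, value in modification.items():
        if value is not None:
            _set_by_path(config, keychain, value)
    return config


def _set_by_path(tree, keys, value):
    # set a value in a nested dict, addressed by a ';'-separated keychain
    keys = keys.split(';')
    reduce(operator.getitem, keys[:-1], tree)[keys[-1]] = value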
def __init__(self, args, options='', timestamp=True, slave_mode=False):
    # slave_mode - when calling the config parser from an existing process, we
    # avoid reinitialising the logger and ignore sys.argv when argparsing.

    # parse default and custom cli options
    for opt in options:
        args.add_argument(*opt.flags, default=None, type=opt.type)

    if slave_mode:
        args = args.parse_args(args=[])
    else:
        args = args.parse_args()

    if args.device:
        os.environ["CUDA_VISIBLE_DEVICES"] = args.device

    if args.resume and not slave_mode:
        self.resume = Path(args.resume)
        # self.cfg_fname = self.resume.parent / 'config.json'
    else:
        msg_no_cfg = "Config file must be specified"
        assert args.config is not None, msg_no_cfg
        self.resume = None
    self.cfg_fname = Path(args.config)

    # load config file and apply custom cli options
    config = read_json(self.cfg_fname)
    self._config = _update_config(config, options, args)

    if self._config.get("eval_config", False):
        # validate path to evaluation file
        eval_cfg_path = self._config.get("eval_config")
        msg = f"eval_config was specified, but `{eval_cfg_path}` does not exist"
        assert Path(self._config.get("eval_config")).exists(), msg

    # set save_dir where trained model and log will be saved.
    if "trainer" in self.config:
        save_dir = Path(self.config['trainer']['save_dir'])
    else:
        save_dir = Path(self.config['tester']['save_dir'])

    timestamp = datetime.now().strftime(
        r"%Y-%m-%d_%H-%M-%S") if timestamp else ""
    if slave_mode:
        timestamp = f"{timestamp}-eval-worker"

    # We assume that the config files are organised into directories such that
    # each directory has the name of the dataset.
    dataset_name = self.cfg_fname.parent.stem
    exper_name = f"{dataset_name}-{self.cfg_fname.stem}"
    self._save_dir = save_dir / 'models' / exper_name / timestamp
    self._log_dir = save_dir / 'log' / exper_name / timestamp
    self._web_log_dir = save_dir / 'web' / exper_name / timestamp
    self._exper_name = exper_name
    self._args = args

    # if set, remove all previous experiments with the current config
    if vars(args).get("purge_exp_dir", False):
        for dirpath in (self._save_dir, self._log_dir, self._web_log_dir):
            config_dir = dirpath.parent
            existing = list(config_dir.glob("*"))
            print(f"purging {len(existing)} directories from config_dir...")
            tic = time.time()
            os.system(f"rm -rf {config_dir}")
            print(f"Finished purge in {time.time() - tic:.3f}s")

    self.save_dir.mkdir(parents=True, exist_ok=True)
    self.log_dir.mkdir(parents=True, exist_ok=True)

    # save updated config file to the checkpoint dir
    write_json(self.config, self.save_dir / 'config.json')

    # configure logging module
    if not slave_mode:
        self.log_path = setup_logging(self.log_dir)

    self.log_levels = {
        0: logging.WARNING,
        1: logging.INFO,
        2: logging.DEBUG
    }
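# Every variant in this section calls setup_logging(log_dir), and the one
# above keeps its return value as self.log_path. A minimal sketch under the
# assumption that it configures console plus file logging and returns the log
# file path; pytorch-template-style projects typically load a
# logger_config.json instead:
import logging
from pathlib import Path


def setup_logging(save_dir, default_level=logging.INFO):
    # log to the console and to an info.log file inside the run's log dir
    log_path = Path(save_dir) / 'info.log'
    logging.basicConfig(
        level=default_level,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        handlers=[logging.StreamHandler(), logging.FileHandler(log_path)],
    )
    return log_path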
def __init__(self, config, resume=None, modification=None, run_id=None):
    """
    Class to parse the configuration json file. Handles hyperparameters for
    training, initialization of modules, checkpoint saving and logging.

    :param config: Dict containing configurations, hyperparameters for
        training; contents of the `rnn_config.json` file, for example.
    :param resume: String, path to the checkpoint being loaded.
    :param modification: Dict keychain:value, specifying position values to
        be replaced from the config dict.
    :param run_id: Unique identifier for training processes. Used to save
        checkpoints and training log. Timestamp is used as default.
    """
    # unique run identifier: use a timestamp when none is given
    if run_id is None:
        run_id = datetime.now().strftime(r'%m%d_%H%M%S')

    # load config file and apply modification (this must run first, before
    # any values are read from the config below)
    self._config = _update_config(config, modification)

    # experiment name
    exper_name = self.config['name']
    # path to the checkpoint being resumed
    self.resume = resume

    # root directory under which models, records and logs are saved
    save_dir = Path(self.config['trainer']['saved'])
    # Path objects can be joined with the `/` operator, unlike plain strings
    self._save_dir = save_dir / 'models' / run_id
    self._log_dir = save_dir / 'log' / run_id

    # make directories for checkpoints and logs.
    # mkdir(parents=True) also creates any missing parent directories;
    # exist_ok=True suppresses the error when the directory already exists
    # (here exist_ok is True only when run_id is the empty string).
    exist_ok = run_id == ''
    self.save_dir.mkdir(parents=True, exist_ok=exist_ok)
    self.log_dir.mkdir(parents=True, exist_ok=exist_ok)

    # save the updated config used by this run_id to the checkpoint dir
    # NOTE: the output file name is hard-coded here and should be made
    # configurable.
    write_json(self.config, self.save_dir / 'config.json')

    # configure logging module
    setup_logging(self.log_dir)
    # log verbosity levels
    self.log_levels = {
        0: logging.WARNING,
        1: logging.INFO,
        2: logging.DEBUG
    }
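# A hypothetical end-to-end usage sketch for the variant above. The enclosing
# class name (ConfigParser) and the config contents are assumptions, not taken
# from the source:
config = {
    'name': 'rnn_experiment',       # assumed experiment name
    'trainer': {'saved': 'saved/'},
}
# parser = ConfigParser(config, resume=None, run_id='debug')
# parser.save_dir  -> Path('saved/models/debug')
# parser.log_dir   -> Path('saved/log/debug')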