def get_default_model_parameters():
    """Return the template defaults extended with the two highway options.

    Both highway options default to "none"; the source lists the accepted
    values as [none|mlp,gate,add].
    """
    template_defaults = ModelTemplate.get_default_model_parameters()
    highway_defaults = HParams(
        token_rep_highway="none",  # [none|mlp,gate,add]
        seq_rep_highway="none",  # [none|mlp,gate,add]
    )
    return merge_params(template_defaults, highway_defaults)
def setup_training(args: argparse.Namespace) -> None:
    """
    Prepares a training run and hands control over to train_model(...).

    Reads the hyperparameters, records the available device, creates the checkpoint and log
    directories for this architecture, configures the root logger (file + terminal) and starts
    the training. The model to train can be the complete model with all the tiers
    (args.tier is None) or a single tier (the one given in args.tier).

    Args:
        args (argparse.Namespace): parameters to set up the training. At least, args must
            contain: args = {"path_config": ..., "tier": ..., "checkpoint_path": ...}
    """
    # Step 1: hyperparameters from the YAML file, plus the device found on this machine
    hp = HParams.from_yaml(args.path_config)
    device_name = 'cuda:0' if torch.cuda.is_available() else 'cpu'
    hp["device"] = torch.device(device_name)

    # Step 2: tag describing the architecture and a timestamp for this run; both are used to
    # name the folders and files created below
    name_parts = (
        f"d{hp.name}",
        f"t{hp.network.n_tiers}",
        f"l{'.'.join(str(size) for size in hp.network.layers)}",
        f"hd{hp.network.hidden_size}",
        f"gmm{hp.network.gmm_size}",
    )
    extension_architecture = "_".join(name_parts)
    timestamp = datetime.now().strftime('%Y%m%d-%H%M%S')

    # Step 3: directories. Runs of the same architecture share these folders; only the files
    # stored inside differ between runs.
    # Step 3.1: model weights
    hp["training"]["dir_chkpt"] = hp.training.dir_chkpt + extension_architecture
    Path(hp.training.dir_chkpt).mkdir(parents=True, exist_ok=True)
    # Step 3.2: general logs
    hp["logging"]["dir_log"] = hp.logging.dir_log + extension_architecture
    Path(hp.logging.dir_log).mkdir(parents=True, exist_ok=True)

    # Step 4: general logging; the log file lives in the folder created above and is named
    # after the tier being trained and the timestamp
    if args.tier is None:
        tier = 'ALL'
    else:
        tier = str(args.tier)
    filename = f"{hp.logging.dir_log}/tier{tier}_{timestamp}"
    file_handler = logging.FileHandler(filename=filename)  # saves the log to a file
    terminal_handler = logging.StreamHandler()  # echoes the log to the terminal
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        handlers=[file_handler, terminal_handler])
    logger = logging.getLogger()

    # Step 5: report the device (CPU or GPU)
    logger.info(f"Device for training: {hp.device}")

    # Step 6: train the model (or a single tier, depending on args)
    train_model(args, hp, extension_architecture, timestamp, logger)
def get_default_model_parameters():
    """Return the template defaults extended with this model's own options."""
    template_defaults = ModelTemplate.get_default_model_parameters()
    model_defaults = HParams(
        use_mtsa=False,
        use_direction=False,
        output_method="clf",
        global_afn='exp',
        attn_self=True,
    )
    return merge_params(template_defaults, model_defaults)
def get_default_specific_model_parameters(network_class, network_type):
    """Load the default hyper-parameters declared by the selected model class.

    The model module is ``src.models.<network_class>.model_<network_type>`` and
    the class name is derived from the module name with ``underline_to_camel``.

    Args:
        network_class: name of the sub-package of ``src.models`` to look in.
        network_type: suffix of the model module name; ``None`` skips the lookup.

    Returns:
        The HParams returned by the model class's
        ``get_default_model_parameters()`` with an extra ``model_class`` entry.
        When ``network_type`` is ``None``, or the module/class cannot be
        resolved, ``HParams(model_class=None)`` is returned instead (a message
        is printed in the failure case; no exception is propagated).
    """
    # Function-scope import so the block does not depend on the file header.
    import importlib

    model_params = HParams(model_class=None)
    if network_type is None:
        return model_params

    model_module_name = 'model_%s' % network_type
    model_class_name = underline_to_camel(model_module_name)
    try:
        # import_module + getattr resolve the class directly; unlike the former
        # eval() on a formatted string, the caller-supplied names are never
        # executed as Python code.
        model_module = importlib.import_module(
            'src.models.%s.%s' % (network_class, model_module_name))
        model_class = getattr(model_module, model_class_name)
        model_params = model_class.get_default_model_parameters()
        model_params.add_hparam('model_class', model_class)  # add model class
    except ImportError:
        print('Fatal Error: no model module: \"src.models.%s.%s\"' %
              (network_class, model_module_name))
    except AttributeError:
        print(
            'Fatal Error: probably (1) no model class named as %s.%s, '
            'or (2) the class no \"get_default_model_parameters()\"' %
            (network_class, model_module_name))
    return model_params
def get_default_training_params():
    """Return the default training hyper-parameters as an HParams object."""
    optimisation_defaults = dict(
        optimizer='openai_adam',
        grad_norm=1.,
        n_steps=90000,
        lr=6.25e-5,
    )
    # control: saving/loading, reporting periods and batch sizes
    control_defaults = dict(
        save_model=False,
        save_num=3,
        load_model=False,
        load_path='',
        summary_period=1000,
        eval_period=500,
        train_batch_size=20,
        test_batch_size=24,
    )
    return HParams(**optimisation_defaults, **control_defaults)
def get_default_model_parameters():
    """Return the default model hyper-parameters as an HParams object."""
    defaults = HParams(
        # sizes
        embd_dim=768,
        n_hidden=768,
        n_ctx=512,
        # dropout settings
        embd_dropout=0.9,
        resid_dropout=0.9,
        attn_dropout=0.9,
        clf_dropout=0.9,
        # layers, heads and activation functions
        n_layer=12,
        # n_transfer=12,
        n_head=12,
        afn='gelu',
        clf_afn='elu',
        # loss coefficient and learning-rate schedule
        lm_coef=0.3,
        lr_schd='warmup_linear',
        lr_warmup=0.002,
        # switches
        highway=False,
        use_pe=True,
    )
    return defaults
def setup_synthesize(args: argparse.Namespace):
    """
    Sets up synthesis with the parameters specified in args and calls synthesize(...).

    Reads the model and synthesis hyperparameters, records the available device, creates the
    log, output and tensorboard directories, configures the root logger (file + terminal),
    creates the tensorboard writer and runs the synthesis. The tensorboard writer is closed
    even if synthesize(...) raises.

    Args:
        args (argparse.Namespace): parameters to set up the synthesis. args.path_config and
            args.path_synthesis are read here; args is also forwarded to synthesize(...).
    """
    # 1. Read hyperparameters from file
    hp = HParams.from_yaml(args.path_config)
    synthesisp = HParams.from_yaml(args.path_synthesis)
    # check if GPU available and add it to parameters
    hp["device"] = torch.device(
        'cuda:0' if torch.cuda.is_available() else 'cpu')

    # 2. Create extension of the architecture of the model and timestamp for this run (use to
    # identify folders and files created for this run)
    # format: d(name)_t(n_tiers)_l(layers)_hd(hidden_size)_gmm(gmm_size).
    extension_architecture = f"d{hp.name}_t{hp.network.n_tiers}_" \
                             f"l{'.'.join(map(str, hp.network.layers))}_" \
                             f"hd{hp.network.hidden_size}_gmm{hp.network.gmm_size}"
    timestamp = f"{datetime.now().strftime('%Y%m%d-%H%M%S')}"

    # 3. Create directories for saving logs and output if they do not exist
    # 3.1 Create general log directory for this run (the same directory will be used for
    # different runs of a model with same architecture and the difference will be in the file
    # stored)
    hp["logging"]["dir_log"] = hp.logging.dir_log + extension_architecture
    Path(hp.logging.dir_log).mkdir(parents=True, exist_ok=True)
    # 3.2 Create directory for the outputs of this run (the same directory will be used for
    # different runs of a model with same architecture and the difference will be in the
    # weights of the model)
    synthesisp.output_path = synthesisp.output_path + extension_architecture
    Path(synthesisp.output_path).mkdir(parents=True, exist_ok=True)

    # 4. Setup general logging (it will use the folder previously created and the filename
    # will be synthesis_<timestamp>)
    filename = f"{hp.logging.dir_log}/synthesis_{timestamp}"
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        handlers=[
            logging.FileHandler(
                filename=filename),  # handler to save the log to a file
            logging.StreamHandler(
            )  # handler to output the log to the terminal
        ])
    logger = logging.getLogger()

    # 5. Show device that will be used: CPU or GPU
    # NOTE(review): the message says "training" although this is the synthesis entry point;
    # it is kept verbatim so that existing log parsing is not affected.
    logger.info(f"Device for training: {hp.device}")

    # 6. Setup tensorboard logging
    # 6.1 Create tensorboard logs directory (tensorboard requires a different folder for
    # each run of the model) so we add the extension of the network's architecture of this
    # run and the timestamp to identify it completely
    tensorboard_dir = hp.logging.dir_log_tensorboard + extension_architecture \
                      + f"synthesis_{timestamp}"
    Path(tensorboard_dir).mkdir(parents=True, exist_ok=True)
    # 6.2 Create tensorboard writer
    tensorboardwriter = TensorboardWriter(hp, tensorboard_dir)

    # 7. Run the synthesis; the writer is released in `finally` so that a failure inside
    # synthesize(...) does not leave it open.
    try:
        synthesize(args, hp, synthesisp, extension_architecture, timestamp,
                   tensorboardwriter, logger)
    finally:
        tensorboardwriter.close()
def all_params(self):
    """Merge every registered parameter group into a single HParams object.

    Groups are visited from the most recently registered name back to the
    first one, each being merged into the accumulated result.
    """
    merged = HParams()
    for group_name in self.hparam_name_list[::-1]:
        merged = merge_params(merged, getattr(self, group_name))
    return merged
def get_default_model_parameters():
    """Return an empty HParams object (no defaults are declared here)."""
    empty_defaults = HParams()
    return empty_defaults
def get_default_preprocessing_params():
    """Return the default pre-processing hyper-parameters as an HParams object."""
    return HParams(
        max_sent_len=50,
        load_preproc=True,
    )
def __init__(self):
    """Parse the command line and register the default parameter groups.

    Groups are registered in this order: 'parsed_params',
    'preprocessed_params', 'model_params', 'training_params'. The strings given
    through --preprocessing_params, --model_params and --training_params are
    passed to the ``parse`` method of the corresponding group.
    """
    self.hparam_name_list = []

    def _to_bool(text):
        return text.lower() in ("yes", "true", "t", "1")

    # ------ parsing input arguments ------
    parser = argparse.ArgumentParser()
    parser.register('type', 'bool', _to_bool)

    # One row per option, in declaration order: (flag, type, default, help)
    option_table = (
        ('--mode', str, 'train', 'train_tasks'),
        ('--dataset', str, 'snli', '[snli|multinli_m|multinli_mm]'),
        ('--network_class', str, 'transformer', 'None'),
        ('--network_type', str, None, 'None'),
        ('--gpu', str, '3', 'selected gpu index'),
        ('--gpu_mem', float, None, 'selected gpu index'),
        ('--model_dir_prefix', str, 'prefix', 'model dir name prefix'),
        ('--aws', 'bool', False, 'using aws'),
        # parameter groups given as strings
        ('--preprocessing_params', str, '', ''),
        ('--model_params', str, '', ''),
        ('--training_params', str, '', ''),
    )
    for flag, value_type, default_value, help_text in option_table:
        parser.add_argument(flag, type=value_type, default=default_value,
                            help=help_text)
    parser.set_defaults(shuffle=True)
    cli_args = parser.parse_args()

    # every parsed option becomes an entry of the 'parsed_params' group
    self.parsed_params = HParams()
    for option_name, option_value in vars(cli_args).items():
        self.parsed_params.add_hparam(option_name, option_value)
    self.register_hparams(self.parsed_params, 'parsed_params')

    # pre-processing group
    self.preprocessed_params = self.get_default_preprocessing_params()
    self.preprocessed_params.parse(self.parsed_params.preprocessing_params)
    self.register_hparams(self.preprocessed_params, 'preprocessed_params')

    # model group: shared defaults merged with those of the selected model
    shared_defaults = self.get_default_model_parameters()
    specific_defaults = self.get_default_specific_model_parameters(
        self.parsed_params.network_class, self.parsed_params.network_type)
    self.model_params = merge_params(shared_defaults, specific_defaults)
    self.model_params.parse(self.parsed_params.model_params)
    self.register_hparams(self.model_params, 'model_params')

    # training group
    self.training_params = self.get_default_training_params()
    self.training_params.parse(self.parsed_params.training_params)
    self.register_hparams(self.training_params, 'training_params')
class ParamsCenter(object):
    """Registry of the named hyper-parameter groups of a run.

    Each group is an HParams object stored as an attribute of the instance; the
    registration order is kept in ``hparam_name_list``. Item access
    (``center['name']``) searches the groups from the most recently registered
    to the first one.
    """

    def __init__(self):
        """Parse the command line and register the default parameter groups.

        Groups are registered in this order: 'parsed_params',
        'preprocessed_params', 'model_params', 'training_params'.
        """
        self.hparam_name_list = []
        # ------ parsing input arguments ------
        parser = argparse.ArgumentParser()
        parser.register('type', 'bool',
                        (lambda x: x.lower() in ("yes", "true", "t", "1")))
        parser.add_argument('--mode', type=str, default='train',
                            help='train_tasks')
        parser.add_argument('--dataset', type=str, default='snli',
                            help='[snli|multinli_m|multinli_mm]')
        parser.add_argument('--network_class', type=str,
                            default='transformer', help='None')
        parser.add_argument('--network_type', type=str, default=None,
                            help='None')
        parser.add_argument('--gpu', type=str, default='3',
                            help='selected gpu index')
        parser.add_argument('--gpu_mem', type=float, default=None,
                            help='selected gpu index')
        parser.add_argument('--model_dir_prefix', type=str, default='prefix',
                            help='model dir name prefix')
        parser.add_argument('--aws', type='bool', default=False,
                            help='using aws')

        # parsing parameters group
        parser.add_argument('--preprocessing_params', type=str, default='',
                            help='')
        parser.add_argument('--model_params', type=str, default='', help='')
        parser.add_argument('--training_params', type=str, default='',
                            help='')

        parser.set_defaults(shuffle=True)
        args = parser.parse_args()

        # every parsed option becomes an entry of the 'parsed_params' group
        self.parsed_params = HParams()
        for key, val in args.__dict__.items():
            self.parsed_params.add_hparam(key, val)
        self.register_hparams(self.parsed_params, 'parsed_params')

        # pre-processed
        self.preprocessed_params = self.get_default_preprocessing_params()
        self.preprocessed_params.parse(self.parsed_params.preprocessing_params)
        self.register_hparams(self.preprocessed_params, 'preprocessed_params')

        # model: shared defaults merged with those of the selected model class
        self.model_params = merge_params(
            self.get_default_model_parameters(),
            self.get_default_specific_model_parameters(
                self.parsed_params.network_class,
                self.parsed_params.network_type))
        self.model_params.parse(self.parsed_params.model_params)
        self.register_hparams(self.model_params, 'model_params')

        # training
        self.training_params = self.get_default_training_params()
        self.training_params.parse(self.parsed_params.training_params)
        self.register_hparams(self.training_params, 'training_params')

    @staticmethod
    def get_default_preprocessing_params():
        """Return the default pre-processing hyper-parameters."""
        params = HParams(
            max_sent_len=50,
            load_preproc=True,
        )
        return params

    @staticmethod
    def get_default_model_parameters():
        """Return the shared model defaults (an empty HParams)."""
        return HParams()

    @staticmethod
    def get_default_training_params():
        """Return the default training hyper-parameters."""
        hparams = HParams(
            optimizer='openai_adam',
            grad_norm=1.,
            n_steps=90000,
            lr=6.25e-5,
            # control
            save_model=False,
            save_num=3,
            load_model=False,
            load_path='',
            summary_period=1000,
            eval_period=500,
            train_batch_size=20,
            test_batch_size=24,
        )
        return hparams

    @staticmethod
    def get_default_specific_model_parameters(network_class, network_type):
        """Load the default hyper-parameters declared by the selected model.

        The model module is ``src.models.<network_class>.model_<network_type>``
        and the class name is derived from the module name with
        ``underline_to_camel``.

        Args:
            network_class: name of the sub-package of ``src.models`` to look in.
            network_type: suffix of the model module name; ``None`` skips the
                lookup.

        Returns:
            The HParams returned by the model class's
            ``get_default_model_parameters()`` with an extra ``model_class``
            entry. When ``network_type`` is ``None``, or the module/class
            cannot be resolved, ``HParams(model_class=None)`` is returned
            instead (a message is printed in the failure case).
        """
        # Function-scope import so the class does not depend on the file header.
        import importlib

        model_params = HParams(model_class=None)
        if network_type is not None:
            model_module_name = 'model_%s' % network_type
            model_class_name = underline_to_camel(model_module_name)
            try:
                # import_module + getattr resolve the class directly; unlike
                # the former eval() on a formatted string, the command-line
                # supplied names are never executed as Python code.
                model_module = importlib.import_module(
                    'src.models.%s.%s' % (network_class, model_module_name))
                model_class = getattr(model_module, model_class_name)
                model_params = model_class.get_default_model_parameters()
                model_params.add_hparam('model_class',
                                        model_class)  # add model class
            except ImportError:
                print('Fatal Error: no model module: \"src.models.%s.%s\"' %
                      (network_class, model_module_name))
            except AttributeError:
                print(
                    'Fatal Error: probably (1) no model class named as %s.%s, '
                    'or (2) the class no \"get_default_model_parameters()\"' %
                    (network_class, model_module_name))
        return model_params

    # ============== Utils =============
    def register_hparams(self, hparams, name):
        """Store ``hparams`` as attribute ``name`` and record the name.

        Args:
            hparams: the HParams group to register.
            name: attribute name for the group; must not be registered yet.
        """
        assert isinstance(hparams, HParams)
        assert isinstance(name, str)
        assert name not in self.hparam_name_list
        self.hparam_name_list.append(name)
        setattr(self, name, hparams)

    @property
    def all_params(self):
        """Merge all registered groups, newest first, into one HParams."""
        all_params = HParams()
        for hparam_name in reversed(self.hparam_name_list):
            cur_params = getattr(self, hparam_name)
            all_params = merge_params(all_params, cur_params)
        return all_params

    def __getitem__(self, item):
        """Return parameter ``item`` from the newest group that defines it.

        Raises:
            AttributeError: if no registered group has an attribute ``item``.
        """
        assert isinstance(item, str)
        for hparam_name in reversed(self.hparam_name_list):
            try:
                return getattr(getattr(self, hparam_name), item)
            except AttributeError:
                pass
        raise AttributeError('no item named as \'%s\'' % item)
def __init__(self, params_center):
    """Derive the names, directories and paths of a run and set up logging.

    The derived paths are registered on ``params_center`` as the
    'path_params' group, the dtype names as the 'other_params' group, a file
    handler for ``self.log_path`` is added to the root logger and the
    CUDA_VISIBLE_DEVICES environment variable is set from the 'gpu' parameter.

    Args:
        params_center: object holding the parameter groups; it must provide
            ``register_hparams(hparams, name)``.

    Raises:
        AttributeError: if the 'dataset' parameter is not 'snli',
            'multinli_m' or 'multinli_mm'.
    """
    # add default and parsed parameters to cfg
    # NOTE(review): self[...] presumably looks the name up in params_center —
    # the __getitem__ of this class is not visible here; confirm.
    self.params_center = params_center

    self.dataset_dir = "./dataset"
    self.project_dir = "./"

    self.processed_name = self.get_params_str(['dataset'
                                               ]) + '_proprec.pickle'

    # model name: fixed '_test' when no (or the 'test') network type is
    # selected, otherwise built from a list of parameter names
    if self['network_type'] is None or self['network_type'] == 'test':
        self.model_name = '_test'
    else:
        model_name_params = [
            'dataset', 'network_class', 'network_type', 'lr', 'n_steps'
        ]
        if self['model_class'] is not None:
            # the model class contributes its own identifying parameters
            model_name_params += self[
                'model_class'].get_identity_param_list()
        else:
            # only reported; execution continues with the base list
            print('fatal error: can not reach the model class')
        self.model_name = self.get_params_str(model_name_params)

    self.ckpt_name = 'model_file.ckpt'
    self.log_name = 'log_' + Configs.time_suffix() + '.txt'

    # raw data directory and file-name pattern (%s is the split name)
    if self['dataset'] == 'snli':
        data_name_pattern = 'snli_1.0_%s.jsonl'
        self.raw_data_dir = join(self.dataset_dir, 'snli_1.0')
    elif self['dataset'].startswith('multinli'):
        self.raw_data_dir = join(self.dataset_dir, 'multinli_1.0')
        if self['dataset'] == 'multinli_m':
            data_name_pattern = 'multinli_1.0_%s_matched.jsonl'
        elif self['dataset'] == 'multinli_mm':
            data_name_pattern = 'multinli_1.0_%s_mismatched.jsonl'
        else:
            raise AttributeError
    else:
        raise AttributeError
    self.train_data_name, self.dev_data_name, self.test_data_name = \
        [data_name_pattern % name for name in ['train', 'dev', 'test']]

    # ------- dir -------
    self.bpe_data_dir = join(self.dataset_dir, 'bpe')
    self.pretrained_transformer_dir = join(self.dataset_dir,
                                           'pretrained_transformer')
    #
    # NOTE(review): mp_join is defined elsewhere; it presumably also creates
    # the directory (the log file below is opened without an explicit mkdir)
    # — confirm before reordering these statements.
    self.runtime_dir = mp_join(self.project_dir, 'runtime')
    self.run_model_dir = mp_join(self.runtime_dir, 'run_model')
    self.processed_dir = mp_join(self.runtime_dir, 'preproc')

    self.cur_run_dir = mp_join(self.run_model_dir,
                               self['model_dir_prefix'] + self.model_name)
    self.log_dir = mp_join(self.cur_run_dir, 'log_files')
    self.summary_dir = mp_join(self.cur_run_dir, 'summary')
    self.ckpt_dir = mp_join(self.cur_run_dir, 'ckpt')
    self.other_dir = mp_join(self.cur_run_dir, 'other')

    # path
    self.train_data_path = join(self.raw_data_dir, self.train_data_name)
    self.dev_data_path = join(self.raw_data_dir, self.dev_data_name)
    self.test_data_path = join(self.raw_data_dir, self.test_data_name)

    self.processed_path = join(self.processed_dir, self.processed_name)
    self.ckpt_path = join(self.ckpt_dir, self.ckpt_name)
    self.log_path = join(self.log_dir, self.log_name)

    # merge the paths to params
    path_params = HParams(
        train_data_path=self.train_data_path,
        dev_data_path=self.dev_data_path,
        test_data_path=self.test_data_path,
        bpe_data_dir=self.bpe_data_dir,
        pretrained_transformer_dir=self.pretrained_transformer_dir,
        runtime_dir=self.runtime_dir,
        run_model_dir=self.run_model_dir,
        processed_dir=self.processed_dir,
        cur_run_dir=self.cur_run_dir,
        log_dir=self.log_dir,
        summary_dir=self.summary_dir,
        ckpt_dir=self.ckpt_dir,
        other_dir=self.other_dir,
        # paths
        processed_path=self.processed_path,
        ckpt_path=self.ckpt_path,
        log_path=self.log_path,
    )
    self.params_center.register_hparams(path_params, 'path_params')

    # logging setup
    log.basicConfig(format='%(asctime)s: %(message)s',
                    level=log.INFO,
                    datefmt='%m/%d %I:%M:%S %p')
    file_handler = log.FileHandler(
        self.log_path)  # add a file handler to a logger
    log.getLogger().addHandler(file_handler)

    # other
    # cuda support: 'none' (any letter case) hides all devices, any other
    # value is used as the CUDA_VISIBLE_DEVICES string as given
    os.environ['CUDA_VISIBLE_DEVICES'] = '' if self['gpu'].lower(
    ) == 'none' else self['gpu']
    # import torch
    # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    #
    other_params = HParams(
        intX='int32',
        floatX='float32',
        # device=device
    )
    self.params_center.register_hparams(other_params, 'other_params')

    # write the parameters of the run to the log
    log.info(print_params(self.params, print_std=False))