コード例 #1
0
 def get_default_model_parameters():
     """Return the ModelTemplate defaults combined (via merge_params) with the highway options."""
     template_defaults = ModelTemplate.get_default_model_parameters()
     # Values noted for both options by the original author: [none|mlp,gate,add]
     highway_options = HParams(
         token_rep_highway="none",
         seq_rep_highway="none",
     )
     return merge_params(template_defaults, highway_options)
コード例 #2
0
def setup_training(args: argparse.Namespace) -> None:
    """
    Prepare the checkpoint/log directories and the root logger for a training run, then hand
    control to train_model(...).

    When args.tier is None the log file is labelled 'ALL' (complete model); otherwise it is
    labelled with the tier given in args.tier.

    Args:
        args (argparse.Namespace): parameters of the run. This function reads args.path_config
                                   and args.tier and forwards the whole namespace to
                                   train_model(...). The original contract lists, at least:
                                   args = {"path_config": ...,
                                           "tier": ...,
                                           "checkpoint_path": ...}
    """
    # Hyperparameters come from the YAML file; the compute device is stored alongside them.
    hp = HParams.from_yaml(args.path_config)
    device_name = 'cuda:0' if torch.cuda.is_available() else 'cpu'
    hp["device"] = torch.device(device_name)

    # Suffix describing the architecture, used to name the folders of this run.
    # Layout: d(name)_t(n_tiers)_l(layers joined by '.')_hd(hidden_size)_gmm(gmm_size)
    arch_suffix = "_".join((
        f"d{hp.name}",
        f"t{hp.network.n_tiers}",
        f"l{'.'.join(map(str, hp.network.layers))}",
        f"hd{hp.network.hidden_size}",
        f"gmm{hp.network.gmm_size}",
    ))
    # Timestamp used to tell apart the files of different runs.
    run_stamp = datetime.now().strftime('%Y%m%d-%H%M%S')

    # Checkpoint directory: shared by every run with the same architecture.
    chkpt_dir = hp.training.dir_chkpt + arch_suffix
    hp["training"]["dir_chkpt"] = chkpt_dir
    Path(hp.training.dir_chkpt).mkdir(parents=True, exist_ok=True)

    # Log directory: shared by every run with the same architecture.
    log_dir = hp.logging.dir_log + arch_suffix
    hp["logging"]["dir_log"] = log_dir
    Path(hp.logging.dir_log).mkdir(parents=True, exist_ok=True)

    # Root logger writes both to <dir_log>/tier<tier>_<timestamp> and to the terminal.
    tier_label = 'ALL' if args.tier is None else str(args.tier)
    log_file = f"{hp.logging.dir_log}/tier{tier_label}_{run_stamp}"
    file_handler = logging.FileHandler(filename=log_file)
    console_handler = logging.StreamHandler()
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        handlers=[file_handler, console_handler],
    )
    root_logger = logging.getLogger()

    # Report the selected device before training starts.
    root_logger.info(f"Device for training: {hp.device}")

    # Train the model (or a single tier, depending on args).
    train_model(args, hp, arch_suffix, run_stamp, root_logger)
コード例 #3
0
 def get_default_model_parameters():
     """Return the ModelTemplate defaults combined (via merge_params) with this model's options."""
     template_defaults = ModelTemplate.get_default_model_parameters()
     model_options = HParams(
         use_mtsa=False,
         use_direction=False,
         output_method="clf",
         global_afn='exp',
         attn_self=True,
     )
     return merge_params(template_defaults, model_options)
コード例 #4
0
 def get_default_specific_model_parameters(network_class, network_type):
     """Return the default hyper-parameters of the model selected by name.

     The module ``src.models.<network_class>.model_<network_type>`` is imported and the
     class named ``underline_to_camel('model_<network_type>')`` is looked up in it. The
     class's ``get_default_model_parameters()`` result is returned with the class itself
     added under the ``model_class`` hparam.

     Args:
         network_class: name of the sub-package of ``src.models`` holding the model module.
         network_type: suffix of the model module name; when None no lookup is done.

     Returns:
         The model's default HParams with ``model_class`` added, or
         ``HParams(model_class=None)`` when ``network_type`` is None or the lookup fails
         (an error message is printed in the latter case).
     """
     # Local import: only this function needs it.
     import importlib

     model_params = HParams(model_class=None)
     if network_type is None:
         return model_params

     model_module_name = 'model_%s' % network_type
     model_class_name = underline_to_camel(model_module_name)
     try:
         # import_module returns the leaf module directly, so the class can be fetched
         # with getattr instead of eval()-ing a string built from caller-supplied names.
         model_module = importlib.import_module(
             'src.models.%s.%s' % (network_class, model_module_name))
         model_class = getattr(model_module, model_class_name)
         model_params = model_class.get_default_model_parameters()
         model_params.add_hparam('model_class',
                                 model_class)  # add model class
     except ImportError:
         print('Fatal Error: no model module: \"src.models.%s.%s\"' %
               (network_class, model_module_name))
     except AttributeError:
         print(
             'Fatal Error: probably (1) no model class named as %s.%s, '
             'or (2) the class no \"get_default_model_parameters()\"' %
             (network_class, model_module_name))
     return model_params
コード例 #5
0
    def get_default_training_params():
        """Build the HParams object holding the default training settings."""
        optimisation = dict(
            optimizer='openai_adam',
            grad_norm=1.,
            n_steps=90000,
            lr=6.25e-5,
        )
        # control
        run_control = dict(
            save_model=False,
            save_num=3,
            load_model=False,
            load_path='',
            summary_period=1000,
            eval_period=500,
            train_batch_size=20,
            test_batch_size=24,
        )
        return HParams(**optimisation, **run_control)
コード例 #6
0
ファイル: model_template.py プロジェクト: taoshen58/mtsa
 def get_default_model_parameters():
     """Build the HParams object holding the template's default model settings."""
     defaults = dict(
         embd_dim=768,
         n_hidden=768,
         n_ctx=512,
         embd_dropout=0.9,
         resid_dropout=0.9,
         attn_dropout=0.9,
         clf_dropout=0.9,
         n_layer=12,
         # n_transfer=12 (disabled in the original)
         n_head=12,
         afn='gelu',
         clf_afn='elu',
         lm_coef=0.3,
         lr_schd='warmup_linear',
         lr_warmup=0.002,
         highway=False,
         use_pe=True,
     )
     return HParams(**defaults)
コード例 #7
0
def setup_synthesize(args: argparse.Namespace):
    """
    Sets up synthesis: reads the hyperparameters, creates the log/output directories, configures
    the root logger and the tensorboard writer, and calls synthesize(...).

    The tensorboard writer is closed when synthesize(...) returns or raises.

    Args:
        args (argparse.Namespace): parameters to set up the synthesis. This function reads
                                   args.path_config and args.path_synthesis and forwards the
                                   whole namespace to synthesize(...).
    """
    # 1. Read hyperparameters from file
    hp = HParams.from_yaml(args.path_config)
    synthesisp = HParams.from_yaml(args.path_synthesis)
    # check if GPU available and add it to parameters
    hp["device"] = torch.device(
        'cuda:0' if torch.cuda.is_available() else 'cpu')

    # 2. Create extension of the architecture of the model and timestamp for this run (use to
    # identify folders and files created for this run)
    # format: d(name)_t(n_tiers)_l(layers)_hd(hidden_size)_gmm(gmm_size).
    extension_architecture = f"d{hp.name}_t{hp.network.n_tiers}_" \
                             f"l{'.'.join(map(str, hp.network.layers))}_" \
                             f"hd{hp.network.hidden_size}_gmm{hp.network.gmm_size}"
    timestamp = f"{datetime.now().strftime('%Y%m%d-%H%M%S')}"

    # 3. Create directories for saving logs and output if they do not exist
    # 3.1 Create general log directory for this run (the same directory will be used for different
    #     runs of a model with same architecture and the difference will be in the file stored)
    hp["logging"]["dir_log"] = hp.logging.dir_log + extension_architecture
    Path(hp.logging.dir_log).mkdir(parents=True, exist_ok=True)
    # 3.2 Create directory for the outputs of this run (the same directory will be used for
    #     different runs of a model with same architecture and the difference will be in the weights
    #     of the model)
    synthesisp.output_path = synthesisp.output_path + extension_architecture
    Path(synthesisp.output_path).mkdir(parents=True, exist_ok=True)

    # 4. Setup general logging (it will use the folder previously created and the filename will
    #    be synthesis_<timestamp>)
    filename = f"{hp.logging.dir_log}/synthesis_{timestamp}"
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        handlers=[
            logging.FileHandler(
                filename=filename),  # handler to save the log to a file
            logging.StreamHandler(
            )  # handler to output the log to the terminal
        ])
    logger = logging.getLogger()

    # 5. Show device that will be used: CPU or GPU
    logger.info(f"Device for training: {hp.device}")

    # 6. Setup tensorboard logging
    # 6.1 Create tensorboard logs directory: the architecture extension and the timestamp are
    #     appended so that every run gets its own folder
    tensorboard_dir = hp.logging.dir_log_tensorboard + extension_architecture \
                      + f"synthesis_{timestamp}"
    Path(tensorboard_dir).mkdir(parents=True, exist_ok=True)
    # 6.2 Create tensorboard writer
    tensorboardwriter = TensorboardWriter(hp, tensorboard_dir)

    # 7. Run the synthesis; the writer is closed even if synthesize(...) raises so that it is
    #    not left open on failure
    try:
        synthesize(args, hp, synthesisp, extension_architecture, timestamp,
                   tensorboardwriter, logger)
    finally:
        tensorboardwriter.close()
コード例 #8
0
 def all_params(self):
     """Fold every registered group into one HParams, starting from the last registered name."""
     merged = HParams()
     for group_name in reversed(self.hparam_name_list):
         merged = merge_params(merged, getattr(self, group_name))
     return merged
コード例 #9
0
 def get_default_model_parameters():
     """Return a new HParams object created without any arguments."""
     empty_params = HParams()
     return empty_params
コード例 #10
0
 def get_default_preprocessing_params():
     """Build the HParams object holding the default preprocessing settings."""
     defaults = dict(max_sent_len=50, load_preproc=True)
     return HParams(**defaults)
コード例 #11
0
    def __init__(self):
        """Parse the command line and register the four default hyper-parameter groups.

        Groups are registered in this order: 'parsed_params', 'preprocessed_params',
        'model_params', 'training_params'. The last three start from their defaults and are
        then updated by parsing the matching ``--*_params`` command-line string.
        """
        self.hparam_name_list = []

        # ------ command-line definition ------
        def _to_bool(text):
            # Only these spellings (case-insensitive) count as True.
            return text.lower() in ("yes", "true", "t", "1")

        parser = argparse.ArgumentParser()
        parser.register('type', 'bool', _to_bool)

        cli_options = (
            ('--mode', dict(type=str, default='train', help='train_tasks')),
            ('--dataset', dict(type=str, default='snli',
                               help='[snli|multinli_m|multinli_mm]')),
            ('--network_class', dict(type=str, default='transformer',
                                     help='None')),
            ('--network_type', dict(type=str, default=None, help='None')),
            ('--gpu', dict(type=str, default='3', help='selected gpu index')),
            ('--gpu_mem', dict(type=float, default=None,
                               help='selected gpu index')),
            ('--model_dir_prefix', dict(type=str, default='prefix',
                                        help='model dir name prefix')),
            ('--aws', dict(type='bool', default=False, help='using aws')),
            # free-form strings parsed later into the parameter groups
            ('--preprocessing_params', dict(type=str, default='', help='')),
            ('--model_params', dict(type=str, default='', help='')),
            ('--training_params', dict(type=str, default='', help='')),
        )
        for flag, options in cli_options:
            parser.add_argument(flag, **options)
        parser.set_defaults(shuffle=True)

        # ------ parsed command line ------
        namespace = parser.parse_args()
        self.parsed_params = HParams()
        for name, value in vars(namespace).items():
            self.parsed_params.add_hparam(name, value)
        self.register_hparams(self.parsed_params, 'parsed_params')

        # ------ preprocessing group ------
        preprocessing_overrides = self.parsed_params.preprocessing_params
        self.preprocessed_params = self.get_default_preprocessing_params()
        self.preprocessed_params.parse(preprocessing_overrides)
        self.register_hparams(self.preprocessed_params, 'preprocessed_params')

        # ------ model group: generic defaults combined with the selected model's ------
        generic_defaults = self.get_default_model_parameters()
        specific_defaults = self.get_default_specific_model_parameters(
            self.parsed_params.network_class,
            self.parsed_params.network_type)
        self.model_params = merge_params(generic_defaults, specific_defaults)
        self.model_params.parse(self.parsed_params.model_params)
        self.register_hparams(self.model_params, 'model_params')

        # ------ training group ------
        training_overrides = self.parsed_params.training_params
        self.training_params = self.get_default_training_params()
        self.training_params.parse(training_overrides)
        self.register_hparams(self.training_params, 'training_params')
コード例 #12
0
class ParamsCenter(object):
    """Registry of named HParams groups built from the command line.

    Construction parses the command line and registers four groups in this order:
    'parsed_params', 'preprocessed_params', 'model_params', 'training_params'. More groups
    can be added with register_hparams(). Item lookup (``center['name']``) walks the groups
    from the most recently registered to the first and returns the first match.
    """

    def __init__(self):
        """Parse the command line and register the default hyper-parameter groups."""
        self.hparam_name_list = []
        # ------ parsing input arguments ------
        parser = argparse.ArgumentParser()
        # 'bool' type: only these spellings (case-insensitive) are read as True
        parser.register('type', 'bool', (lambda x: x.lower() in
                                         ("yes", "true", "t", "1")))
        parser.add_argument('--mode',
                            type=str,
                            default='train',
                            help='train_tasks')
        parser.add_argument('--dataset',
                            type=str,
                            default='snli',
                            help='[snli|multinli_m|multinli_mm]')
        parser.add_argument('--network_class',
                            type=str,
                            default='transformer',
                            help='None')
        parser.add_argument('--network_type',
                            type=str,
                            default=None,
                            help='None')
        parser.add_argument('--gpu',
                            type=str,
                            default='3',
                            help='selected gpu index')
        parser.add_argument('--gpu_mem',
                            type=float,
                            default=None,
                            help='selected gpu index')
        parser.add_argument('--model_dir_prefix',
                            type=str,
                            default='prefix',
                            help='model dir name prefix')
        parser.add_argument('--aws',
                            type='bool',
                            default=False,
                            help='using aws')

        # free-form strings, parsed below into the matching parameter group
        parser.add_argument('--preprocessing_params',
                            type=str,
                            default='',
                            help='')
        parser.add_argument('--model_params', type=str, default='', help='')
        parser.add_argument('--training_params', type=str, default='', help='')

        parser.set_defaults(shuffle=True)
        args = parser.parse_args()
        self.parsed_params = HParams()
        for key, val in vars(args).items():
            self.parsed_params.add_hparam(key, val)
        self.register_hparams(self.parsed_params, 'parsed_params')

        # preprocessing: defaults updated from --preprocessing_params
        self.preprocessed_params = self.get_default_preprocessing_params()
        self.preprocessed_params.parse(self.parsed_params.preprocessing_params)
        self.register_hparams(self.preprocessed_params, 'preprocessed_params')

        # model: generic defaults combined with those of the selected model class,
        # then updated from --model_params
        self.model_params = merge_params(
            self.get_default_model_parameters(),
            self.get_default_specific_model_parameters(
                self.parsed_params.network_class,
                self.parsed_params.network_type))
        self.model_params.parse(self.parsed_params.model_params)
        self.register_hparams(self.model_params, 'model_params')

        # training: defaults updated from --training_params
        self.training_params = self.get_default_training_params()
        self.training_params.parse(self.parsed_params.training_params)
        self.register_hparams(self.training_params, 'training_params')

    @staticmethod
    def get_default_preprocessing_params():
        """Return the default preprocessing hyper-parameters."""
        params = HParams(
            max_sent_len=50,
            load_preproc=True,
        )
        return params

    @staticmethod
    def get_default_model_parameters():
        """Return the generic model defaults (an HParams created without arguments)."""
        return HParams()

    @staticmethod
    def get_default_training_params():
        """Return the default training hyper-parameters."""
        hparams = HParams(
            optimizer='openai_adam',
            grad_norm=1.,
            n_steps=90000,
            lr=6.25e-5,

            # control
            save_model=False,
            save_num=3,
            load_model=False,
            load_path='',
            summary_period=1000,
            eval_period=500,
            train_batch_size=20,
            test_batch_size=24,
        )
        return hparams

    @staticmethod
    def get_default_specific_model_parameters(network_class, network_type):
        """Return the default hyper-parameters of the model selected by name.

        The module ``src.models.<network_class>.model_<network_type>`` is imported and the
        class named ``underline_to_camel('model_<network_type>')`` is looked up in it. The
        class's ``get_default_model_parameters()`` result is returned with the class itself
        added under the ``model_class`` hparam.

        Args:
            network_class: name of the sub-package of ``src.models`` holding the module.
            network_type: suffix of the model module name; when None no lookup is done.

        Returns:
            The model's default HParams with ``model_class`` added, or
            ``HParams(model_class=None)`` when ``network_type`` is None or the lookup
            fails (an error message is printed in the latter case).
        """
        # Local import: only this method needs it.
        import importlib

        model_params = HParams(model_class=None)
        if network_type is None:
            return model_params

        model_module_name = 'model_%s' % network_type
        model_class_name = underline_to_camel(model_module_name)
        try:
            # import_module returns the leaf module directly, so the class can be fetched
            # with getattr instead of eval()-ing a string built from command-line values.
            model_module = importlib.import_module(
                'src.models.%s.%s' % (network_class, model_module_name))
            model_class = getattr(model_module, model_class_name)
            model_params = model_class.get_default_model_parameters()
            model_params.add_hparam('model_class',
                                    model_class)  # add model class
        except ImportError:
            print('Fatal Error: no model module: \"src.models.%s.%s\"' %
                  (network_class, model_module_name))
        except AttributeError:
            print(
                'Fatal Error: probably (1) no model class named as %s.%s, '
                'or (2) the class no \"get_default_model_parameters()\"' %
                (network_class, model_module_name))
        return model_params

    # ============== Utils =============
    def register_hparams(self, hparams, name):
        """Attach ``hparams`` to this object as attribute ``name`` and record the name.

        Args:
            hparams: the HParams group to register.
            name: attribute name for the group; must not have been registered before.
        """
        assert isinstance(hparams, HParams)
        assert isinstance(name, str)
        assert name not in self.hparam_name_list

        self.hparam_name_list.append(name)
        setattr(self, name, hparams)

    @property
    def all_params(self):
        """Fold all registered groups with merge_params, starting from the last registered."""
        all_params = HParams()
        for hparam_name in reversed(self.hparam_name_list):
            cur_params = getattr(self, hparam_name)
            all_params = merge_params(all_params, cur_params)
        return all_params

    def __getitem__(self, item):
        """Return hyper-parameter ``item`` from the most recently registered group that has it.

        Raises:
            AttributeError: if no registered group has an attribute named ``item``.
        """
        assert isinstance(item, str)

        for hparam_name in reversed(self.hparam_name_list):
            try:
                return getattr(getattr(self, hparam_name), item)
            except AttributeError:
                pass
        raise AttributeError('no item named as \'%s\'' % item)
コード例 #13
0
    def __init__(self, params_center):
        """Derive names, directories and file paths for the run and set up logging.

        The computed paths and a few extra constants are registered on ``params_center``
        as the 'path_params' and 'other_params' groups. Values such as ``self['dataset']``
        are read through this class's item lookup, which is defined outside this block.

        Args:
            params_center: object exposing ``register_hparams(hparams, name)``; it is stored
                on ``self.params_center``.

        Raises:
            AttributeError: if ``self['dataset']`` is neither 'snli', 'multinli_m' nor
                'multinli_mm'.
        """
        # add default and parsed parameters to cfg
        self.params_center = params_center
        self.dataset_dir = "./dataset"
        self.project_dir = "./"

        # file name of the preprocessed data, built from the 'dataset' parameter
        self.processed_name = self.get_params_str(['dataset'
                                                   ]) + '_proprec.pickle'

        # model name: '_test' when no (or the 'test') network type is selected, otherwise a
        # string built from the listed parameters
        if self['network_type'] is None or self['network_type'] == 'test':
            self.model_name = '_test'
        else:
            model_name_params = [
                'dataset', 'network_class', 'network_type', 'lr', 'n_steps'
            ]
            if self['model_class'] is not None:
                # the model class contributes its own list of parameter names
                model_name_params += self[
                    'model_class'].get_identity_param_list()
            else:
                # only reported; execution continues with the base parameter list
                print('fatal error: can not reach the model class')
            self.model_name = self.get_params_str(model_name_params)

        self.ckpt_name = 'model_file.ckpt'
        self.log_name = 'log_' + Configs.time_suffix() + '.txt'

        # raw data directory and file-name pattern for the selected dataset
        if self['dataset'] == 'snli':
            data_name_pattern = 'snli_1.0_%s.jsonl'
            self.raw_data_dir = join(self.dataset_dir, 'snli_1.0')
        elif self['dataset'].startswith('multinli'):
            self.raw_data_dir = join(self.dataset_dir, 'multinli_1.0')
            if self['dataset'] == 'multinli_m':
                data_name_pattern = 'multinli_1.0_%s_matched.jsonl'
            elif self['dataset'] == 'multinli_mm':
                data_name_pattern = 'multinli_1.0_%s_mismatched.jsonl'
            else:
                # unknown multinli variant
                raise AttributeError
        else:
            # unsupported dataset name
            raise AttributeError
        # the pattern is filled in with each split name
        self.train_data_name, self.dev_data_name, self.test_data_name = \
            [data_name_pattern % name for name in ['train', 'dev', 'test']]
        # -------  dir -------
        self.bpe_data_dir = join(self.dataset_dir, 'bpe')
        self.pretrained_transformer_dir = join(self.dataset_dir,
                                               'pretrained_transformer')

        # runtime directories, built with mp_join (defined elsewhere)
        # NOTE(review): presumably mp_join also creates the directory — confirm
        self.runtime_dir = mp_join(self.project_dir, 'runtime')
        self.run_model_dir = mp_join(self.runtime_dir, 'run_model')
        self.processed_dir = mp_join(self.runtime_dir, 'preproc')

        # per-run directory: '<model_dir_prefix><model_name>' under run_model_dir
        self.cur_run_dir = mp_join(self.run_model_dir,
                                   self['model_dir_prefix'] + self.model_name)
        self.log_dir = mp_join(self.cur_run_dir, 'log_files')
        self.summary_dir = mp_join(self.cur_run_dir, 'summary')
        self.ckpt_dir = mp_join(self.cur_run_dir, 'ckpt')
        self.other_dir = mp_join(self.cur_run_dir, 'other')

        # path
        self.train_data_path = join(self.raw_data_dir, self.train_data_name)
        self.dev_data_path = join(self.raw_data_dir, self.dev_data_name)
        self.test_data_path = join(self.raw_data_dir, self.test_data_name)

        self.processed_path = join(self.processed_dir, self.processed_name)
        self.ckpt_path = join(self.ckpt_dir, self.ckpt_name)
        self.log_path = join(self.log_dir, self.log_name)

        # merge the paths to params
        path_params = HParams(
            train_data_path=self.train_data_path,
            dev_data_path=self.dev_data_path,
            test_data_path=self.test_data_path,
            bpe_data_dir=self.bpe_data_dir,
            pretrained_transformer_dir=self.pretrained_transformer_dir,
            runtime_dir=self.runtime_dir,
            run_model_dir=self.run_model_dir,
            processed_dir=self.processed_dir,
            cur_run_dir=self.cur_run_dir,
            log_dir=self.log_dir,
            summary_dir=self.summary_dir,
            ckpt_dir=self.ckpt_dir,
            other_dir=self.other_dir,
            # paths
            processed_path=self.processed_path,
            ckpt_path=self.ckpt_path,
            log_path=self.log_path,
        )
        self.params_center.register_hparams(path_params, 'path_params')

        # logging setup: basicConfig for the root logger, plus a file handler writing to
        # self.log_path
        log.basicConfig(format='%(asctime)s: %(message)s',
                        level=log.INFO,
                        datefmt='%m/%d %I:%M:%S %p')
        file_handler = log.FileHandler(
            self.log_path)  # add a file handler to a logger
        log.getLogger().addHandler(file_handler)

        # other
        # cuda support: the 'gpu' parameter is exported as CUDA_VISIBLE_DEVICES; the value
        # 'none' (any letter case) is exported as an empty string
        os.environ['CUDA_VISIBLE_DEVICES'] = '' if self['gpu'].lower(
        ) == 'none' else self['gpu']
        # import torch
        # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        #
        other_params = HParams(
            intX='int32',
            floatX='float32',
            # device=device
        )
        self.params_center.register_hparams(other_params, 'other_params')

        # log the parameters; self.params is defined outside this block
        log.info(print_params(self.params, print_std=False))