# NOTE: pytorch_kobert, bert_config, tokenizer (the vocab info dict), download,
# get_tokenizer, cachedir and ctx are assumed to be provided by the KoBERT
# package and the surrounding script; BertModel/BertConfig are assumed to come
# from Hugging Face transformers.
import torch
import gluonnlp as nlp
from transformers import BertConfig, BertModel

# download model
model_info = pytorch_kobert
model_path = download(model_info['url'],
                      model_info['fname'],
                      model_info['chksum'],
                      cachedir=cachedir)

# download vocab
vocab_info = tokenizer
vocab_path = download(vocab_info['url'],
                      vocab_info['fname'],
                      vocab_info['chksum'],
                      cachedir=cachedir)

#################################################################################################
print('Declaring the BERT model')
bertmodel = BertModel(config=BertConfig.from_dict(bert_config))
# load the downloaded checkpoint into the model (load_state_dict, not state_dict)
bertmodel.load_state_dict(torch.load(model_path))

print("Setting up the GPU device")
device = torch.device(ctx)
bertmodel.to(device)
bertmodel.train()

vocab = nlp.vocab.BERTVocab.from_sentencepiece(vocab_path, padding_token='[PAD]')

#################################################################################################
# parameter setup
tokenizer = get_tokenizer()
tok = nlp.data.BERTSPTokenizer(tokenizer, vocab, lower=False)
max_len = 64
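# Illustrative usage sketch (not part of the original script): the
# BERTSPTokenizer built above is callable on raw text and returns
# SentencePiece subword tokens; the sample sentence is arbitrary.
sample_tokens = tok("한국어 문장을 토큰으로 나눕니다.")  # "Splits a Korean sentence into tokens."
print(sample_tokens)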
def __init__(
    self,
    config: MTDNNConfig,
    task_defs: MTDNNTaskDefs,
    pretrained_model_name: str = "mtdnn-base-uncased",
    num_train_step: int = -1,
    decoder_opts: list = None,
    task_types: list = None,
    dropout_list: list = None,
    loss_types: list = None,
    kd_loss_types: list = None,
    tasks_nclass_list: list = None,
    multitask_train_dataloader: DataLoader = None,
    dev_dataloaders_list: list = None,  # list of dataloaders
    test_dataloaders_list: list = None,  # list of dataloaders
    test_datasets_list: list = ["mnli_mismatched", "mnli_matched"],
    output_dir: str = "checkpoint",
    log_dir: str = "tensorboard_logdir",
):
    # Input validation
    assert (
        config.init_checkpoint in self.supported_init_checkpoints()
    ), f"Initial checkpoint must be in {self.supported_init_checkpoints()}"
    assert decoder_opts, "Decoder options list is required!"
    assert task_types, "Task types list is required!"
    assert dropout_list, "Task dropout list is required!"
    assert loss_types, "Loss types list is required!"
    assert kd_loss_types, "KD Loss types list is required!"
    assert tasks_nclass_list, "Tasks nclass list is required!"
    assert multitask_train_dataloader, "DataLoader for multiple tasks cannot be None"
    assert test_datasets_list, "Pass a list of test dataset prefixes"

    super(MTDNNModel, self).__init__(config)

    # Initialize model config and update with training options
    self.config = config
    self.update_config_with_training_opts(
        decoder_opts,
        task_types,
        dropout_list,
        loss_types,
        kd_loss_types,
        tasks_nclass_list,
    )
    self.task_defs = task_defs
    self.multitask_train_dataloader = multitask_train_dataloader
    self.dev_dataloaders_list = dev_dataloaders_list
    self.test_dataloaders_list = test_dataloaders_list
    self.test_datasets_list = test_datasets_list
    self.output_dir = output_dir
    self.log_dir = log_dir

    # Create the output_dir if it doesn't exist
    MTDNNCommonUtils.create_directory_if_not_exists(self.output_dir)
    self.tensor_board = SummaryWriter(log_dir=self.log_dir)
    self.pooler = None

    # Resume from model checkpoint
    if self.config.resume and self.config.model_ckpt:
        assert os.path.exists(self.config.model_ckpt), "Model checkpoint does not exist"
        logger.info(f"loading model from {self.config.model_ckpt}")
        self = self.load(self.config.model_ckpt)
        return

    # Setup the baseline network
    # - Define the encoder based on config options
    # - Set state dictionary based on configuration setting
    # - Download pretrained model if flag is set
    # TODO - Use Model.pretrained_model() after configuration file is hosted.
    if self.config.use_pretrained_model:
        with MTDNNCommonUtils.download_path() as file_path:
            path = pathlib.Path(file_path)
            self.local_model_path = MTDNNCommonUtils.maybe_download(
                url=self.pretrained_model_archive_map[pretrained_model_name]
            )
        self.bert_model = MTDNNCommonUtils.load_pytorch_model(self.local_model_path)
        self.state_dict = self.bert_model["state"]
    else:
        # Set the config based on the encoder type set for the initial checkpoint
        if config.encoder_type == EncoderModelType.BERT:
            self.bert_config = BertConfig.from_dict(self.config.to_dict())
            self.bert_model = BertModel.from_pretrained(self.config.init_checkpoint)
            self.state_dict = self.bert_model.state_dict()
            self.config.hidden_size = self.bert_config.hidden_size
        if config.encoder_type == EncoderModelType.ROBERTA:
            # Download and extract from PyTorch hub if not downloaded before
            self.bert_model = torch.hub.load("pytorch/fairseq", config.init_checkpoint)
            self.config.hidden_size = self.bert_model.args.encoder_embed_dim
            self.pooler = LinearPooler(self.config.hidden_size)
            new_state_dict = {}
            for key, val in self.bert_model.state_dict().items():
                if key.startswith("model.decoder.sentence_encoder") or key.startswith(
                    "model.classification_heads"
                ):
                    key = f"bert.{key}"
                    new_state_dict[key] = val
                # backward compatibility PyTorch <= 1.0.0
                if key.startswith("classification_heads"):
                    key = f"bert.model.{key}"
                    new_state_dict[key] = val
            self.state_dict = new_state_dict

    self.updates = (
        self.state_dict["updates"]
        if self.state_dict and "updates" in self.state_dict
        else 0
    )
    self.local_updates = 0
    self.train_loss = AverageMeter()
    self.network = SANBERTNetwork(
        init_checkpoint_model=self.bert_model,
        pooler=self.pooler,
        config=self.config,
    )
    if self.state_dict:
        self.network.load_state_dict(self.state_dict, strict=False)
    self.mnetwork = (
        nn.DataParallel(self.network) if self.config.multi_gpu_on else self.network
    )
    self.total_param = sum(
        [p.nelement() for p in self.network.parameters() if p.requires_grad]
    )

    # Move network to GPU if device available and flag set
    if self.config.cuda:
        self.network.cuda(device=self.config.cuda_device)
    self.optimizer_parameters = self._get_param_groups()
    self._setup_optim(self.optimizer_parameters, self.state_dict, num_train_step)
    self.para_swapped = False
    self.optimizer.zero_grad()
    self._setup_lossmap()
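# Illustrative construction sketch (not from the original source): every value
# below is a hypothetical placeholder showing how the per-task option lists
# line up with the constructor arguments above; decoder_opts, task_types,
# dropout_list, loss_types, kd_loss_types and tasks_nclass_list are expected
# to have one entry per task.
model = MTDNNModel(
    config,      # an MTDNNConfig instance
    task_defs,   # an MTDNNTaskDefs instance
    pretrained_model_name="mtdnn-base-uncased",
    num_train_step=num_train_step,
    decoder_opts=decoder_opts,
    task_types=task_types,
    dropout_list=dropout_list,
    loss_types=loss_types,
    kd_loss_types=kd_loss_types,
    tasks_nclass_list=tasks_nclass_list,
    multitask_train_dataloader=multitask_train_dataloader,
    dev_dataloaders_list=dev_dataloaders_list,
    test_dataloaders_list=test_dataloaders_list,
)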
def get_kobert_config():
    return BertConfig.from_dict(kobert_config)
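# Illustrative only: mirrors the BertModel construction earlier in this file,
# building a randomly initialized KoBERT-shaped encoder from the config dict.
kobert_encoder = BertModel(config=get_kobert_config())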