def __init__(self, config, num_labels, use_pretrained=True):
    """Constructor"""
    super().__init__(config, num_labels)
    self.use_pretrained = use_pretrained

    if config.model_id is not None:
        self.pretrained_id = config.model_id
    else:
        self.pretrained_id = "bert-base-uncased"

    config_args = {
        "pretrained_model_name_or_path": self.pretrained_id,
        "num_labels": self.num_labels
    }

    # suspend logging
    lvl = logging.getLogger().level
    logging.getLogger().setLevel(logging.WARN)

    if use_pretrained:
        model = BertForSequenceClassification.from_pretrained(
            self.pretrained_id,
            num_labels=num_labels,
            output_hidden_states=False,
            output_attentions=False)
    else:
        model = BertForSequenceClassification(
            BertConfig(num_labels=num_labels,
                       output_hidden_states=False,
                       output_attentions=False))

    logging.getLogger().setLevel(lvl)
    self.model = model
def __init__(self, config):
    super(BertModelForMedical, self).__init__(config)
    self.bert_model = BertForSequenceClassification(config)
    # for param in self.bert_model.parameters():
    #     param.requires_grad = True
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.fc = nn.Linear(config.hidden_size, config.num_labels)
def __init__(self, model_path=None, config=None):
    # self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    self.device = torch.device("cpu")

    # load tokenizer
    self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)

    # load model configuration
    if config is None:
        config = BertConfig()

    # path to save model file
    if model_path is None:
        base_dir = os.path.dirname(os.path.realpath(__file__))
        model_dir = os.path.join(base_dir, '.models')
        os.makedirs(model_dir, exist_ok=True)
        url = "https://www.dropbox.com/s/jw18aln9rmg69d6/BERT_Weights.pt?dl=0"
        model_name = os.path.split(url)[-1][:-5]
        model_path = os.path.join(model_dir, model_name)
        # download model
        if not os.path.exists(model_path):
            subprocess.call(['wget', url, '-O', model_path])

    # load pre-trained model
    self.model = BertForSequenceClassification(config)
    self.model.load_state_dict(torch.load(model_path, map_location=self.device))
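# A minimal, hypothetical usage sketch for a classifier loaded as above (not part of the
# original snippet). The `classify_text` helper is an assumption and requires a transformers
# version where the tokenizer is callable and can return PyTorch tensors.
import torch

def classify_text(clf, text):
    """Run one string through the loaded tokenizer/model pair and return the argmax label id."""
    clf.model.eval()
    inputs = clf.tokenizer(text, return_tensors='pt', truncation=True, padding=True)
    with torch.no_grad():
        logits = clf.model(**inputs).logits
    return int(torch.argmax(logits, dim=-1))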
def train():
    parser = argparse.ArgumentParser()

    # load model and tokenizer
    # MODEL_NAME = "bert-base-multilingual-cased"
    MODEL_NAME = args.model_name  # "distilbert-base-multilingual-cased"
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

    # load dataset
    train_dataset = load_data("../input/data/train/train.tsv")
    # dev_dataset = load_data("./dataset/train/dev.tsv")
    train_label = train_dataset['label'].values
    # dev_label = dev_dataset['label'].values

    # tokenize dataset
    tokenized_train = tokenized_dataset(train_dataset, tokenizer)
    # tokenized_dev = tokenized_dataset(dev_dataset, tokenizer)

    # make dataset for pytorch
    RE_train_dataset = RE_Dataset(tokenized_train, train_label)
    # RE_dev_dataset = RE_Dataset(tokenized_dev, dev_label)

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    # set model hyperparameters
    bert_config = BertConfig.from_pretrained(MODEL_NAME)
    bert_config.num_labels = 42
    model = BertForSequenceClassification(bert_config)
    model.to(device)

    # Besides the options used here, TrainingArguments exposes many more; see
    # https://huggingface.co/transformers/main_classes/trainer.html#trainingarguments.
    training_args = TrainingArguments(
        output_dir=f'./results/{MODEL_NAME}',  # output directory
        save_total_limit=3,                    # total number of saved checkpoints
        save_steps=500,                        # model saving step
        # num_train_epochs=4,                  # total number of training epochs
        num_train_epochs=5,                    # total number of training epochs
        learning_rate=5e-5,                    # learning rate
        per_device_train_batch_size=16,        # batch size per device during training
        # per_device_eval_batch_size=16,       # batch size for evaluation
        warmup_steps=500,                      # number of warmup steps for learning rate scheduler
        weight_decay=0.01,                     # strength of weight decay
        logging_dir='./logs',                  # directory for storing logs
        logging_steps=100,                     # log saving step
        # evaluation_strategy='steps',         # evaluation strategy to adopt during training
        #                                      # `no`: no evaluation during training
        #                                      # `steps`: evaluate every `eval_steps`
        #                                      # `epoch`: evaluate at the end of each epoch
        # eval_steps=500,                      # evaluation step
        # load_best_model_at_end=True,         # when True, save_strategy and save_steps are ignored
        #                                      # and the model is saved after each evaluation
    )
    trainer = Trainer(
        model=model,                        # the instantiated 🤗 Transformers model to be trained
        args=training_args,                 # training arguments, defined above
        train_dataset=RE_train_dataset,     # training dataset
        # eval_dataset=RE_dev_dataset,      # evaluation dataset
        # compute_metrics=compute_metrics,  # define metrics function
    )

    # train model
    trainer.train()
def load(args, checkpoint_dir):
    state_dict = torch.load(os.path.join(checkpoint_dir, 'checkpoint.pth'))

    from collections import OrderedDict
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        if 'module' in k:
            namekey = k[7:]  # remove `module.`
        else:
            namekey = k
        new_state_dict[namekey] = v

    if args.model_type == 'bert':
        config = BertConfig.from_json_file(os.path.join(checkpoint_dir, 'config.bin'))
        model = BertForSequenceClassification(config)
        model.load_state_dict(new_state_dict)
    elif args.model_type == 'cnn':
        model = CNNModel(n_vocab=args.vocab_size,
                         embed_size=args.embed_size,
                         num_classes=args.num_labels,
                         num_filters=args.num_filters,
                         filter_sizes=args.filter_sizes,
                         device=args.device)
        model.load_state_dict(new_state_dict)
    elif args.model_type == 'lstm':
        model = LSTMModel(n_vocab=args.vocab_size,
                          embed_size=args.embed_size,
                          num_classes=args.num_labels,
                          hidden_size=args.hidden_size,
                          device=args.device)
        model.load_state_dict(new_state_dict)
    elif args.model_type == 'char-cnn':
        model = CharCNN(num_features=args.num_features,
                        num_classes=args.num_labels)
        model.load_state_dict(new_state_dict)
    else:
        raise ValueError('model type is not found!')

    return model.to(args.device)
def setUp(self):
    self.model = BertForSequenceClassification(BertConfig())
    self.model.add_adapter("a")
    self.model.add_adapter("b")
    self.model.add_adapter("c")
    self.model.add_adapter("d")
    self.model.to(torch_device)
    self.model.train()
def test_sequ_classification_model_head_labels(self):
    model = BertForSequenceClassification(self.config)
    with TemporaryDirectory() as temp_dir:
        model.save_head(temp_dir)
        model.load_head(temp_dir)
    self.assertEqual(self.labels, model.get_labels())
    self.assertDictEqual(self.label_map, model.get_labels_dict())
def create_and_check_for_sequence_classification(
    self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
    config.num_labels = self.num_labels
    model = BertForSequenceClassification(config)
    model.to(torch_device)
    model.eval()
    result = model(input_ids,
                   attention_mask=input_mask,
                   token_type_ids=token_type_ids,
                   labels=sequence_labels)
    self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_labels))
def load(self, fname=None):
    if fname is not None:
        self.load_path = fname

    if self.pretrained_bert and not Path(self.pretrained_bert).is_file():
        self.model = BertForSequenceClassification.from_pretrained(
            self.pretrained_bert,
            num_labels=self.n_classes,
            output_attentions=False,
            output_hidden_states=False)
    elif self.bert_config_file and Path(self.bert_config_file).is_file():
        self.bert_config = BertConfig.from_json_file(
            str(expand_path(self.bert_config_file)))
        if self.attention_probs_keep_prob is not None:
            self.bert_config.attention_probs_dropout_prob = 1.0 - self.attention_probs_keep_prob
        if self.hidden_keep_prob is not None:
            self.bert_config.hidden_dropout_prob = 1.0 - self.hidden_keep_prob
        self.model = BertForSequenceClassification(config=self.bert_config)
    else:
        raise ConfigError("No pre-trained BERT model is given.")

    self.model.to(self.device)

    self.optimizer = getattr(torch.optim, self.optimizer_name)(
        self.model.parameters(), **self.optimizer_parameters)
    if self.lr_scheduler_name is not None:
        self.lr_scheduler = getattr(torch.optim.lr_scheduler, self.lr_scheduler_name)(
            self.optimizer, **self.lr_scheduler_parameters)

    if self.load_path:
        log.info(f"Load path {self.load_path} is given.")
        if isinstance(self.load_path, Path) and not self.load_path.parent.is_dir():
            raise ConfigError("Provided load path is incorrect!")

        weights_path = Path(self.load_path.resolve())
        weights_path = weights_path.with_suffix(".pth.tar")
        if weights_path.exists():
            log.info(f"Load path {weights_path} exists.")
            log.info(f"Initializing `{self.__class__.__name__}` from saved.")

            # now load the weights and optimizer state from the checkpoint
            log.info(f"Loading weights from {weights_path}.")
            checkpoint = torch.load(weights_path, map_location=self.device)
            self.model.load_state_dict(checkpoint["model_state_dict"])
            self.optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
            self.epochs_done = checkpoint.get("epochs_done", 0)
        else:
            log.info(f"Init from scratch. Load path {weights_path} does not exist.")
def __init__(self, hyperparameters, check_ids: bool = False):
    super().__init__(hyperparameters)
    self.check_ids = check_ids

    # super light BERT model
    config = BertConfig(hidden_size=12,
                        num_hidden_layers=1,
                        num_attention_heads=1,
                        intermediate_size=12)
    self.model = BertForSequenceClassification(config)
def __init__(self, wv):
    super(MeshRelClassifier, self).__init__()
    self.wv_mdl = wv
    config = BertConfig(vocab_size=wv.vocab_size, hidden_size=wv.dim, num_hidden_layers=2)
    self.bert = BertForSequenceClassification(config)
    self.load_embeddings_from_vocab()
    self.is_emb_frozen = True
    self.we = self.bert.bert.embeddings.word_embeddings
def __init__(self):
    super(BERTdownsized, self).__init__()
    options_name = "bert-base-uncased"

    from transformers import BertConfig
    configuration = BertConfig()
    configuration.num_hidden_layers = 12
    self.encoder = BertForSequenceClassification(configuration)
    # import pdb; pdb.set_trace()
    print(self.encoder)
def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, bert_config_file, pytorch_dump_path):
    # Initialise PyTorch model
    config = BertConfig.from_json_file(bert_config_file)
    print("Building PyTorch model from configuration: {}".format(str(config)))
    model = BertForSequenceClassification(config)

    # Load weights from tf checkpoint
    load_tf_weights_in_bert(model, config, tf_checkpoint_path)

    # Save pytorch-model
    print("Save PyTorch model to {}".format(pytorch_dump_path))
    torch.save(model.state_dict(), pytorch_dump_path)
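# Hypothetical reload sketch (not part of the converter above): a state_dict written by
# convert_tf_checkpoint_to_pytorch can be restored into a fresh model built from the same
# BertConfig JSON. The file names below are placeholders, not paths from the original code.
import torch
from transformers import BertConfig, BertForSequenceClassification

config = BertConfig.from_json_file("bert_config.json")  # same config used during conversion
model = BertForSequenceClassification(config)
model.load_state_dict(torch.load("pytorch_model.bin", map_location="cpu"))
model.eval()  # ready for inference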
def __init__(self, config, dim_emb=768):
    super(jointModalBert, self).__init__()
    self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size,
                                        padding_idx=config.pad_token_id)
    self.visual_proj = nn.Linear(config.visual_dim + config.hidden_size, config.hidden_size)
    self.audio_proj = nn.Linear(config.audio_dim + config.hidden_size, config.hidden_size)
    self.joint_proj = nn.Linear(
        config.audio_dim + config.visual_dim + config.hidden_size,
        config.hidden_size)
    self.seqBert = BertForSequenceClassification(config)
def create_and_check_bert_for_sequence_classification(self, config, input_ids, token_type_ids,
                                                      input_mask, sequence_labels, token_labels,
                                                      choice_labels):
    config.num_labels = self.num_labels
    model = BertForSequenceClassification(config)
    model.eval()
    loss, logits = model(input_ids,
                         attention_mask=input_mask,
                         token_type_ids=token_type_ids,
                         labels=sequence_labels)
    result = {
        "loss": loss,
        "logits": logits,
    }
    self.parent.assertListEqual(
        list(result["logits"].size()),
        [self.batch_size, self.num_labels])
    self.check_loss_output(result)
def __init__(self, config, dim_emb=768):
    super(jointModalBert, self).__init__()
    # self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size, padding_idx=config.pad_token_id)
    self.visual_proj = nn.Linear(config.visual_dim + config.hidden_size, config.hidden_size)
    self.audio_proj = nn.Linear(config.audio_dim + config.hidden_size, config.hidden_size)
    self.joint_proj = nn.Linear(
        config.audio_dim + config.visual_dim + config.hidden_size,
        config.hidden_size)
    self.seqBert = BertForSequenceClassification(config)
    self.jointLayerNorm = torch.nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
def load_word_embedding_model(model_type, task, vocab_path, word_tokenizer_class,
                              emb_path, num_labels, lower=True):
    # Load config
    config = BertConfig.from_pretrained('bert-base-uncased')

    # Init word tokenizer
    word_tokenizer = word_tokenizer_class()

    # Load vocab
    _, vocab_map = load_vocab(vocab_path)
    tokenizer = SimpleTokenizer(vocab_map, word_tokenizer, lower=lower)
    vocab_list = list(tokenizer.vocab.keys())

    # Adjust config
    if type(num_labels) == list:
        config.num_labels = max(num_labels)
        config.num_labels_list = num_labels
    else:
        config.num_labels = num_labels
        config.num_hidden_layers = num_labels

    if 'word2vec' in model_type:
        embeddings = gen_embeddings(vocab_list, emb_path)
        config.hidden_size = 400
        config.num_attention_heads = 8
    else:  # 'fasttext'
        embeddings = gen_embeddings(vocab_list, emb_path, emb_dim=300)
        config.hidden_size = 300
        config.num_attention_heads = 10
    config.vocab_size = len(embeddings)

    # Instantiate model
    if 'sequence_classification' == task:
        model = BertForSequenceClassification(config)
        model.bert.embeddings.word_embeddings.weight.data.copy_(
            torch.FloatTensor(embeddings))
    elif 'token_classification' == task:
        model = BertForWordClassification(config)
        model.bert.embeddings.word_embeddings.weight.data.copy_(
            torch.FloatTensor(embeddings))
    elif 'multi_label_classification' == task:
        model = BertForMultiLabelClassification(config)
        model.bert.embeddings.word_embeddings.weight.data.copy_(
            torch.FloatTensor(embeddings))
    return model, tokenizer
def main(device='lazy', full_size=False):
    """
    Load model to specified device. Ensure that any backends have been initialized by this point.

    :param device: name of device to load tensors to
    :param full_size: if true, use a full pretrained bert-base-cased model instead of a smaller variant
    """
    torch.manual_seed(0)
    tokenized_datasets = tokenize_dataset(load_dataset('imdb'))
    small_train_dataset = tokenized_datasets['train'].shuffle(seed=42) \
                                                     .select(range(2))
    train_dataloader = DataLoader(small_train_dataset, shuffle=True, batch_size=8)

    if full_size:
        model = BertForSequenceClassification.from_pretrained('bert-base-cased', num_labels=2)
    else:
        configuration = BertConfig(
            vocab_size=28996,
            hidden_size=32,
            num_hidden_layers=1,
            num_attention_heads=2,
            intermediate_size=32,
            hidden_act='gelu',
            hidden_dropout_prob=0.0,
            attention_probs_dropout_prob=0.0,
            max_position_embeddings=512,
            layer_norm_eps=1.0e-05,
        )
        model = BertForSequenceClassification(configuration)
    model.to(device)

    num_epochs = 3
    num_training_steps = num_epochs * len(train_dataloader)
    losses = train(model, num_epochs, num_training_steps, train_dataloader, device)

    # Get debug information from LTC
    if 'torch_mlir.reference_lazy_backend._REFERENCE_LAZY_BACKEND' in sys.modules:
        computation = lazy_backend.get_latest_computation()
        if computation:
            print(computation.debug_string())

    print('Loss: ', losses)

    return model, losses
def get_model():
    if args.model == 'trans':
        transformer_config = BertConfig.from_pretrained('bert-base-uncased', num_labels=args.labels)
        if args.init_only:
            model = BertForSequenceClassification(config=transformer_config).to(device)
        else:
            model = BertForSequenceClassification.from_pretrained(
                'bert-base-uncased', config=transformer_config).to(device)

        param_optimizer = list(model.named_parameters())
        no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [
            {
                'params': [p for n, p in param_optimizer
                           if not any(nd in n for nd in no_decay)],
                'weight_decay': 0.01
            },
            {
                'params': [p for n, p in param_optimizer
                           if any(nd in n for nd in no_decay)],
                'weight_decay': 0.0
            }
        ]
        optimizer = AdamW(optimizer_grouped_parameters, lr=args.lr)
        es = EarlyStopping(patience=args.patience, percentage=False, mode='max', min_delta=0.0)
        scheduler = get_constant_schedule_with_warmup(optimizer, num_warmup_steps=0.05)
    else:
        if args.model == 'cnn':
            model = CNN_MODEL(tokenizer, args, n_labels=args.labels).to(device)
        elif args.model == 'lstm':
            model = LSTM_MODEL(tokenizer, args, n_labels=args.labels).to(device)
        optimizer = AdamW(model.parameters(), lr=args.lr)
        scheduler = ReduceLROnPlateau(optimizer, verbose=True)
        es = EarlyStopping(patience=args.patience, percentage=False, mode='max', min_delta=0.0)

    return model, optimizer, scheduler, es
def __init__(self, train_batch_size=16, eval_batch_size=8, max_length=128,
             lr=2e-5, eps=1e-6, n_epochs=11):
    """
    :param train_batch_size: (int) Training batch size
    :param eval_batch_size: (int) Batch size while using the `predict` method.
    :param max_length: (int) Maximum length for padding
    :param lr: (float) Learning rate
    :param eps: (float) Adam optimizer epsilon parameter
    :param n_epochs: (int) Number of epochs to train
    """
    # model parameters
    self.train_batch_size = train_batch_size
    self.eval_batch_size = eval_batch_size
    self.max_length = max_length
    self.lr = lr
    self.eps = eps
    self.n_epochs = n_epochs

    # Information to be set or updated later
    self.trainset = None
    self.categories = None
    self.labels = None
    self.model = None

    # Tokenizer
    self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    # The model
    #
    # We first need to specify some configurations for the model
    configs = BertConfig.from_pretrained(
        'bert-base-uncased', num_labels=3, type_vocab_size=8)  # BERT configuration
    self.model = BertForSequenceClassification(configs)

    # We replace the classification head of the model (which is initially a simple fully connected layer)
    clf = Net()
    self.model.classifier = clf

    # put the model on GPU if available, otherwise device is CPU
    self.model.to(device)
def __init__(self, cfg=None, mdo_prob=0., mdo_num=1, num_classes=1, path=None):
    # unnecessary head is present
    super().__init__()
    if path is not None:
        self.backbone = BertForSequenceClassification.from_pretrained(path)
        self.backbone.config.output_hidden_states = True
    else:
        assert cfg is not None, 'Config should be provided if no pretrained path was specified.'
        self.backbone = BertForSequenceClassification(cfg)

    self.head = nn.Linear(self.backbone.config.hidden_size, num_classes)

    weights_init = torch.zeros(self.backbone.config.num_hidden_layers).float()
    self.cls_weights = torch.nn.Parameter(weights_init, requires_grad=True)

    self.mdo = None
    if mdo_prob > 0.:
        self.mdo = MultiDropoutHead(mdo_prob, mdo_num)
def __init__(self):
    set_seed()
    self.sess = []
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    self.tokenizer = BertTokenizer.from_pretrained('../user_data/vocab')

    for model_name in ['bert', 'rbtl']:
        model_config = BertConfig.from_pretrained(
            pretrained_model_name_or_path="../user_data/bert_source/{}_config.json".format(model_name))
        model_config.vocab_size = len(pd.read_csv('../user_data/vocab', names=["score"]))
        self.model = BertForSequenceClassification(config=model_config)
        checkpoint = torch.load(
            '../user_data/save_model/{}_checkpoint.pth.tar'.format(model_name),
            map_location='cpu')
        self.model.load_state_dict(checkpoint['status'])

        # convert the PyTorch model to ONNX
        MODEL_ONNX_PATH = "./torch_{}_dynamic.onnx".format(model_name)
        OPERATOR_EXPORT_TYPE = torch._C._onnx.OperatorExportTypes.ONNX
        self.model.eval()
        org_dummy_input = make_train_dummy_input()
        inf_dummy_input = make_inference_dummy_input()
        dynamic_axes = {
            'input_ids': [1],
            'token_type_ids': [1],
            'attention_mask': [1]
        }
        output = torch.onnx.export(
            self.model,
            org_dummy_input,
            MODEL_ONNX_PATH,
            verbose=False,
            operator_export_type=OPERATOR_EXPORT_TYPE,
            opset_version=10,
            input_names=['input_ids', 'token_type_ids', 'attention_mask'],
            output_names=['output'],
            dynamic_axes=dynamic_axes)
        self.sess.append(onnxruntime.InferenceSession(MODEL_ONNX_PATH))
def main(args):
    """
    Runs inference on any file that has the same format as the provided dataset tsv file.
    """
    seed_everything(args.seed)

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    # load tokenizer
    TOK_NAME = args.token
    if TOK_NAME == "monologg/kobert":
        tokenizer = KoBertTokenizer.from_pretrained(TOK_NAME)
    else:
        tokenizer = AutoTokenizer.from_pretrained(TOK_NAME)

    # load my model
    bert_config = BertConfig.from_pretrained(TOK_NAME)
    bert_config.num_labels = args.num_labels
    bert_config.num_hidden_layers = args.num_hidden_layers
    model = BertForSequenceClassification(bert_config)
    model_dir = os.path.join(args.model_dir, args.name)
    model_path = os.path.join(model_dir, 'best.pth')

    # load test dataset
    test_dataset_dir = "/opt/ml/input/data/test/test.tsv"
    test_dataset, test_label = load_test_dataset(test_dataset_dir, model, tokenizer, args)
    test_dataset = RE_Dataset(test_dataset, test_label)

    model.load_state_dict(torch.load(model_path, map_location=device))
    model.to(device)

    # predict answer
    batch_size = args.batch_size
    print("Inference Start!!!")
    pred_answer = inference(model, test_dataset, device, batch_size)

    # make csv file with predicted answer
    # please keep the directory layout and column names used below
    output = pd.DataFrame(pred_answer, columns=['pred'])
    save_dir = os.path.join(args.output_dir, args.name)
    os.makedirs(save_dir, exist_ok=True)
    output.to_csv(os.path.join(save_dir, f'{args.name}.csv'), index=False)
def load(args, checkpoint_dir):
    state_dict = torch.load(os.path.join(checkpoint_dir, 'checkpoint.pth'))

    from collections import OrderedDict
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        if 'module' in k:
            namekey = k[7:]  # remove `module.`
        else:
            namekey = k
        new_state_dict[namekey] = v

    if args.model_type == 'bert':
        config = BertConfig.from_json_file(os.path.join(checkpoint_dir, 'config.bin'))
        model = BertForSequenceClassification(config)
        model.load_state_dict(new_state_dict)
    elif args.model_type == 'bow':
        model = BOWModel(new_state_dict['embedding.weight'],
                         n_vocab=args.vocab_size,
                         embed_size=args.embed_size,
                         hidden_size=args.hidden_size,
                         num_classes=args.num_labels)
        model.load_state_dict(new_state_dict)
    elif args.model_type == 'decom_att':
        model = DecompAttentionModel(args.word_mat,
                                     n_vocab=args.vocab_size,
                                     embed_size=args.embed_size,
                                     hidden_size=args.hidden_size,
                                     num_classes=args.num_labels)
        model.load_state_dict(new_state_dict)
    elif args.model_type == 'esim':
        model = ESIM(vocab_size=args.vocab_size,
                     embedding_dim=args.embed_size,
                     hidden_size=args.hidden_size,
                     embeddings=None,
                     padding_idx=0,
                     dropout=0.1,
                     num_classes=args.num_labels,
                     device=args.device)
        model.load_state_dict(new_state_dict)
    else:
        raise ValueError('model type is not found!')

    return model.to(args.device)
def get_model(model_args, device, embeddings=None):
    if model_args.model == 'transformer':
        transformer_config = BertConfig.from_pretrained('bert-base-uncased',
                                                        num_labels=model_args.labels)
        if model_args.init_only:
            transformer_model = BertForSequenceClassification(
                config=transformer_config).to(device)
            model = BertWrapper(transformer_model)
        else:
            transformer_model = BertForSequenceClassification.from_pretrained(
                'bert-base-uncased', config=transformer_config).to(device)
            model = BertWrapper(transformer_model)

        param_optimizer = list(model.named_parameters())
        no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [
            {
                'params': [p for n, p in param_optimizer
                           if not any(nd in n for nd in no_decay)],
                'weight_decay': 0.01
            },
            {
                'params': [p for n, p in param_optimizer
                           if any(nd in n for nd in no_decay)],
                'weight_decay': 0.0
            }
        ]
        optimizer = AdamW(optimizer_grouped_parameters, lr=model_args.lr)
        scheduler = get_constant_schedule_with_warmup(optimizer, num_warmup_steps=0.05)
    else:
        if model_args.model == 'cnn':
            model = CNN_MODEL(embeddings, model_args, n_labels=model_args.labels).to(device)
        elif model_args.model == 'lstm':
            model = LSTM_MODEL(embeddings, model_args, n_labels=model_args.labels).to(device)
        optimizer = AdamW(model.parameters(), lr=model_args.lr)
        scheduler = ReduceLROnPlateau(optimizer, verbose=True)

    return model, optimizer, scheduler
def __init__(self, model_name_or_dir, num_classes, fine_tune=True, state_dict=None, bert_config=None):
    super().__init__()
    assert model_name_or_dir is not None or bert_config is not None, \
        "Either a name or directory containing a pretrained model or a custom bert config must be provided"

    if bert_config is None:
        self.model = BertForSequenceClassification.from_pretrained(
            model_name_or_dir, num_labels=num_classes, state_dict=state_dict)
    else:
        self.model = BertForSequenceClassification(config=bert_config)

    # Fine-tune: freeze all weights except the classifier
    if fine_tune:
        self._freeze_base_weights()
def device_setup(self):
    """
    Configure the device and load the BERT model.
    :return:
    """
    # use the GPU if available, via model.to(device)
    self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    model_save_path = self.config.get("result", "model_save_path")
    config_save_path = self.config.get("result", "config_save_path")
    vocab_save_path = self.config.get("result", "vocab_save_path")

    self.model_config = BertConfig.from_json_file(config_save_path)
    self.model = BertForSequenceClassification(self.model_config)
    self.state_dict = torch.load(model_save_path)
    self.model.load_state_dict(self.state_dict)

    self.tokenizer = transformers.BertTokenizer(vocab_save_path)
    self.model.to(self.device)
    self.model.eval()
def model_infer(config, test_load, k):
    print("***********load model weight*****************")
    model_config = BertConfig()
    model_config.vocab_size = len(pd.read_csv('../user_data/vocab', names=["score"]))
    model = BertForSequenceClassification(config=model_config)
    model.load_state_dict(
        torch.load('../user_data/save_model/{}_best_model.pth.tar'.format(config.model_name))['status'])
    model = model.to(config.device)

    print("***********make predict for test file*****************")
    model.eval()
    predict_all = []
    with torch.no_grad():
        for batch, (input_ids, token_type_ids, attention_mask, label) in enumerate(test_load):
            input_ids = input_ids.to(config.device)
            attention_mask = attention_mask.to(config.device)
            token_type_ids = token_type_ids.to(config.device)
            outputs = model(input_ids=input_ids,
                            attention_mask=attention_mask,
                            token_type_ids=token_type_ids)
            logits = outputs.logits
            pred_pob = torch.nn.functional.softmax(logits, dim=1)[:, 1]
            predict_all.extend(list(pred_pob.detach().cpu().numpy()))

    # submit_result(predict)
    if k == 0:
        df = pd.DataFrame(predict_all, columns=["{}_socre".format(k + 1)])
        df.to_csv('./{}_result.csv'.format(config.model_name), index=False)
    else:
        df = pd.read_csv('./{}_result.csv'.format(config.model_name))
        df["{}_socre".format(k + 1)] = predict_all
        df.to_csv('./{}_result.csv'.format(config.model_name), index=False)
    print("***********done*****************")
def test_auto_set_save_adapters(self):
    model = BertForSequenceClassification(
        BertConfig(
            hidden_size=32,
            num_hidden_layers=4,
            num_attention_heads=4,
            intermediate_size=37,
        )
    )
    model.add_adapter("adapter1")
    model.add_adapter("adapter2")
    model.add_adapter_fusion(Fuse("adapter1", "adapter2"))
    model.train_adapter_fusion(Fuse("adapter1", "adapter2"))

    training_args = TrainingArguments(
        output_dir="./examples",
    )
    trainer = AdapterTrainer(
        model=model,
        args=training_args,
    )
    self.assertTrue(trainer.train_adapter_fusion)
def convert_tf2_checkpoint_to_pytorch(tf_checkpoint_path, config_path, output_folder):
    # Instantiate model
    logger.info(f'Loading model based on config from {config_path}...')
    config = BertConfig.from_json_file(config_path)
    model = BertForSequenceClassification(config)

    # Load weights from checkpoint
    logger.info(f'Loading weights from checkpoint {tf_checkpoint_path}...')
    load_tf2_weights_in_bert(model, tf_checkpoint_path, config)

    # Create dirs
    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)

    # Save pytorch-model
    f_out_model = os.path.join(output_folder, 'pytorch_model.bin')
    logger.info(f'Saving PyTorch model to {f_out_model}...')
    torch.save(model.state_dict(), f_out_model)

    # Save config to output
    f_out_config = os.path.join(output_folder, 'config.json')
    logger.info(f'Saving config to {f_out_config}...')
    config.to_json_file(f_out_config)