def __init__(self, L=30, model_state=None):
    super(MbPA, self).__init__()
    if model_state is None:
        # Key network to find key representation of content
        self.key_encoder = transformers.BertModel.from_pretrained(
            'bert-base-uncased')
        # Bert model for text classification
        self.classifier = transformers.BertForSequenceClassification.from_pretrained(
            'bert-base-uncased', num_labels=33)
    else:
        cls_config = transformers.BertConfig.from_pretrained(
            'bert-base-uncased', num_labels=33)
        self.classifier = transformers.BertForSequenceClassification(cls_config)
        self.classifier.load_state_dict(model_state['classifier'])
        key_config = transformers.BertConfig.from_pretrained('bert-base-uncased')
        self.key_encoder = transformers.BertModel(key_config)
        self.key_encoder.load_state_dict(model_state['key_encoder'])
    # Load base model weights. Each parameter is cloned and detached because
    # parameters() yields references to the live parameters.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    self.base_weights = [p.clone().detach().to(device)
                         for p in self.classifier.parameters()]
    # Local adaptation learning rate - 1e-3 or 5e-3
    self.loc_adapt_lr = 1e-3
    # Number of local adaptation steps
    self.L = L
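
# Usage sketch (not from the original file; the checkpoint filename is a placeholder):
# build a model_state dict with the same keys read in __init__ above, save it,
# and rebuild the model from it instead of re-downloading the pretrained weights.
model = MbPA(L=30)
state = {
    'classifier': model.classifier.state_dict(),
    'key_encoder': model.key_encoder.state_dict(),
}
torch.save(state, 'mbpa_checkpoint.pt')
restored = MbPA(L=30, model_state=torch.load('mbpa_checkpoint.pt'))
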
def __init__(self, model_state=None):
    super(EncDec, self).__init__()
    # Initialize the pretrained bert model for sequence classification
    if model_state is None:
        # from_pretrained() loads weights and config from the files
        # pytorch_model.bin and config.json if available in the directory provided
        self.classifier = transformers.BertForSequenceClassification.from_pretrained(
            '../pretrained_bert_tc/model_config')
        # If weights and config are not saved locally, the model can be downloaded instead:
        # self.classifier = transformers.BertForSequenceClassification.from_pretrained(
        #     'bert-base-uncased', num_labels=33)
    else:
        # If the config file is not locally available, use:
        # config = transformers.BertConfig.from_pretrained('bert-base-uncased', num_labels=33)
        config = transformers.BertConfig.from_pretrained(
            '../pretrained_bert_tc/model_config/config.json', num_labels=33)
        self.classifier = transformers.BertForSequenceClassification(config)
        self.classifier.load_state_dict(model_state['classifier'])
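
# Sketch (an assumption, not from the original file): producing the local
# '../pretrained_bert_tc/model_config' directory that the branch above loads from.
# save_pretrained() writes the model weights plus config.json into that directory.
clf = transformers.BertForSequenceClassification.from_pretrained(
    'bert-base-uncased', num_labels=33)
clf.save_pretrained('../pretrained_bert_tc/model_config')
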
####
##############################################################################
##############################################################################
import sys
from time import time

import torch
import transformers

eval_model = sys.argv[1]
eval_dir = sys.argv[2]

# Rebuild the classifier from the checkpoint's config and load the fine-tuned weights
roberta_single = transformers.BertForSequenceClassification(
    transformers.BertConfig.from_json_file("roberta.large.zh.wwm.mnli/config.json"))
states = torch.load(eval_model)
roberta_single.load_state_dict(states['model'])

log_steps = 500
num_epochs = 2
max_seq_length = 256
num_cores = torch.cuda.device_count()  # 8
effective_batch_size = 64  # 8 bs per device
update_freq = 1  # 4 bs per device
fp16 = True


class args:
def init_model():
    config = transformers.BertConfig.from_pretrained(
        'bert-base-uncased', num_labels=4, hidden_size=768)
    model = transformers.BertForSequenceClassification(config)
    tokenizer = transformers.BertTokenizer.from_pretrained('bert-base-uncased')
    return config, model, tokenizer
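
# Quick usage sketch (placeholder input text, not from the original file). Both the
# encoder and the 4-way classification head are randomly initialized from the config,
# so the logits below are untrained until the model is fine-tuned.
config, model, tokenizer = init_model()
enc = tokenizer("an example sentence", return_tensors='pt')
logits = model(**enc)[0]  # shape (1, 4): one score per label
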
train_data = TensorDataset(train_inputs, train_masks, train_labels)
train_dataloader = DataLoader(train_data, batch_size=args.batch_size)

validation_data = TensorDataset(validation_inputs, validation_masks, validation_labels)
validation_dataloader = DataLoader(validation_data, batch_size=args.batch_size)

config = transformers.BertConfig(
    # vocab_size=len(tokenizer),
    vocab_size=10000,
    hidden_size=768,
    num_hidden_layers=6,
    max_position_embeddings=args.max_len,
    type_vocab_size=1,
)
model = transformers.BertForSequenceClassification(config)
if device == "cuda":
    model.cuda()
# print(model)

### Training settings
optimizer = transformers.AdamW(model.parameters(), lr=args.lr, eps=1e-8)
# The scheduler counts optimizer steps, so use the number of batches, not samples
total_steps = len(train_dataloader) * args.epochs
scheduler = transformers.get_linear_schedule_with_warmup(
    optimizer, num_warmup_steps=0, num_training_steps=total_steps)

### Training
model.zero_grad()
for epoch in range(1, args.epochs + 1):
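    # Sketch of a typical epoch body (not part of the original snippet): one
    # forward/backward pass per batch, gradient clipping, then optimizer and
    # scheduler steps. Variable names follow the setup above.
    model.train()
    for batch in train_dataloader:
        b_inputs, b_masks, b_labels = (t.to(device) for t in batch)
        outputs = model(b_inputs, attention_mask=b_masks, labels=b_labels)
        loss = outputs[0]
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        scheduler.step()
        model.zero_grad()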