def __init__(self, args, dictionary, left_pad=False):
    super().__init__(dictionary)
    self.dropout = args.dropout

    from pytorch_transformers import RobertaModel, BertModel
    from pytorch_transformers.file_utils import PYTORCH_TRANSFORMERS_CACHE
    from pytorch_transformers import RobertaConfig, RobertaTokenizer, BertConfig, BertTokenizer

    if args.pretrained_bert_model.startswith('roberta'):
        self.embed = RobertaModel.from_pretrained(
            args.pretrained_bert_model,
            cache_dir=PYTORCH_TRANSFORMERS_CACHE / 'distributed_{}'.format(args.distributed_rank))
        self.context = RobertaModel.from_pretrained(
            args.pretrained_bert_model,
            cache_dir=PYTORCH_TRANSFORMERS_CACHE / 'distributed_{}'.format(args.distributed_rank))
        self.config = RobertaConfig.from_pretrained(args.pretrained_bert_model)
        self.tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
    else:
        self.embed = BertModel.from_pretrained(
            args.pretrained_bert_model,
            cache_dir=PYTORCH_TRANSFORMERS_CACHE / 'distributed_{}'.format(args.distributed_rank))
        self.context = BertModel.from_pretrained(
            args.pretrained_bert_model,
            cache_dir=PYTORCH_TRANSFORMERS_CACHE / 'distributed_{}'.format(args.distributed_rank))
        self.config = BertConfig.from_pretrained(args.pretrained_bert_model)
        self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    self.padding_idx = self.tokenizer.convert_tokens_to_ids(self.tokenizer.pad_token)
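# --- usage sketch (not from the original source; names are hypothetical) ---
# Shows how the padding id computed above is typically used: build an attention
# mask over a padded (batch, seq_len) id tensor so the embedder ignores pad positions.
def build_attention_mask(src_tokens, padding_idx):
    return src_tokens.ne(padding_idx).long()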
def __init__(self, config):
    super(RobertaForMultipleChoice, self).__init__(config)
    self.roberta = RobertaModel(config)
    self.classifier = RobertaClassificationHead(config)
    self.apply(self.init_weights)
def __init__(self):
    super(PretrainedModel, self).__init__()
    self.model = RobertaModel.from_pretrained("roberta-large", output_hidden_states=True)
    self.config = self.model.config
    for p in self.parameters():
        p.requires_grad = False
def create_and_check_roberta_model(self, config, input_ids, token_type_ids, input_mask,
                                   sequence_labels, token_labels, choice_labels):
    model = RobertaModel(config=config)
    model.eval()
    sequence_output, pooled_output = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids)
    sequence_output, pooled_output = model(input_ids, token_type_ids=token_type_ids)
    sequence_output, pooled_output = model(input_ids)

    result = {
        "sequence_output": sequence_output,
        "pooled_output": pooled_output,
    }
    self.parent.assertListEqual(
        list(result["sequence_output"].size()),
        [self.batch_size, self.seq_length, self.hidden_size])
    self.parent.assertListEqual(
        list(result["pooled_output"].size()),
        [self.batch_size, self.hidden_size])
def __init__(self, bert_path, bert_name='roberta-base', fine_tune=False,
             use_lstm=False, num_layers=2, bidirectional=False):
    super(QuestionEmbeddingModule, self).__init__()
    self.use_lstm = use_lstm
    self.bert_name = bert_name
    if self.use_lstm:
        # TODO: the number of knowledge-graph entities here must be adjusted to the
        # specific knowledge graph being used; the same applies below.
        self.question_embed = torch.nn.Sequential(
            torch.nn.Embedding(num_embeddings=50265, embedding_dim=256),
            torch.nn.LSTM(input_size=256, hidden_size=768, num_layers=num_layers,
                          bidirectional=bidirectional, batch_first=True))
    else:
        logger.info('loading pretrained bert model from {}'.format(bert_path + bert_name))
        if self.bert_name == 'roberta-base':
            self.question_embed = RobertaModel.from_pretrained(bert_path + bert_name)
        elif self.bert_name == 'bert-base-uncased':
            self.question_embed = BertModel.from_pretrained(bert_path + bert_name)
        else:
            raise Exception('bert model unspecified!')
        if not fine_tune:
            # freeze the pretrained encoder
            for param in self.question_embed.parameters():
                param.requires_grad = False
def main():
    best_result = float("-inf")
    logger.info("Loading data...")

    train_itr = DropBatchGen(args, data_mode="train", tokenizer=tokenizer)
    dev_itr = DropBatchGen(args, data_mode="dev", tokenizer=tokenizer)
    num_train_steps = int(args.max_epoch * len(train_itr) / args.gradient_accumulation_steps)
    logger.info("Num update steps {}!".format(num_train_steps))

    logger.info("Build bert model.")
    bert_model = RobertaModel.from_pretrained(args.roberta_model)

    logger.info("Build Drop model.")
    network = NumericallyAugmentedBertNet(
        bert_model,
        hidden_size=bert_model.config.hidden_size,
        dropout_prob=args.dropout,
        use_gcn=args.use_gcn,
        gcn_steps=args.gcn_steps)

    logger.info("Build optimizer etc...")
    model = DropBertModel(args, network, num_train_step=num_train_steps)

    train_start = datetime.now()
    first = True
    for epoch in range(1, args.max_epoch + 1):
        model.avg_reset()
        if not first:
            train_itr.reset()
        first = False
        logger.info('At epoch {}'.format(epoch))
        for step, batch in enumerate(train_itr):
            model.update(batch)
            if model.step % (args.log_per_updates * args.gradient_accumulation_steps) == 0 or model.step == 1:
                logger.info(
                    "Updates[{0:6}] train loss[{1:.5f}] train em[{2:.5f}] f1[{3:.5f}] remaining[{4}]".format(
                        model.updates, model.train_loss.avg, model.em_avg.avg, model.f1_avg.avg,
                        str((datetime.now() - train_start) / (step + 1) * (num_train_steps - step - 1)).split('.')[0]))
                model.avg_reset()

        total_num, eval_loss, eval_em, eval_f1 = model.evaluate(dev_itr)
        logger.info(
            "Eval {} examples, result in epoch {}, eval loss {}, eval em {} eval f1 {}.".format(
                total_num, epoch, eval_loss, eval_em, eval_f1))

        if eval_f1 > best_result:
            save_prefix = os.path.join(args.save_dir, "checkpoint_best")
            model.save(save_prefix, epoch)
            best_result = eval_f1
            logger.info("Best eval F1 {} at epoch {}".format(best_result, epoch))

    logger.info("done training in {} seconds!".format((datetime.now() - train_start).seconds))
def __init__(self, config):
    super(RobertaForRR, self).__init__(config)
    self.num_labels = config.num_labels
    self.roberta = RobertaModel(config)
    self.classifier = RobertaClassificationHead(config)
    self.apply(self.init_weights)
def test_model_from_pretrained(self):
    cache_dir = "/tmp/pytorch_transformers_test/"
    for model_name in list(ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
        model = RobertaModel.from_pretrained(model_name, cache_dir=cache_dir)
        shutil.rmtree(cache_dir)
        self.assertIsNotNone(model)
def __init__(self, config, tie_weights):
    super(RoBertaMCQWeightedSumScore, self).__init__(config)
    self.roberta = RobertaModel(config)
    self._dropout = nn.Dropout(config.hidden_dropout_prob)
    self._classification_layer = nn.Linear(config.hidden_size, 1)
    if tie_weights is True:
        self._weight_layer = self._classification_layer
    else:
        self._weight_layer = nn.Linear(config.hidden_size, 1)
    self.apply(self.init_weights)
def __init__(self, config):
    super(RobertaForRRWithNodeLoss, self).__init__(config)
    self.num_labels = config.num_labels
    self.roberta = RobertaModel(config)
    self.classifier = RobertaClassificationHead(config)
    self.naf_layer = nn.Linear(config.hidden_size, config.hidden_size)
    self.classifier_node = NodeClassificationHead(config)
    self.apply(self.init_weights)
def __init__(self, config, num_choices=1, num_docs_rank=30):
    super(RobertaForMultipleChoice, self).__init__(config)
    self.num_choices = num_choices
    self.roberta = RobertaModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.classifier = nn.Linear(config.hidden_size, num_choices)
    self.num_docs_rank = num_docs_rank
    self.apply(self.init_weights)
def __init__(self, model_name_or_path: str, max_seq_length: int = 128, do_lower_case: bool = True):
    super(RoBERTa, self).__init__()
    self.config_keys = ['max_seq_length', 'do_lower_case']
    self.max_seq_length = max_seq_length
    self.do_lower_case = do_lower_case

    self.roberta = RobertaModel.from_pretrained(model_name_or_path)
    self.tokenizer = RobertaTokenizer.from_pretrained(model_name_or_path, do_lower_case=do_lower_case)
    self.cls_token_id = self.tokenizer.convert_tokens_to_ids([self.tokenizer.cls_token])[0]
    self.sep_token_id = self.tokenizer.convert_tokens_to_ids([self.tokenizer.sep_token])[0]
def __init__(self, config):
    super(RoBertaMCQMAC, self).__init__(config)
    config.output_attentions = True
    self.roberta = RobertaModel(config)
    self._dropout = nn.Dropout(config.hidden_dropout_prob)
    self._classification_layer = nn.Linear(config.hidden_size, 1)
    self._key_components_detection_layer = nn.Linear(3 * config.hidden_size, 1)
    self._attention_layer = 13
    self._attention_head = 4
    self.apply(self.init_weights)
def test_data(args):
    # result_dir = "../results/" + args['in_dir'].split("/")[-2] + "-" + args['model_dir'].split("/")[-2]
    result_dir = args["save_dir"] + args["save_folder"]

    if args['embed_size'] == 768:
        model = RobertaModel.from_pretrained('roberta-base').cuda()
    else:
        model = RobertaModel.from_pretrained('roberta-large').cuda()
    if args['parallel']:
        model = nn.DataParallel(model)
    classifier = FeedForward(args['embed_size'], int(args['embed_size'] / 2), args['nooflabels']).cuda()

    checkpoint = torch.load(args['model_dir'] + args['model_name'])
    model.load_state_dict(checkpoint['model_state_dict'])
    classifier.load_state_dict(checkpoint['classifier_state_dict'])

    for split in args["eval_splits"]:
        try:
            data_file = open(args['in_dir'] + split + ".pkl", 'rb')
            data = pickle.load(data_file)
            # print(len(data['encodings']))
            acc, gold, pred = test(model, classifier, data)
            print("{} accuracy: {}".format(split, acc))
            results = {"accuracy": acc, "gold": gold, "pred": pred}
            if args['save_enable'] != 0:
                if not os.path.isdir(result_dir):
                    os.mkdir(result_dir)
                with open(result_dir + "/predict_" + split + ".json", 'w') as fp:
                    json.dump(results, fp)
        except FileNotFoundError:
            print("{}.pkl file doesn't exist".format(split))
def test_inference_no_head(self):
    model = RobertaModel.from_pretrained('roberta-base')

    input_ids = torch.tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
    output = model(input_ids)[0]
    # compare the actual values for a slice.
    expected_slice = torch.Tensor(
        [[[-0.0231, 0.0782, 0.0074],
          [-0.1854, 0.0539, -0.0174],
          [0.0548, 0.0799, 0.1687]]])
    self.assertTrue(torch.allclose(output[:, :3, :3], expected_slice, atol=1e-3))
def __init__(self, opt):
    self.opt = opt

    if 'aen_simple' == opt.model_name:
        if 'bert' == opt.bert_type:
            tokenizer = Tokenizer4Bert(opt.max_seq_len, opt.pretrained_bert_name)
            bert = BertModel.from_pretrained(opt.pretrained_bert_name)
            self.model = opt.model_class(bert, opt).to(opt.device)
        elif 'roberta' == opt.bert_type:
            tokenizer = Tokenizer4RoBerta(opt.max_seq_len, opt.pretrained_bert_name)
            roberta = RobertaModel.from_pretrained(opt.pretrained_bert_name)
            self.model = opt.model_class(roberta, opt).to(opt.device)
    elif 'roberta' in opt.model_name:
        tokenizer = Tokenizer4RoBerta(opt.max_seq_len, opt.pretrained_bert_name)
        roberta = RobertaModel.from_pretrained(opt.pretrained_bert_name)
        self.model = opt.model_class(roberta, opt).to(opt.device)
    elif 'bert' in opt.model_name:
        tokenizer = Tokenizer4Bert(opt.max_seq_len, opt.pretrained_bert_name)
        bert = BertModel.from_pretrained(opt.pretrained_bert_name)
        self.model = opt.model_class(bert, opt).to(opt.device)

    self.trainset = ABSADataset(opt.dataset_file['train'], tokenizer)
    self.testset = ABSADataset(opt.dataset_file['test'], tokenizer)
    assert 0 <= opt.valset_ratio < 1
    if opt.valset_ratio > 0:
        valset_len = int(len(self.trainset) * opt.valset_ratio)
        self.trainset, self.valset = random_split(
            self.trainset, (len(self.trainset) - valset_len, valset_len))
    else:
        self.valset = self.testset

    if opt.device.type == 'cuda':
        logger.info('cuda memory allocated: {}'.format(
            torch.cuda.memory_allocated(device=opt.device.index)))
    self._print_args()
def __init__(
    self,
    base_model_path,
    base_model_name,
    is_custom_pretrained,
    base_model_feature_size,
    additional_feature_size,
    num_classes,
    rnn_dimension,
    linear_1_dimension,
):
    super(MatchArchitecture, self).__init__()
    if not is_custom_pretrained:
        self.base_model = RobertaModel.from_pretrained(base_model_name)
    else:
        self.base_model = RobertaModel.from_pretrained(base_model_path)

    # freeze the pretrained encoder's weights
    for param in self.base_model.parameters():
        param.requires_grad = False

    self.match_head = MatchHead(base_model_feature_size, additional_feature_size,
                                num_classes, rnn_dimension, linear_1_dimension)
def __init__(self, model):
    super().__init__()
    if 'roberta' in model:
        print("Roberta model: {}".format(model))
        self.tokenizer = RobertaTokenizer.from_pretrained(model)
        self.bert = RobertaModel.from_pretrained(model)
    else:
        print("Bert model: {}".format(model))
        self.tokenizer = BertTokenizer.from_pretrained(model)
        self.bert = BertModel.from_pretrained(model)
    self.dim = self.bert.pooler.dense.in_features
    self.max_len = self.bert.embeddings.position_embeddings.num_embeddings
    if use_cuda:
        self.cuda()
def __init__(self, opt):
    self.opt = opt

    if 'roberta' in opt.pretrained_bert_name:
        tokenizer = RobertaTokenizer.from_pretrained(opt.pretrained_bert_name)
        transformer = RobertaModel.from_pretrained(opt.pretrained_bert_name, output_attentions=True)
    elif 'bert' in opt.pretrained_bert_name:
        tokenizer = BertTokenizer.from_pretrained(opt.pretrained_bert_name)
        transformer = BertModel.from_pretrained(opt.pretrained_bert_name, output_attentions=True)
    elif 'xlnet' in opt.pretrained_bert_name:
        tokenizer = XLNetTokenizer.from_pretrained(opt.pretrained_bert_name)
        transformer = XLNetModel.from_pretrained(opt.pretrained_bert_name, output_attentions=True)

    if 'bert' in opt.model_name or 'xlnet' in opt.model_name:
        tokenizer = Tokenizer4Pretrain(tokenizer, opt.max_seq_len)
        self.model = opt.model_class(transformer, opt).to(opt.device)
    # elif 'xlnet' in opt.model_name:
    #     tokenizer = Tokenizer4Pretrain(tokenizer, opt.max_seq_len)
    #     self.model = opt.model_class(bert, opt).to(opt.device)
    else:
        tokenizer = build_tokenizer(
            fnames=[opt.dataset_file['train'], opt.dataset_file['test']],
            max_seq_len=opt.max_seq_len,
            dat_fname='{0}_tokenizer.dat'.format(opt.dataset))
        embedding_matrix = build_embedding_matrix(
            word2idx=tokenizer.word2idx,
            embed_dim=opt.embed_dim,
            dat_fname='{0}_{1}_embedding_matrix.dat'.format(str(opt.embed_dim), opt.dataset))
        self.model = opt.model_class(embedding_matrix, opt).to(opt.device)

    self.trainset = ABSADataset(opt.dataset_file['train'], tokenizer)
    self.testset = ABSADataset(opt.dataset_file['test'], tokenizer)
    assert 0 <= opt.valset_ratio < 1
    if opt.valset_ratio > 0:
        valset_len = int(len(self.trainset) * opt.valset_ratio)
        self.trainset, self.valset = random_split(
            self.trainset, (len(self.trainset) - valset_len, valset_len))
    else:
        self.valset = self.testset

    if opt.device.type == 'cuda':
        logger.info('cuda memory allocated: {}'.format(
            torch.cuda.memory_allocated(device=opt.device.index)))
    self._print_args()
def __init__(self, token_makers, lang_code="en", pretrained_model_name=None, answer_maxlen=30):
    super(RoBertaForQA, self).__init__(token_makers)

    self.lang_code = lang_code
    self.use_pytorch_transformers = True  # for optimizer's model parameters
    self.answer_maxlen = answer_maxlen

    self.model = RobertaModel.from_pretrained(
        pretrained_model_name, cache_dir=str(CachePath.ROOT))
    self.qa_outputs = nn.Linear(self.model.config.hidden_size, self.model.config.num_labels)
    self.criterion = nn.CrossEntropyLoss()
def __init__(self, token_makers, pretrained_model_name=None, dropout=0.2):
    super(RobertaForRegression, self).__init__(token_makers)

    self.use_pytorch_transformers = True  # for optimizer's model parameters
    NUM_CLASSES = 1

    self._model = RobertaModel.from_pretrained(
        pretrained_model_name, cache_dir=str(CachePath.ROOT))
    self.classifier = nn.Sequential(
        nn.Dropout(dropout),
        nn.Linear(self._model.config.hidden_size, NUM_CLASSES))
    self.classifier.apply(self._model.init_weights)
    self.criterion = nn.MSELoss()
def __init__(self, token_makers, num_classes, pretrained_model_name=None, dropout=0.2):
    super(RobertaForSeqCls, self).__init__(token_makers)

    self.use_pytorch_transformers = True  # for optimizer's model parameters
    self.num_classes = num_classes

    self._model = RobertaModel.from_pretrained(
        pretrained_model_name, cache_dir=str(CachePath.ROOT))
    self.classifier = nn.Sequential(
        nn.Linear(self._model.config.hidden_size, self._model.config.hidden_size),
        nn.Dropout(dropout),
        nn.Linear(self._model.config.hidden_size, num_classes))
    self.classifier.apply(self._model.init_weights)
    self.criterion = nn.CrossEntropyLoss()
def __init__(self, model_name_or_path: str, max_seq_length: int = 128, do_lower_case: bool = True):
    super(RoBERTa, self).__init__()
    self.config_keys = ['max_seq_length', 'do_lower_case']
    self.do_lower_case = do_lower_case

    if max_seq_length > 510:
        logging.warning(
            "RoBERTa only allows a max_seq_length of 510 (512 with special tokens). Value will be set to 510")
        max_seq_length = 510
    self.max_seq_length = max_seq_length

    self.roberta = RobertaModel.from_pretrained(model_name_or_path)
    self.tokenizer = RobertaTokenizer.from_pretrained(model_name_or_path, do_lower_case=do_lower_case)
    self.cls_token_id = self.tokenizer.convert_tokens_to_ids([self.tokenizer.cls_token])[0]
    self.sep_token_id = self.tokenizer.convert_tokens_to_ids([self.tokenizer.sep_token])[0]
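# --- usage sketch (not from the original source; illustrative only) ---
# Assuming the RoBERTa module above is available, this shows why the cap is 510:
# subword tokens are truncated first, then the <s>/</s> special ids are added,
# which keeps the final sequence within RoBERTa's 512 positions.
encoder = RoBERTa('roberta-base', max_seq_length=510)
tokens = encoder.tokenizer.tokenize("A short example sentence.")[:encoder.max_seq_length]
input_ids = [encoder.cls_token_id] + encoder.tokenizer.convert_tokens_to_ids(tokens) + [encoder.sep_token_id]
assert len(input_ids) <= 512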
def test_roberta_embeddings():
    roberta_model: str = "roberta-base"

    tokenizer = RobertaTokenizer.from_pretrained(roberta_model)
    model = RobertaModel.from_pretrained(
        pretrained_model_name_or_path=roberta_model, output_hidden_states=True)
    model.to(flair.device)
    model.eval()

    s: str = "Berlin and Munich have a lot of puppeteer to see ."

    with torch.no_grad():
        tokens = tokenizer.tokenize("<s> " + s + " </s>")

        indexed_tokens = tokenizer.convert_tokens_to_ids(tokens)
        tokens_tensor = torch.tensor([indexed_tokens])
        tokens_tensor = tokens_tensor.to(flair.device)

        hidden_states = model(tokens_tensor)[-1]

        first_layer = hidden_states[1][0]

    assert len(first_layer) == len(tokens)

    #   0      1      2       3        4         5       6      7      8       9       10    11     12      13     14     15
    #
    # '<s>', 'Ber', 'lin', 'Ġand', 'ĠMunich', 'Ġhave', 'Ġa', 'Ġlot', 'Ġof', 'Ġpupp', 'ete', 'er', 'Ġto', 'Ġsee', 'Ġ.', '</s>'
    #          \     /       |        |          |      |      |      |        \       |    /       |      |     |
    #           Berlin      and     Munich      have    a     lot     of         puppeteer          to    see    .
    #
    #             0          1        2           3     4      5      6              7               8     9     10

    def embed_sentence(
        sentence: str,
        pooling_operation,
        layers: str = "1",
        use_scalar_mix: bool = False,
    ) -> Sentence:
        embeddings = RoBERTaEmbeddings(
            pretrained_model_name_or_path=roberta_model,
            layers=layers,
            pooling_operation=pooling_operation,
            use_scalar_mix=use_scalar_mix,
        )
        flair_sentence = Sentence(sentence)
        embeddings.embed(flair_sentence)

        return flair_sentence

    # First subword embedding
    sentence_first_subword = embed_sentence(sentence=s, pooling_operation="first")

    first_token_embedding_ref = first_layer[1].tolist()
    first_token_embedding_actual = sentence_first_subword.tokens[0].embedding.tolist()

    puppeteer_first_subword_embedding_ref = first_layer[9].tolist()
    puppeteer_first_subword_embedding_actual = sentence_first_subword.tokens[7].embedding.tolist()

    assert first_token_embedding_ref == first_token_embedding_actual
    assert puppeteer_first_subword_embedding_ref == puppeteer_first_subword_embedding_actual

    # Last subword embedding
    sentence_last_subword = embed_sentence(sentence=s, pooling_operation="last")

    # The first token is split into two subwords; as we use "last" as pooling operation,
    # we take the last subword as the representation of the "first token" here.
    first_token_embedding_ref = first_layer[2].tolist()
    first_token_embedding_actual = sentence_last_subword.tokens[0].embedding.tolist()

    puppeteer_last_subword_embedding_ref = first_layer[11].tolist()
    puppeteer_last_subword_embedding_actual = sentence_last_subword.tokens[7].embedding.tolist()

    assert first_token_embedding_ref == first_token_embedding_actual
    assert puppeteer_last_subword_embedding_ref == puppeteer_last_subword_embedding_actual

    # First and last subword embedding
    sentence_first_last_subword = embed_sentence(sentence=s, pooling_operation="first_last")

    first_token_embedding_ref = torch.cat([first_layer[1], first_layer[2]]).tolist()
    first_token_embedding_actual = sentence_first_last_subword.tokens[0].embedding.tolist()

    puppeteer_first_last_subword_embedding_ref = torch.cat([first_layer[9], first_layer[11]]).tolist()
    puppeteer_first_last_subword_embedding_actual = sentence_first_last_subword.tokens[7].embedding.tolist()

    assert first_token_embedding_ref == first_token_embedding_actual
    assert puppeteer_first_last_subword_embedding_ref == puppeteer_first_last_subword_embedding_actual

    # Mean of all subword embeddings
    sentence_mean_subword = embed_sentence(sentence=s, pooling_operation="mean")

    first_token_embedding_ref = calculate_mean_embedding([first_layer[1], first_layer[2]]).tolist()
    first_token_embedding_actual = sentence_mean_subword.tokens[0].embedding.tolist()

    puppeteer_mean_subword_embedding_ref = calculate_mean_embedding(
        [first_layer[9], first_layer[10], first_layer[11]]).tolist()
    puppeteer_mean_subword_embedding_actual = sentence_mean_subword.tokens[7].embedding.tolist()

    assert first_token_embedding_ref == first_token_embedding_actual
    assert puppeteer_mean_subword_embedding_ref == puppeteer_mean_subword_embedding_actual

    # Check embedding dimension when using multiple layers
    sentence_mult_layers = embed_sentence(
        sentence="Munich", pooling_operation="first", layers="1,2,3,4")

    ref_embedding_size = 4 * 768
    actual_embedding_size = len(sentence_mult_layers.tokens[0].embedding)

    assert ref_embedding_size == actual_embedding_size

    # Check embedding dimension when using multiple layers and scalar mix
    sentence_mult_layers_scalar_mix = embed_sentence(
        sentence="Berlin",
        pooling_operation="first",
        layers="1,2,3,4",
        use_scalar_mix=True,
    )

    ref_embedding_size = 1 * 768
    actual_embedding_size = len(sentence_mult_layers_scalar_mix.tokens[0].embedding)

    assert ref_embedding_size == actual_embedding_size
from mspan_roberta_gcn.drop_roberta_dataset import DropReader
from pytorch_transformers import RobertaTokenizer, RobertaModel, RobertaConfig

parser = argparse.ArgumentParser("Bert inference task.")
options.add_bert_args(parser)
options.add_model_args(parser)
options.add_inference_args(parser)

args = parser.parse_args()
args.cuda = torch.cuda.device_count() > 0

print("Build bert model.")
bert_model = RobertaModel(RobertaConfig.from_pretrained(args.roberta_model))

print("Build Drop model.")
network = NumericallyAugmentedBertNet(
    bert_model,
    hidden_size=bert_model.config.hidden_size,
    dropout_prob=0.0,
    use_gcn=args.use_gcn,
    gcn_steps=args.gcn_steps)

if args.cuda:
    network.cuda()

print("Load from pre path {}.".format(args.pre_path))
network.load_state_dict(torch.load(args.pre_path))

print("Load data from {}.".format(args.inf_path))
tokenizer = RobertaTokenizer.from_pretrained(args.roberta_model)
inf_iter = DropBatchGen(
    args, tokenizer,
    DropReader(tokenizer, passage_length_limit=463, question_length_limit=46)._read(args.inf_path))
parser = argparse.ArgumentParser("Bert inference task.")
options.add_bert_args(parser)
options.add_model_args(parser)
options.add_inference_args(parser)
parser.add_argument("--eng", type=int, required=False)

args = parser.parse_args()
args.cuda = torch.cuda.device_count() > 0

print("Build bert model.")
if args.eng == 0:
    bert_model = BertModel.from_pretrained(args.roberta_model)
else:
    bert_model = RobertaModel.from_pretrained(args.roberta_model)

print("Build Drop model.")
if args.tag_mspan:
    network = TNumericallyAugmentedBertNet(
        bert_model,
        hidden_size=bert_model.config.hidden_size,
        dropout_prob=0.0,
        use_gcn=args.use_gcn,
        gcn_steps=args.gcn_steps,
        is_eng=args.eng)
else:
    network = NumericallyAugmentedBertNet(
        bert_model,
        hidden_size=bert_model.config.hidden_size,
        dropout_prob=0.0,
        use_gcn=args.use_gcn,
        gcn_steps=args.gcn_steps)
crf = ConditionalRandomField(len(roles_to_idx), None, include_start_end_transitions=True)
print(crf)

model_parameters = filter(lambda p: p.requires_grad, chain(srl.parameters(), crf.parameters()))
num_params = sum([np.prod(p.size()) for p in model_parameters])
print("Total parameters =", num_params)
print(params)

if params.use_bert:
    bert_tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
    bert_model = RobertaModel.from_pretrained("roberta-base", output_hidden_states=True)
    if params.gpu_id > -1:
        bert_model.cuda()
else:
    bert_tokenizer = None
    bert_model = None

if params.gpu_id > -1:
    srl.cuda()
    crf.cuda()

srl.load_state_dict(torch.load(os.path.join(params.dir, params.modelname)))
crf.load_state_dict(torch.load(os.path.join(params.dir, params.modelname + "crf")))

evaluate(
def __init__(self):
    super(RobertaEncoder, self).__init__()
    self.encoder = RobertaModel.from_pretrained('roberta-base')
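# --- usage sketch (not from the original source; illustrative only) ---
# Assumes the pytorch_transformers return convention used elsewhere in this section,
# i.e. RobertaModel(...) returns (sequence_output, pooled_output, ...). The input ids
# reuse a prefix of the inference-test input above, wrapped in <s> (0) and </s> (2).
encoder = RobertaEncoder()
input_ids = torch.tensor([[0, 31414, 232, 2]])
sequence_output, pooled_output = encoder.encoder(input_ids)[:2]
print(sequence_output.shape)  # expected: (1, 4, 768) for roberta-base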
def __init__(self, config):
    super(RoBertaMCQConcat, self).__init__(config)
    self.roberta = RobertaModel(config)
    self._dropout = nn.Dropout(config.hidden_dropout_prob)
    self._classification_layer = nn.Linear(config.hidden_size, 1)
    self.apply(self.init_weights)
def create_model(self, only_model=False):
    logger.info("creating model {}".format(self.opt.model_name))

    if self.opt.model_name in [
            "aen_bert",
            "aen_distilbert",
            "aen_roberta",
            "aen_distilroberta",
            "spc_distilbert",
            "spc_bert",
            "spc_roberta",
            "lcf_bert",
            "fx_bert",
    ]:
        if not only_model:
            if self.opt.model_name in ["aen_bert", "spc_bert", "lcf_bert", "fx_bert"]:
                self.tokenizer = Tokenizer4Bert(
                    self.opt.pretrained_model_name,
                    self.opt.max_seq_len,
                    self.opt.global_context_seqs_per_doc,
                )
            elif self.opt.model_name in ["aen_distilbert", "spc_distilbert"]:
                self.tokenizer = Tokenizer4Distilbert(
                    self.opt.pretrained_model_name,
                    self.opt.max_seq_len,
                )
            elif self.opt.model_name in ["aen_roberta", "spc_roberta"]:
                self.tokenizer = Tokenizer4Roberta(
                    self.opt.pretrained_model_name,
                    self.opt.max_seq_len,
                )
            elif self.opt.model_name in ["aen_distilroberta", "spc_distiloberta"]:
                self.tokenizer = Tokenizer4Roberta(
                    self.opt.pretrained_model_name,
                    self.opt.max_seq_len,
                )

        if not os.path.isdir(self.opt.pretrained_model_name):
            pretrained_model = torch.hub.load(
                'huggingface/transformers', 'model', self.opt.pretrained_model_name)
        elif self.opt.model_name in ["aen_bert", "spc_bert", "lcf_bert", "fx_bert"]:
            pretrained_model = BertModel.from_pretrained(
                self.opt.pretrained_model_name, output_hidden_states=True)
        elif self.opt.model_name in ["aen_distilbert", "spc_distilbert"]:
            pretrained_model = DistilBertModel.from_pretrained(
                self.opt.pretrained_model_name, output_hidden_states=True)
        elif self.opt.model_name in ["aen_roberta", "spc_roberta"]:
            pretrained_model = RobertaModel.from_pretrained(
                self.opt.pretrained_model_name, output_hidden_states=True)

        if self.opt.state_dict == "pretrained":
            try:
                self.model = self.opt.model_class(
                    pretrained_model,
                    self.opt,
                    pretrained=self.opt.state_dict == "pretrained",
                    map_location=self.opt.device).to(self.opt.device)
            except TypeError as e:
                logger.error(
                    "The selected model does not support the 'pretrained'-keyword for state_dict")
                exit(1)
        else:
            self.model = self.opt.model_class(pretrained_model, self.opt).to(self.opt.device)

        if self.opt.state_dict and self.opt.state_dict != "pretrained":
            # load weights from the state_dict
            logger.info(f"loading weights from {self.opt.state_dict}")
            self.model.load_state_dict(
                torch.load(self.opt.state_dict, map_location=self.opt.device))

    elif self.opt.model_name in ["aen_glove", "ram"]:
        if not only_model:
            self.tokenizer = Tokenizer4GloVe(self.opt.max_seq_len)

        if self.opt.model_name == "aen_glove":
            self.model = self.opt.model_class(
                self.tokenizer.embedding_matrix, self.opt).to(self.opt.device)
        elif self.opt.model_name == "ram":
            self.model = self.opt.model_class(self.opt).to(self.opt.device)

    else:
        raise Exception("model_name unknown: {}".format(self.opt.model_name))