import torch
from transformers import RobertaConfig, RobertaForSequenceClassification


def load_transformer_model(model_dir):
    # Load a fine-tuned RoBERTa sequence classifier from a local directory
    # containing config.json and the saved model weights.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    config = RobertaConfig.from_json_file('{}/config.json'.format(model_dir))
    model = RobertaForSequenceClassification.from_pretrained(model_dir, config=config)
    model = model.to(device)
    return model
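# A minimal usage sketch, assuming a hypothetical "./saved_roberta_classifier"
# directory that holds config.json together with weights saved by save_pretrained().
classifier = load_transformer_model("./saved_roberta_classifier")
classifier.eval()  # switch to inference mode before scoring inputs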
def __init__(
    self,
    pretrained_model_name=None,
    config_filename=None,
    vocab_size=None,
    hidden_size=768,
    num_hidden_layers=12,
    num_attention_heads=12,
    intermediate_size=3072,
    hidden_act="gelu",
    max_position_embeddings=512,
):
    super().__init__()

    # Check that exactly one of pretrained_model_name, config_filename, and
    # vocab_size was passed in
    total = 0
    if pretrained_model_name is not None:
        total += 1
    if config_filename is not None:
        total += 1
    if vocab_size is not None:
        total += 1

    if total != 1:
        raise ValueError(
            "Only one of pretrained_model_name, vocab_size, "
            + "or config_filename should be passed into the "
            + "ROBERTA constructor."
        )

    # The check above already guarantees exactly one option was provided, so
    # the final else branch below is unreachable and kept only as a safeguard.
    if vocab_size is not None:
        config = RobertaConfig(
            vocab_size_or_config_json_file=vocab_size,
            vocab_size=vocab_size,
            hidden_size=hidden_size,
            num_hidden_layers=num_hidden_layers,
            num_attention_heads=num_attention_heads,
            intermediate_size=intermediate_size,
            hidden_act=hidden_act,
            max_position_embeddings=max_position_embeddings,
        )
        model = RobertaModel(config)
    elif pretrained_model_name is not None:
        model = RobertaModel.from_pretrained(pretrained_model_name)
    elif config_filename is not None:
        config = RobertaConfig.from_json_file(config_filename)
        model = RobertaModel(config)
    else:
        raise ValueError(
            "Either pretrained_model_name or vocab_size must"
            + " be passed into the ROBERTA constructor"
        )

    model.to(self._device)
    self.add_module("roberta", model)
    self.config = model.config
    self._hidden_size = model.config.hidden_size
def model_fn(model_dir):
    # MODEL_NAME is a constant defined elsewhere in the module.
    model_path = '{}/{}'.format(model_dir, MODEL_NAME)
    model_config_path = '{}/{}'.format(model_dir, 'config.json')
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    config = RobertaConfig.from_json_file(model_config_path)
    model = RobertaForSequenceClassification.from_pretrained(model_path, config=config)
    model.to(device)
    return model
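# model_fn(model_dir) matches the signature of the SageMaker PyTorch serving
# hook; whether this snippet actually targets SageMaker is an assumption. A
# sketch of a companion predict_fn under that assumption (the tokenizer name
# and max_length are illustrative, not taken from the snippet):
import torch
from transformers import RobertaTokenizer


def predict_fn(input_data, model):
    tokenizer = RobertaTokenizer.from_pretrained('roberta-base')  # assumed tokenizer
    device = next(model.parameters()).device
    encoded = tokenizer(input_data, return_tensors='pt', truncation=True, max_length=512)
    encoded = {k: v.to(device) for k, v in encoded.items()}
    with torch.no_grad():
        logits = model(**encoded)[0]
    return logits.argmax(dim=-1).item()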
def __init__(self, atokenizer: AtomTokenizer, tokenizer: Tokenizer, dec_dim: int = 64,
             device: str = 'cpu', load_pretrained: bool = False):
    super(Parser, self).__init__()
    self.enc_dim = 768
    self.dec_dim = dec_dim
    self.num_embeddings = len(atokenizer)
    self.device = device
    self.atom_tokenizer = atokenizer
    self.type_parser = TypeParser()
    self.tokenizer = tokenizer
    self.dropout = Dropout(0.1)
    self.enc_heads = 8
    self.dec_heads = 8
    self.d_atn_dec = self.dec_dim // self.dec_heads

    if load_pretrained:
        self.word_encoder = RobertaModel.from_pretrained(
            "pdelobelle/robbert-v2-dutch-base").to(device)
    else:
        json_path = path.join(
            path.join(path.dirname(path.dirname(__file__)), 'data'), 'config.json')
        self.word_encoder = RobertaModel(
            RobertaConfig.from_json_file(json_path)).to(device)

    self.supertagger = make_decoder(num_layers=6,
                                    num_heads_enc=self.enc_heads,
                                    num_heads_dec=self.dec_heads,
                                    d_encoder=self.enc_dim,
                                    d_decoder=self.dec_dim,
                                    d_atn_enc=self.enc_dim // self.enc_heads,
                                    d_atn_dec=self.d_atn_dec,
                                    d_v_enc=self.enc_dim // self.enc_heads,
                                    d_v_dec=self.dec_dim // self.dec_heads,
                                    d_interm=self.dec_dim * 2,
                                    dropout_rate=0.1).to(device)
    self.atom_embedder = ComplexEmbedding(self.num_embeddings, self.dec_dim // 2).to(device)
    self.linker = make_encoder(num_layers=3,
                               num_heads=self.enc_heads,
                               d_intermediate=self.dec_dim * 4,
                               dropout=0.15,
                               d_model=self.dec_dim * 2,
                               d_k=(self.dec_dim * 2) // self.dec_heads,
                               d_v=(self.dec_dim * 2) // self.dec_heads).to(device)
    self.pos_transformation = Sequential(
        FFN(self.dec_dim * 2, self.dec_dim, 0.1, self.dec_dim // 2),
        LayerNorm(self.dec_dim // 2, eps=1e-12)).to(device)
    self.neg_transformation = Sequential(
        FFN(self.dec_dim * 2, self.dec_dim, 0.1, self.dec_dim // 2),
        LayerNorm(self.dec_dim // 2, eps=1e-12)).to(device)
def __init__(self, classifier_config_dir, device, task_type, n_clf_layers=6,
             use_dm=True, use_pm=True, use_rt=True, use_bio=False, use_name=False,
             use_network=False, use_count=False):
    super(ConcatenatedClassifier, self).__init__()
    # load text model
    self.device = device
    self.task_type = task_type
    self.use_text = use_dm | use_pm | use_rt
    self.use_bio = use_bio
    self.use_name = use_name
    self.use_etc = use_network | use_count
    self.text_model = RobertaModel.from_pretrained(
        "vinai/bertweet-base", output_attentions=False, output_hidden_states=False)

    if self.use_name:
        self.charEmbedding = nn.Embedding(
            num_embeddings=302, embedding_dim=300,
            padding_idx=301)  # 302 = 300 most frequent chars + pad + unk
        self.conv3 = nn.Conv1d(in_channels=300, out_channels=256, kernel_size=3, padding=1)
        self.conv4 = nn.Conv1d(in_channels=300, out_channels=256, kernel_size=4, padding=1)
        self.conv5 = nn.Conv1d(in_channels=300, out_channels=256, kernel_size=5, padding=1)

    # load classifier for combining these features
    config = RobertaConfig.from_json_file(classifier_config_dir)
    config.num_hidden_layers = n_clf_layers
    config.num_attention_heads = n_clf_layers
    config.max_position_embeddings = 7
    if self.use_bio:
        config.max_position_embeddings += 2
    if self.use_name:
        config.max_position_embeddings += 4
    self.concat_model = RobertaModel(config)
    self.classifier = ClassifierLayer(use_count=use_count, use_network=use_network)
    return
def __init__(self, bert_config, my_config):
    super(NqModel, self).__init__()
    # albert_base_configuration = AlbertConfig(vocab_size=30000, hidden_size=768,
    #     num_attention_heads=12, intermediate_size=3072, attention_probs_dropout_prob=0)
    self.my_mask = None

    roberta_config = RobertaConfig.from_json_file("./configs/roberta-mnli.config")
    roberta_config.hidden_dropout_prob = 0.15
    roberta_config.attention_probs_dropout_prob = 0.15
    roberta_config.layer_norm_eps = 5e-6
    print(roberta_config)
    self.bert = RobertaModel.from_pretrained("roberta-large-mnli", config=roberta_config)
    my_config = bert_config = self.bert.config
    # self.bert = RobertaModel.from_pretrained("roberta-base")

    self.right = 0
    self.all = 0
    # self.bert = AlbertModel(albert_base_configuration)
    # self.bert2 = BertModel(bert_config)
    # self.bert = BertModel(BertConfig())
    # self.bert = RobertaModel(RobertaConfig(max_position_embeddings=514, vocab_size=50265))
    # print(my_config, bert_config)

    self.tok_dense = nn.Linear(my_config.hidden_size, my_config.hidden_size)
    # self.para_dense = nn.Linear(self.config.hidden_size, self.config.hidden_size)
    # self.doc_dense = nn.Linear(self.config.hidden_size, self.config.hidden_size)

    self.dropout = nn.Dropout(my_config.hidden_dropout_prob)
    self.tok_outputs = nn.Linear(my_config.hidden_size, 1)  # tuned to avoid falling into bad regions
    # self.para_outputs = nn.Linear(self.config.hidden_size, 1)
    # self.answer_type_outputs = nn.Linear(self.config.hidden_size, 2)
    # self.tok_to_label = nn.Linear(my_config.max_token_len, 2)
    # self.par_to_label = nn.Linear(my_config.max_paragraph_len, 2)

    # self.encoder = Encoder(my_config)
    # self.encoder2 = Encoder(my_config)

    self.my_config = my_config
    # self.my_mask =

    self.ACC = 0
    self.ALL = 0
def modify_transformer_config(
    model,
    batch_size=8,
    attention_probs_dropout_prob=0.4,
    learning_rate=5e-7,
    adam_epsilon=1e-8,
    hidden_dropout_prob=0.3,
    lm_model_dir=None,
):
    # Load the base config of the requested architecture from a local LM
    # directory, then override dropout, decoding, and label settings.
    # batch_size, learning_rate, and adam_epsilon are accepted here but are
    # not stored on the config.
    if model == "bert":
        config = BertConfig.from_json_file(f"{lm_model_dir}/config.json")
    elif model == "distilbert":
        config = DistilBertConfig.from_json_file(f"{lm_model_dir}/config.json")
    elif model == "roberta":
        config = RobertaConfig.from_json_file(f"{lm_model_dir}/config.json")
    else:
        raise ValueError(f"Unsupported model type: {model}")

    config.attention_probs_dropout_prob = attention_probs_dropout_prob
    config.do_sample = True
    config.num_beams = 500
    config.hidden_dropout_prob = hidden_dropout_prob
    config.repetition_penalty = 5
    config.num_labels = 3
    return config
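# A minimal usage sketch under assumed names: lm_dir points at a hypothetical
# local fine-tuned RoBERTa LM, and the downstream head (here
# RobertaForSequenceClassification, chosen because num_labels is set to 3)
# is an illustrative assumption rather than something stated in the snippet.
from transformers import RobertaForSequenceClassification

lm_dir = "./lm_finetuned/roberta"  # hypothetical directory with config.json and weights
config = modify_transformer_config(model="roberta", lm_model_dir=lm_dir)
classifier = RobertaForSequenceClassification.from_pretrained(lm_dir, config=config)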
torch.manual_seed(args.seed)
params.seed = args.seed

# Set the logger
utils.set_logger()

# Create the input data pipeline
logging.info("Loading the dataset...")
dataloader = NERDataLoader(params)
val_loader, test_loader = dataloader.load_data(mode='test')
logging.info("- done.")

# Define the model
logging.info('Loading the model...')
config_path = os.path.join(params.params_path, 'bert_config.json')
config = RobertaConfig.from_json_file(config_path)
model = ElectraForTokenClassification(config, params=params)
model.to(params.device)

# Reload weights from the saved file
utils.load_checkpoint(
    os.path.join(params.model_dir, args.restore_file + '.pth.tar'), model)
logging.info('- done.')

logging.info("Starting prediction...")
if mode == 'test':
    predict(model, test_loader, params, mode=mode)
elif mode == 'val':
    predict(model, val_loader, params, mode=mode)
logging.info('- done.')
if __name__ == '__main__':
    batch_size = 32
    device = torch.device('cuda:1' if torch.cuda.is_available() else 'cpu')
    kwargs = {
        'num_workers': 4,
        'pin_memory': True
    } if torch.cuda.is_available() else {}

    # tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    # config = BertConfig.from_json_file(
    #     '../pre_weights/bert-base-uncased_config.json')
    # model = BertForMultipleChoice(config)
    os.chdir('/mnt/ssd/qianqian/semeval2020')
    tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
    config = RobertaConfig.from_json_file(
        './pre_weights/roberta-base_config.json')
    model = RobertaForMultipleChoice(config)
    model.load_state_dict(
        torch.load('./checkpoints/checkpoint_2019.12.08-09.50.24_pair_91.805.pth'))
    model = model.to(device)

    questions = pd.read_csv(
        './SemEval2020-Task4-Commonsense-Validation-and-Explanation-master/Training Data/subtaskB_data_all.csv')
    answers = pd.read_csv(
        './SemEval2020-Task4-Commonsense-Validation-and-Explanation-master/Training Data/subtaskB_answers_all.csv',
        header=None, names=['id', 'ans'])
    data = pd.merge(questions, answers, how='left', on='id')

    questions = pd.read_csv(
        './SemEval2020-Task4-Commonsense-Validation-and-Explanation-master/Trial Data/taskB_trial_data.csv')
    answers = pd.read_csv(
        './SemEval2020-Task4-Commonsense-Validation-and-Explanation-master/Trial Data/taskB_trial_answer.csv',
        header=None, names=['id', 'ans'])
def load_encode_module():
    pass


train_df_1 = pd.read_csv(train_file_1, header=None, sep='\t', index_col=0)
train_df_1.columns = ['text', 'label']
label_map = dict(
    zip(train_df_1['label'].value_counts().index,
        range(len(train_df_1['label'].value_counts()))))
train_df_1['label'] = train_df_1[['label']].applymap(lambda x: label_map[x])
train_df_1, test_df = train_test_split(train_df_1, test_size=0.2)

model_name = "D:/model_file/hfl_chinese-roberta-wwm-ext"
rob_config = RobertaConfig.from_json_file(
    "D:/model_file/hfl_chinese-roberta-wwm-ext/config.json")
rob_config.num_labels = len(label_map)
tokenizer = BertTokenizer.from_pretrained(model_name)

train_sentences_1 = train_df_1.text.values
train_labels_1 = train_df_1.label.values
test_sentences_1 = test_df.text.values
test_labels_1 = test_df.label.values

train_input_ids, test_input_ids = [], []
train_attention_masks, test_attention_masks = [], []
for sent in train_sentences_1:
    encoded_dict = tokenizer.encode_plus(
        text=sent,  # Sentence to encode.
""" 模型加载方法 """ import torch from transformers import RobertaConfig, RobertaModel, BertTokenizer config = RobertaConfig.from_json_file("data/checkpoint-100/config.json") # 切记这里使用RobertaConfig而非BertConfig,因为参数命名前缀不一致会导致参数加载失败 tokenizer = BertTokenizer.from_pretrained( "bert-base-chinese") # bert和roberta分词器一样,无所谓 roberta = RobertaModel.from_pretrained("data/checkpoint-100/pytorch_model.bin", config=config) # 加载了参数的roberta # 这里使用RobertaModel而非BertModel """ PS, raw_roberta = RobertaModel(config) # 这里会随机初始化参数 # raw_roberta.load_state_dict(new_state_dic) # 由于参数命名不一致,会报错 """