def __init__(self, model_name, cache_dir, task_list):
    super(MultiTaskModel, self).__init__()
    cache = os.path.join(cache_dir, model_name)
    self.transformer = XLNetModel.from_pretrained(model_name, cache_dir=cache)
    self.transformer_config = self.transformer.config
    self.dropout = DropoutWrapper(self.transformer_config.dropout)
    self.decoderID = {}  # maps each task's id to its decoder's index in decoder_list
    self.decoder_list = nn.ModuleList()
    for innerid, task in enumerate(task_list):
        if task[1] == TaskType["classification"]:  # task[1] holds the task type
            classifier = Classification(self.transformer_config)
            print("use simple classification")
            self.decoder_list.append(classifier)
        elif task[1] == TaskType["SANclassification"]:
            classifier = SANClassifier(self.transformer_config.hidden_size,
                                       self.transformer_config.hidden_size,
                                       label_size=1, dropout=self.dropout)
            print("use SANClassifier")
            self.decoder_list.append(classifier)
        else:
            pass
        self.decoderID[task[0]] = innerid  # task[0] holds the task id

def __init__(self, num_labels, pretrained_model_name_or_path=None,
             cat_num=0, token_size=None, MAX_SEQUENCE_LENGTH=512):
    super(BertModelForBinaryMultiLabelClassifier, self).__init__()
    if pretrained_model_name_or_path:
        # despite the class name, this variant swaps in XLNet as the encoder
        self.model = XLNetModel.from_pretrained(pretrained_model_name_or_path)
    else:
        raise NotImplementedError
    self.num_labels = num_labels
    if cat_num > 0:
        # optional categorical feature: embed it and widen the classifier input
        self.catembedding = nn.Embedding(cat_num, 768)
        self.catdropout = nn.Dropout(0.2)
        self.catactivate = nn.ReLU()
        self.catembeddingOut = nn.Embedding(cat_num, cat_num // 2 + 1)
        self.catactivateOut = nn.ReLU()
        self.dropout = nn.Dropout(0.2)
        self.classifier = nn.Linear(768 + cat_num // 2 + 1, num_labels)
    else:
        self.catembedding = None
        self.catdropout = None
        self.catactivate = None
        self.catembeddingOut = None
        self.catactivateOut = None
        self.dropout = nn.Dropout(0.2)
        self.classifier = nn.Linear(768, num_labels)
    # resize the token embeddings when a custom vocabulary size is given
    if token_size:
        self.model.resize_token_embeddings(token_size)

def __init__(self, bert_config, device, dropout_rate, n_class, lstm_hidden_size=None):
    """
    :param bert_config: str, BERT configuration description
    :param device: torch.device
    :param dropout_rate: float
    :param n_class: int
    :param lstm_hidden_size: int
    """
    super(CustomBertLSTMAttentionModel, self).__init__()
    self.bert_config = bert_config
    self.bert = XLNetModel.from_pretrained(self.bert_config, output_hidden_states=False)
    self.tokenizer = XLNetTokenizer.from_pretrained(self.bert_config, output_hidden_states=False)
    if not lstm_hidden_size:
        self.lstm_hidden_size = self.bert.config.hidden_size
    else:
        self.lstm_hidden_size = lstm_hidden_size
    self.n_class = n_class
    self.dropout_rate = dropout_rate
    self.lstm = nn.LSTM(self.bert.config.hidden_size, self.lstm_hidden_size, bidirectional=True)
    self.hidden_to_softmax = nn.Linear(self.lstm_hidden_size * 2, n_class, bias=True)
    self.dropout = nn.Dropout(p=self.dropout_rate)
    self.softmax = nn.Softmax(dim=1)
    self.device = device

def __init__(self, pretrained_model_dir, num_classes, segment_len=150, dropout_p=0.5):
    super(MyXLNetModel, self).__init__()
    self.seg_len = segment_len
    self.config = XLNetConfig.from_json_file(pretrained_model_dir + 'config.json')
    self.config.mem_len = 150  # enable the memory
    # this load was commented out in the original, but self.xlnet is used below
    self.xlnet = XLNetModel.from_pretrained(pretrained_model_dir, config=self.config)
    if feature_extract:  # module-level flag; transfer learning: use xlnet as a frozen feature extractor
        for p in self.xlnet.parameters():
            p.requires_grad = False
    d_model = self.config.hidden_size  # 768
    self.attention_layer1 = NyAttentioin(d_model, d_model // 2)
    self.attention_layer2 = NyAttentioin(d_model, d_model // 2)
    self.dropout = torch.nn.Dropout(p=dropout_p)
    self.fc = torch.nn.Linear(d_model, num_classes)

def __init__(self, bert_config, device, dropout_rate, n_class, out_channel=16):
    """
    :param bert_config: str, BERT configuration description
    :param device: torch.device
    :param dropout_rate: float
    :param n_class: int
    :param out_channel: int, NOTE: out_channel per layer of BERT
    """
    super(CustomBertConvModel, self).__init__()
    self.bert_config = bert_config
    self.dropout_rate = dropout_rate
    self.n_class = n_class
    self.out_channel = out_channel
    self.bert = XLNetModel.from_pretrained(self.bert_config, output_hidden_states=True)
    self.out_channels = self.bert.config.num_hidden_layers * self.out_channel
    self.tokenizer = XLNetTokenizer.from_pretrained(self.bert_config)
    # one grouped convolution per transformer layer over (seq_len, hidden_size)
    self.conv = nn.Conv2d(in_channels=self.bert.config.num_hidden_layers,
                          out_channels=self.out_channels,
                          kernel_size=(3, self.bert.config.hidden_size),
                          groups=self.bert.config.num_hidden_layers)
    self.hidden_to_softmax = nn.Linear(self.out_channels, self.n_class, bias=True)
    self.dropout = nn.Dropout(p=self.dropout_rate)
    self.device = device

def __init__(self, num_labels=2):
    super(XLNetForMultiLabelSequenceClassification, self).__init__()
    self.num_labels = num_labels
    self.xlnet = XLNetModel.from_pretrained('xlnet-base-cased')
    self.classifier = torch.nn.Linear(768, num_labels)
    torch.nn.init.xavier_normal_(self.classifier.weight)

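# Hedged sketch (not from the original source): one plausible forward pass for the
# classifier above. Mean pooling over the last hidden state is an assumption; the
# original pooling strategy is not shown.
def forward(self, input_ids, attention_mask=None):
    last_hidden = self.xlnet(input_ids, attention_mask=attention_mask)[0]  # (batch, seq, 768)
    pooled = last_hidden.mean(dim=1)                                       # (batch, 768)
    logits = self.classifier(pooled)                                       # (batch, num_labels)
    return torch.sigmoid(logits)  # independent per-label probabilities for multi-label output
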
def get_model(output_dir):
    '''
    output_dir: path to a Hugging Face model directory
    '''
    model = XLNetModel.from_pretrained(output_dir)
    tokeniser = XLNetTokenizer.from_pretrained(output_dir)
    return model, tokeniser

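# Usage sketch for get_model; './xlnet_finetuned/' is a hypothetical directory
# produced by save_pretrained, not a path from the original source.
model, tokeniser = get_model('./xlnet_finetuned/')
ids = torch.tensor([tokeniser.encode("An example sentence.")])
last_hidden = model(ids)[0]  # (1, seq_len, hidden_size)
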
def __init__(self, vocab_size, hidden_size, output_size, num_labels=2, dropout_rate=0.3):
    super(XLNetClassification, self).__init__()
    self.xlnet = XLNetModel.from_pretrained('xlnet-base-cased')
    self.classifier = torch.nn.Linear(hidden_size, output_size)
    self.dropout = nn.Dropout(dropout_rate)
    self.embedding = nn.Embedding(vocab_size, hidden_size, padding_idx=0)
    torch.nn.init.xavier_normal_(self.classifier.weight)

def __init__(self, config):
    super(Model, self).__init__()
    self.xlnet = XLNetModel.from_pretrained(config.xlnet_path, num_labels=config.num_classes)
    # freeze everything except the last five parameter tensors
    for param in list(self.xlnet.parameters())[:-5]:
        param.requires_grad = False
    self.fc = nn.Linear(config.hidden_size, 192)
    self.fc1 = nn.Linear(192, config.num_classes)

def __init__(self, config):
    super(XLNet, self).__init__()
    self.xlnet = XLNetModel.from_pretrained(config.model_path)
    self.isDropout = True if 0 < config.dropout < 1 else False
    self.dropout = nn.Dropout(p=config.dropout)
    self.fc = nn.Linear(self.xlnet.d_model, config.num_class)

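# Hedged sketch (an assumption; the original forward is not shown): XLNet carries
# its summary at the *last* position, so pool by taking the final time step.
def forward(self, input_ids, attention_mask=None):
    hidden = self.xlnet(input_ids, attention_mask=attention_mask)[0]
    pooled = hidden[:, -1, :]  # last-token pooling, XLNet convention
    if self.isDropout:
        pooled = self.dropout(pooled)
    return self.fc(pooled)
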
def __init__(self, config):
    super().__init__()
    if config['model']['pretrained_model'] == 'XLNet':
        self.pretrainedModel = XLNetModel.from_pretrained(
            config['model']['xlnet_base_chinese'])
        self.tokenizer = XLNetTokenizer.from_pretrained(
            config['model']['xlnet_base_chinese'], do_lower_case=True)
    if config['model']['pretrained_model'] == 'Bert':
        self.pretrainedModel = BertModel.from_pretrained(
            config['model']['bert_base_chinese'])
        self.tokenizer = BertTokenizer.from_pretrained(
            config['model']['bert_base_chinese'], do_lower_case=True)
    self.dropout = nn.Dropout(config['model']['dropout'])
    self.lstm = nn.LSTM(
        input_size=768,
        hidden_size=768 // 2,
        batch_first=True,
        bidirectional=True
    )
    self.fc = nn.Linear(768, len(tagDict))  # tagDict: module-level tag vocabulary
    # up-weight the rarer tag classes in the loss
    weight = torch.Tensor([1, 1, 3, 3, 3]).to(config['DEVICE'])
    self.criterion = nn.CrossEntropyLoss(weight=weight)

def __init__(self, args, num_class, use_cls=True):
    super().__init__()
    self.args = args
    self.use_cls = use_cls
    self.dropout = nn.Dropout(args.dropout)
    if args.basemodel == 'xlnet':
        self.xlnet = XLNetModel.from_pretrained(args.bert_model_dir)
    elif args.basemodel == 'xlnet_dialog':
        self.xlnet = XLNetModel_dialog.from_pretrained(args.bert_model_dir)
    self.xlnet.mem_len = args.mem_len
    self.xlnet.attn_type = args.attn_type
    in_dim = args.bert_dim
    pool_layers = [nn.Linear(in_dim, args.hidden_dim), nn.ReLU()]
    self.pool_fc = nn.Sequential(*pool_layers)
    # output MLP layers
    layers = []
    for _ in range(args.mlp_layers):
        layers += [nn.Linear(args.hidden_dim, args.hidden_dim), nn.ReLU()]
    layers += [nn.Linear(args.hidden_dim, num_class)]
    self.out_mlp = nn.Sequential(*layers)

def __init__(self, model_name, num_labels=2):
    super(ClassificationXLNet, self).__init__()
    self.transformer = XLNetModel.from_pretrained(model_name)
    self.max_pool = nn.MaxPool1d(64)  # pools over a sequence length of 64
    self.drop = nn.Dropout(0.3)
    self.linear = nn.Sequential(nn.Linear(768, num_labels))

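# Hedged sketch pairing with the MaxPool1d(64) above: it only works if inputs are
# padded or truncated to exactly 64 tokens (the kernel size). The forward itself
# is an assumption, not the original author's code.
def forward(self, input_ids, attention_mask=None):
    hidden = self.transformer(input_ids, attention_mask=attention_mask)[0]  # (batch, 64, 768)
    pooled = self.max_pool(hidden.permute(0, 2, 1)).squeeze(-1)             # (batch, 768)
    return self.linear(self.drop(pooled))
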
def __init__(self, max_seq_len=512, min_window_overlap=128, mask='none',
             dropout_rate=0.1, fp16=False, yes_no_logits=False, ctx_emb='bert'):
    super(DiaBERT, self).__init__()
    assert min_window_overlap % 2 == 0
    self.ctx_emb = ctx_emb
    if ctx_emb == 'bert':
        pretrained_bert = BertModel.from_pretrained('bert-base-uncased')
        self.bert = Bert(768, pretrained_bert, mask)
    elif ctx_emb == 'xlnet':
        self.bert = XLNetModel.from_pretrained('xlnet-base-cased')
    self.linear_start_end = torch.nn.Linear(768, 2, bias=False)
    self.max_seq_len = max_seq_len
    self.min_window_overlap = min_window_overlap
    self.fp16 = fp16
    if yes_no_logits:
        self.yesno_mlp = torch.nn.Sequential(torch.nn.Linear(768, 256),
                                             torch.nn.ReLU(),
                                             torch.nn.Linear(256, 3))
    else:
        self.yesno_mlp = None

def __init__(self):
    super(StsClassifier, self).__init__()
    self.xlnet = XLNetModel.from_pretrained('xlnet-large-cased')
    self.linear1 = nn.Linear(1024, 1024)
    self.linear2 = nn.Linear(1024, 512)
    self.linear3 = nn.Linear(512, 1)
    self.activation = nn.ReLU()

def select_pretrained(model_name, cache_dir):
    cache = os.path.join(cache_dir, model_name)
    if 'bert' in model_name:
        return BertModel.from_pretrained(model_name, cache_dir=cache)
    elif 'xlnet' in model_name:
        return XLNetModel.from_pretrained(model_name, cache_dir=cache)
    else:
        return None

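# Usage sketch; the model name and cache directory are illustrative:
encoder = select_pretrained('xlnet-base-cased', './cache')
assert encoder is not None  # returns None for unrecognized model names
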
def __init__(self):
    super(XLNetCls, self).__init__()
    self.xlnet = XLNetModel.from_pretrained(config.XLNET_MODEL_PATH)
    self.liner = torch.nn.Sequential(
        torch.nn.BatchNorm1d(config.EMBEDDING_DIM * 2),
        torch.nn.Dropout(),
        torch.nn.Linear(config.EMBEDDING_DIM * 2, 256),
        torch.nn.BatchNorm1d(256),
        torch.nn.Dropout(),
        torch.nn.ReLU(),
        torch.nn.Linear(256, 1),
        torch.nn.Sigmoid())

def __init__(self, xlnet_path, num_classes, word_embedding, trained=True):
    super(XLNet, self).__init__()
    self.xlnet = XLNetModel.from_pretrained(xlnet_path)
    # freeze or fine-tune xlnet depending on `trained`
    # (trained=False uses the encoder as a fixed feature extractor)
    for param in self.xlnet.parameters():
        param.requires_grad = trained
    self.fc = nn.Linear(self.xlnet.d_model, num_classes)

def get_bert(bert_model, bert_do_lower_case):
    # Avoid a hard dependency on transformers by importing it only when used
    from transformers import XLNetTokenizer, XLNetModel
    model = XLNetModel.from_pretrained('huseinzol05/xlnet-base-bahasa-standard-cased')
    tokenizer = XLNetTokenizer.from_pretrained(
        'huseinzol05/xlnet-base-bahasa-standard-cased', do_lower_case=False
    )
    return tokenizer, model

def __init__(self, dropout, max_len=None):
    super(XLNETModel, self).__init__()
    # note: passes a default XLNetConfig() rather than the checkpoint's own config
    self.xlnet = XLNetModel.from_pretrained(
        config.PATHS['xlnet'], config=XLNetConfig())
    self.fc = nn.Linear(768 * max_len, 2)
    self.dropout = nn.Dropout(dropout)

def __init__(self, dropout_rate=0.3, n_outputs=2):
    super(XLNETClassifier, self).__init__()
    self.pretrained_model = XLNetModel.from_pretrained("xlnet-base-cased")
    self.sequence_summary = SequenceSummary(self.pretrained_model.config)
    self.d1 = torch.nn.Dropout(dropout_rate)
    self.l1 = torch.nn.Linear(768, 64)
    self.bn1 = torch.nn.LayerNorm(64)
    self.d2 = torch.nn.Dropout(dropout_rate)
    self.l2 = torch.nn.Linear(64, n_outputs)

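# Hedged sketch (an assumption): a forward pass that pools with SequenceSummary,
# which under XLNet's default config summarizes via the last token, before the
# two-layer head defined above.
def forward(self, input_ids, attention_mask=None):
    hidden = self.pretrained_model(input_ids, attention_mask=attention_mask)[0]
    pooled = self.sequence_summary(hidden)              # (batch, 768)
    x = torch.relu(self.bn1(self.l1(self.d1(pooled))))
    return self.l2(self.d2(x))
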
def __init__(self, num_class, pretrain_model):
    super(xlnet_classifier, self).__init__()
    self.pretrain_model = pretrain_model
    self.num_class = num_class
    self.xlnet = XLNetModel.from_pretrained(pretrain_model)
    self.drop = nn.Dropout(p=0.3)
    self.out = nn.Linear(self.xlnet.config.hidden_size, num_class)
    self.tokenizer = XLNetTokenizer.from_pretrained(pretrain_model)

def __init__(self, model_name_or_path: str, max_seq_length: int = 128, do_lower_case: bool = False):
    super(XLNet, self).__init__()
    self.config_keys = ['max_seq_length', 'do_lower_case']
    self.max_seq_length = max_seq_length
    self.do_lower_case = do_lower_case
    self.xlnet = XLNetModel.from_pretrained(model_name_or_path)
    self.tokenizer = XLNetTokenizer.from_pretrained(model_name_or_path, do_lower_case=do_lower_case)
    self.cls_token_id = self.tokenizer.convert_tokens_to_ids([self.tokenizer.cls_token])[0]
    self.sep_token_id = self.tokenizer.convert_tokens_to_ids([self.tokenizer.sep_token])[0]

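# Hedged sketch (a hypothetical helper, not from the original source) of how the
# ids above are typically assembled: XLNet appends <sep> and then <cls> at the
# *end* of the sequence, unlike BERT's leading [CLS].
def get_input_ids(self, text):
    tokens = self.tokenizer.tokenize(text)[:self.max_seq_length - 2]
    ids = self.tokenizer.convert_tokens_to_ids(tokens)
    return ids + [self.sep_token_id, self.cls_token_id]
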
def __init__(self, model_name, num_fine_labels, num_coarse_labels):
    super().__init__()
    self.transformer = XLNetModel.from_pretrained(
        model_name, num_labels=num_fine_labels)
    self.sequence_summary = SequenceSummary(self.transformer.config)
    # two heads over the same pooled representation: coarse and fine labels
    self.classifier_coarse = nn.Linear(self.transformer.config.d_model, num_coarse_labels)
    self.classifier_fine = nn.Linear(self.transformer.config.d_model, num_fine_labels)

def get_model_and_tokenizer(model_name, device, random_weights=False):
    if model_name.startswith('xlnet'):
        model = XLNetModel.from_pretrained(
            model_name, output_hidden_states=True).to(device)
        tokenizer = XLNetTokenizer.from_pretrained(model_name)
        sep = u'▁'
        emb_dim = 1024 if "large" in model_name else 768
    elif model_name.startswith('gpt2'):
        model = GPT2Model.from_pretrained(model_name, output_hidden_states=True).to(device)
        tokenizer = GPT2Tokenizer.from_pretrained(model_name)
        sep = 'Ġ'
        sizes = {
            "gpt2": 768,
            "gpt2-medium": 1024,
            "gpt2-large": 1280,
            "gpt2-xl": 1600
        }
        emb_dim = sizes[model_name]
    elif model_name.startswith('xlm'):
        model = XLMModel.from_pretrained(model_name, output_hidden_states=True).to(device)
        tokenizer = XLMTokenizer.from_pretrained(model_name)
        sep = '</w>'
        emb_dim = model.config.emb_dim  # was left unset in this branch; read it off the config
    elif model_name.startswith('bert'):
        model = BertModel.from_pretrained(model_name, output_hidden_states=True).to(device)
        tokenizer = BertTokenizer.from_pretrained(model_name)
        sep = '##'
        emb_dim = 1024 if "large" in model_name else 768
    elif model_name.startswith('distilbert'):
        model = DistilBertModel.from_pretrained(
            model_name, output_hidden_states=True).to(device)
        tokenizer = DistilBertTokenizer.from_pretrained(model_name)
        sep = '##'
        emb_dim = 768
    elif model_name.startswith('roberta'):
        model = RobertaModel.from_pretrained(
            model_name, output_hidden_states=True).to(device)
        tokenizer = RobertaTokenizer.from_pretrained(model_name)
        sep = 'Ġ'
        emb_dim = 1024 if "large" in model_name else 768
    else:
        print('Unrecognized model name:', model_name)
        sys.exit()

    if random_weights:
        print('Randomizing weights')
        model.init_weights()

    return model, tokenizer, sep, emb_dim

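# Usage sketch for the helper above:
model, tokenizer, sep, emb_dim = get_model_and_tokenizer('xlnet-base-cased', 'cpu')
print(emb_dim)  # 768 for the base checkpoint
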
def __init__(self, args, large, temp_dir, finetune=False, symbols=None):
    super(XLNet, self).__init__()
    self.args = args
    self.symbols = symbols
    self.device = "cpu" if args.visible_gpus == '-1' else "cuda"
    self.model = XLNetModel.from_pretrained('xlnet-base-cased', cache_dir=temp_dir)
    self.model.mem_len = self.args.mem_len
    self.model.config.output_hidden_states = True
    self.finetune = finetune

def test_embedding_matches_model(self):
    # run the original model
    model = XLNetModel.from_pretrained('xlnet-base-cased')
    outputs = model(self.input)
    last_hidden_states = outputs[0]
    # run our version
    embed_outs = self.embed_model(self.input)
    last_embedding = embed_outs[0]
    assert torch.all(
        torch.eq(last_embedding, last_hidden_states)), "embeddings were not the same"

def init_model(self):
    basic_encoder = None
    if self.config['use_bert']:
        bert_config = BertConfig.from_pretrained(self.config['bert_model_name'],
                                                 cache_dir=self.config['bert_dir'])
        if self.config['num_bert_layer'] is not None:
            bert_config.num_hidden_layers = self.config['num_bert_layer']
        bert = BertModel.from_pretrained(self.config['bert_model_name'],
                                         cache_dir=self.config['bert_dir'],
                                         config=bert_config)
        basic_encoder = bert
    elif self.config['use_xlnet']:
        xlnet_config = XLNetConfig.from_pretrained('hfl/chinese-xlnet-base',
                                                   cache_dir=self.config['xlnet_dir'])
        xlnet_config.n_layer = self.config['num_xlnet_layer']
        xlnet_config.mem_len = self.config['xlnet_mem_len']
        xlnet = XLNetModel.from_pretrained('hfl/chinese-xlnet-base',
                                           cache_dir=self.config['xlnet_dir'],
                                           config=xlnet_config)
        basic_encoder = xlnet
    elif self.config['use_transformer']:
        # randomly initialized transformer with a BERT architecture
        bert_config = BertConfig.from_pretrained('bert-base-chinese',
                                                 cache_dir=self.config['bert_dir'])
        if self.config['num_transformer_layer'] is not None:
            bert_config.num_hidden_layers = self.config['num_transformer_layer']
        transf = BertModel(bert_config)
        basic_encoder = transf
    elif self.config['use_rnn_basic_encoder']:
        pass
    else:
        raise Exception('Unsupported basic encoder')

    self.model = DocEE(self.config, basic_encoder, self.tokenizer)
    if self.config['cuda']:
        self.model.cuda()
    self.optimizer = torch.optim.Adam(self.model.parameters(),
                                      lr=self.config['learning_rate'])

    # optionally resume from the latest checkpoint in model_save_dir
    if self.config['resume_model']:
        OUTPUT_DIR = self.config['output_dir']
        MODEL_SAVE_DIR = os.path.join(OUTPUT_DIR, self.config['model_save_dir'])
        if os.path.exists(MODEL_SAVE_DIR):
            cpt_file_names = os.listdir(MODEL_SAVE_DIR)
            if len(cpt_file_names) > 0:
                epoch_record = []
                for cpt_file_name in cpt_file_names:
                    epoch_record.append(int(cpt_file_name.split('-')[-1].split('.')[0]))
                epoch_record.sort()
                latest_epoch = epoch_record[-1]
                self.latest_epoch = latest_epoch + 1
                latest_model_file_name = os.path.join(
                    MODEL_SAVE_DIR,
                    self.config['model_file'] % (self.config['ee_method'], latest_epoch))
                if self.config['cuda']:
                    store_dict = torch.load(latest_model_file_name,
                                            map_location=torch.device('cuda'))
                else:
                    store_dict = torch.load(latest_model_file_name, map_location='cpu')
                self.model.load_state_dict(store_dict['model_state'])
                self.optimizer.load_state_dict(store_dict['optimizer_state'])
                print('resume train from %s' % latest_model_file_name)
    print('model init finish')

def get_model_and_tokenizer(model_name, device="cpu", random_weights=False, model_path=None):
    """
    model_path: if given, initialize from path instead of official repo
    """
    init_model = model_name
    if model_path:
        print("Initializing model from local path:", model_path)
        init_model = model_path

    if model_name.startswith("xlnet"):
        model = XLNetModel.from_pretrained(
            init_model, output_hidden_states=True).to(device)
        tokenizer = XLNetTokenizer.from_pretrained(init_model)
        sep = u"▁"
    elif model_name.startswith("gpt2"):
        model = GPT2Model.from_pretrained(init_model, output_hidden_states=True).to(device)
        tokenizer = GPT2Tokenizer.from_pretrained(init_model)
        sep = "Ġ"
    elif model_name.startswith("xlm"):
        model = XLMModel.from_pretrained(init_model, output_hidden_states=True).to(device)
        tokenizer = XLMTokenizer.from_pretrained(init_model)
        sep = "</w>"
    elif model_name.startswith("bert"):
        model = BertModel.from_pretrained(init_model, output_hidden_states=True).to(device)
        tokenizer = BertTokenizer.from_pretrained(init_model)
        sep = "##"
    elif model_name.startswith("distilbert"):
        model = DistilBertModel.from_pretrained(
            init_model, output_hidden_states=True).to(device)
        tokenizer = DistilBertTokenizer.from_pretrained(init_model)
        sep = "##"
    elif model_name.startswith("roberta"):
        # originally loaded from model_name here, silently ignoring model_path;
        # use init_model like the other branches
        model = RobertaModel.from_pretrained(
            init_model, output_hidden_states=True).to(device)
        tokenizer = RobertaTokenizer.from_pretrained(init_model)
        sep = "Ġ"
    else:
        print("Unrecognized model name:", model_name)
        sys.exit()

    if random_weights:
        print("Randomizing weights")
        model.init_weights()

    return model, tokenizer, sep

def __init__(self, config, x_embed):
    super().__init__()
    # e.g. config.pretrained_weights = "xlnet-base-cased"
    self.output_attentions = config.output_attentions
    self.model = XLNetModel.from_pretrained(
        config.pretrained_weights, output_attentions=self.output_attentions)
    self.pretrained_config = XLNetConfig.from_pretrained(
        config.pretrained_weights)
    self.encoder_out_size = self.model.config.d_model