def get_encoder(self):
    if self.hparams.model_type == 'bert':
        encoder = BertModel.from_pretrained('bert-base-uncased')
    elif self.hparams.model_type == 'bert-cased':
        encoder = BertModel.from_pretrained('bert-base-cased')
    elif self.hparams.model_type == 'bert-large':
        encoder = BertModel.from_pretrained('bert-large-uncased')
    elif self.hparams.model_type == 'distilbert':
        encoder = DistilBertModel.from_pretrained('distilbert-base-uncased')
    elif self.hparams.model_type == 'roberta':
        encoder = RobertaModel.from_pretrained('roberta-base')
    elif self.hparams.model_type == 'roberta-large':
        encoder = RobertaModel.from_pretrained('roberta-large')
    elif self.hparams.model_type == 'albert':
        encoder = AlbertModel.from_pretrained('albert-base-v2')
    elif self.hparams.model_type == 'albert-xxlarge':
        encoder = AlbertModel.from_pretrained('albert-xxlarge-v2')
    elif self.hparams.model_type == 'electra':
        encoder = ElectraModel.from_pretrained('google/electra-base-discriminator')
    elif self.hparams.model_type == 'electra-large':
        encoder = ElectraModel.from_pretrained('google/electra-large-discriminator')
    else:
        raise ValueError(f"Unsupported model_type: {self.hparams.model_type}")
    return encoder
def __init__(self, num_labels: int,
             top_comment_pretrained_model_name_or_path: Path,
             post_pretrained_model_name_or_path: Path,
             classifier_dropout_prob: float, meta_data_size: int,
             subreddit_pretrained_path: Path, num_subreddit_embeddings: int,
             subreddit_embeddings_size: int):
    super(AlbertForEigenmetricRegression, self).__init__()
    self.num_labels = num_labels
    self.construct_param_dict = OrderedDict({
        "num_labels": num_labels,
        "top_comment_pretrained_model_name_or_path": str(top_comment_pretrained_model_name_or_path),
        "post_pretrained_model_name_or_path": str(post_pretrained_model_name_or_path),
        "classifier_dropout_prob": classifier_dropout_prob,
        "meta_data_size": meta_data_size,
        "subreddit_pretrained_path": str(subreddit_pretrained_path),
        "num_subreddit_embeddings": num_subreddit_embeddings,
        "subreddit_embeddings_size": subreddit_embeddings_size,
    })

    # Load two pretrained ALBERT encoders: one for the top comment, one for the post.
    self.top_comment_albert = AlbertModel.from_pretrained(
        top_comment_pretrained_model_name_or_path)
    self.post_albert = AlbertModel.from_pretrained(
        post_pretrained_model_name_or_path)

    # Dropout layer.
    self.dropout = nn.Dropout(classifier_dropout_prob)

    # Final classifier layer over the concatenated features.
    self.hidden_dimension = (self.top_comment_albert.config.hidden_size
                             + self.post_albert.config.hidden_size
                             + meta_data_size + subreddit_embeddings_size)
    self.classifier = nn.Linear(self.hidden_dimension, num_labels)
    self._init_weights(self.classifier)  # initialize the classifier

    # Subreddit embeddings: load pretrained weights if a path is given,
    # otherwise initialize randomly when sizes are provided.
    subreddit_embeddings = None
    if subreddit_pretrained_path:
        embeddings = torch.load(subreddit_pretrained_path)
        subreddit_embeddings = nn.Embedding.from_pretrained(embeddings, freeze=False)
        logger.info(f"Loaded subreddit embeddings from {subreddit_pretrained_path}")
    elif num_subreddit_embeddings is not None and subreddit_embeddings_size is not None:
        subreddit_embeddings = nn.Embedding(num_subreddit_embeddings,
                                            subreddit_embeddings_size)
        # The mean and std below are taken from the pretrained embeddings.
        torch.nn.init.normal_(subreddit_embeddings.weight, mean=0, std=0.49)
        logger.info("Initialized subreddit embeddings")
    self.subreddit_embeddings = subreddit_embeddings
    self.is_feature_extractor = False
def __init__(self, n_outputs, size, pretrained_model_path=False):
    super(Albert, self).__init__()
    self.n_outputs = n_outputs
    self.size = size
    self.pretrained_model_path = pretrained_model_path
    if self.pretrained_model_path is False:
        self.huggingface_model = AlbertModel.from_pretrained(f"albert-{size}-v2")
    else:
        self.huggingface_model = AlbertModel.from_pretrained(pretrained_model_path)
    self.dropout = nn.Dropout(0.1)  # hard-coded dropout probability
    self.out_proj = nn.Linear(self.huggingface_model.config.hidden_size, n_outputs)
def getBertModel():
    # tokenizer = DistilBertTokenizer.from_pretrained(CONF['BERT_MODEL_PATH'])
    # model = DistilBertModel.from_pretrained(CONF['BERT_MODEL_PATH'])
    tokenizer = BertTokenizer.from_pretrained(CONF['BERT_MODEL_PATH'])
    model = AlbertModel.from_pretrained(CONF['BERT_MODEL_PATH'])
    return model, tokenizer
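# A minimal usage sketch (an assumption, not part of the snippet above): it shows how the
# (model, tokenizer) pair returned by getBertModel() could be used to embed one sentence.
# `encode_sentence` is a hypothetical helper and assumes transformers v4+, where model
# outputs expose `.last_hidden_state`.
import torch


def encode_sentence(text):
    model, tokenizer = getBertModel()
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=128)
    with torch.no_grad():
        outputs = model(**inputs)
    # Mean-pool the final-layer token states into a single sentence vector.
    return outputs.last_hidden_state.mean(dim=1)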
def __init__(self, config):
    super().__init__()
    self._if_infer = False
    self.classes = config.classes
    self.num_classes = config.num_classes
    self.device = config.device
    self.f1_average = config.f1_average
    self.num_labels = config.num_labels
    self.feature_cols = config.feature_cols
    self.encoder = AlbertModel.from_pretrained(config.transfer_path)
    # Fine-tune the whole encoder.
    for param in self.encoder.parameters():
        param.requires_grad = True
    self.fc = nn.Sequential(
        nn.BatchNorm1d(config.transfer_hidden),
        nn.Linear(config.transfer_hidden, config.linear_size),
        nn.ReLU(inplace=True),
        nn.BatchNorm1d(config.linear_size),
        nn.Dropout(config.dropout),
        nn.Linear(config.linear_size, self.num_classes))
    self.criterion = nn.CrossEntropyLoss()
def __init__(self, my_config, args):
    super(NqModel, self).__init__()
    # albert_base_configuration = AlbertConfig(vocab_size=30000, hidden_size=768,
    #                                          num_attention_heads=12, intermediate_size=3072,
    #                                          attention_probs_dropout_prob=0)
    self.my_mask = None
    self.args = args
    # mfeb/albert-xxlarge-v2-squad2
    self.bert_config = AlbertConfig.from_pretrained("albert-xxlarge-v2")
    # self.bert_config.gradient_checkpointing = True
    # self.bert_config.Extgradient_checkpointing = True
    self.bert = AlbertModel.from_pretrained("albert-xxlarge-v2", config=self.bert_config)
    # self.bert = AlbertModel.from_pretrained("albert-base-v2")
    my_config.hidden_size = self.bert.config.hidden_size
    self.right = 0
    self.all = 0
    # self.bert = AlbertModel(albert_base_configuration)
    # self.bert2 = BertModel(bert_config)
    # self.bert = BertModel(BertConfig())
    # self.bert = RobertaModel(RobertaConfig(max_position_embeddings=514, vocab_size=50265))
    # print(my_config, bert_config)

    # self.tok_dense = nn.Linear(my_config.hidden_size, my_config.hidden_size)
    self.tok_dense = nn.Linear(my_config.hidden_size * 2, my_config.hidden_size * 2)
    # self.tok_dense2 = nn.Linear(my_config.hidden_size, my_config.hidden_size)
    # self.para_dense = nn.Linear(self.config.hidden_size, self.config.hidden_size)
    # self.doc_dense = nn.Linear(self.config.hidden_size, self.config.hidden_size)

    self.dropout = nn.Dropout(my_config.hidden_dropout_prob)
    self.tok_outputs = nn.Linear(my_config.hidden_size * 2, 1)  # tuned to avoid falling into bad optima
    # self.tok_outputs2 = nn.Linear(my_config.hidden_size, 1)
    # config.max_token_len, config.max_token_relative
    # self.para_outputs = nn.Linear(self.config.hidden_size, 1)
    # self.answer_type_outputs = nn.Linear(self.config.hidden_size, 2)
    # self.tok_to_label = nn.Linear(my_config.max_token_len, 2)
    # self.par_to_label = nn.Linear(my_config.max_paragraph_len, 2)

    self.encoder = Encoder(my_config)
    # self.encoder2 = Encoder(my_config)
    self.my_config = my_config

    self.model_choice = None
    self.ground_answer = None
    self.ACC = 0
    self.ALL = 0
    self.ErrId = []
def __init__(self):
    super(AlbertClassifier, self).__init__()
    D_in, H, D_out = 768, 50, 2
    self.albert = AlbertModel.from_pretrained('albert-base-v1')
    self.classifier = nn.Sequential(nn.Linear(D_in, H),
                                    nn.ReLU(),
                                    nn.Dropout(0.4),
                                    nn.Linear(H, D_out))
def __init__(self, config):
    super(AlBert, self).__init__()
    model_config = AlbertConfig.from_pretrained(
        config.config_file,
        num_labels=config.num_labels,
        finetuning_task=config.task,
    )
    self.albert = AlbertModel.from_pretrained(
        config.model_name_or_path,
        config=model_config,
    )
    if config.requires_grad:
        for param in self.albert.parameters():
            param.requires_grad = True
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.classifier = nn.Linear(config.hidden_size, config.num_labels)

    # Add the weighted-layer head (requires matching settings in config.json).
    self.hidden_weight = config.weighted_layer_tag
    self.pooling_tag = config.pooling_tag
    if self.hidden_weight:
        self.weight_layer = config.weighted_layer_num
        # self.weight = torch.zeros(self.weight_layer).to(config.device)
        self.weight = torch.nn.Parameter(torch.FloatTensor(self.weight_layer),
                                         requires_grad=True)
        self.softmax = nn.Softmax()
        self.pooler = nn.Sequential(nn.Linear(768, 768), nn.Tanh())
    elif self.pooling_tag:
        self.maxPooling = nn.MaxPool1d(64)
        self.avgPooling = nn.AvgPool1d(64)
        self.pooler = nn.Sequential(nn.Linear(768 * 3, 768), nn.Tanh())
def _get_fallback_model(self) -> AlbertModel:
    """Returns the CPU model."""
    if not self._model_fallback:
        self._model_fallback = AlbertModel.from_pretrained(
            self._model_directory).eval()
    return self._model_fallback
def build_ban(dataset, num_hid, op='', gamma=4, task='vqa', use_counter=True):
    # w_emb = WordEmbedding(dataset.dictionary.ntoken, 300, .0, op)
    # q_emb = QuestionEmbedding(300 if 'c' not in op else 600, num_hid, 1, False, .0)
    w_emb = AlbertTokenizer.from_pretrained('albert-large-v2')
    q_emb = AlbertModel.from_pretrained('albert-large-v2')
    params_set = set()
    for param in q_emb.parameters():
        params_set.add(param)
        param.requires_grad = False
    v_att = BiAttention(dataset.v_dim, num_hid, num_hid, gamma)
    if task == 'vqa':
        b_net = []
        q_prj = []
        c_prj = []
        objects = 10  # minimum number of boxes
        for i in range(gamma):
            b_net.append(BCNet(dataset.v_dim, num_hid, num_hid, None, k=1))
            q_prj.append(FCNet([num_hid, num_hid], '', .2))
            c_prj.append(FCNet([objects + 1, num_hid], 'ReLU', .0))
        classifier = SimpleClassifier(num_hid, num_hid * 2,
                                      dataset.num_ans_candidates, .5)
        counter = Counter(objects) if use_counter else None
        return BanModel(dataset, params_set, w_emb, q_emb, v_att, b_net, q_prj,
                        c_prj, classifier, counter, op, gamma)
    elif task == 'flickr':
        return BanModel_flickr(w_emb, q_emb, v_att, op, gamma)
def __init__(
    self,
    lang: str = 'en',
):
    try:
        from transformers import (AlbertTokenizer, AutoTokenizer,
                                  BertJapaneseTokenizer, CamembertTokenizer)
        from transformers import (AlbertModel, AutoModel, BertModel,
                                  CamembertModel)
    except ImportError:
        msg = "importing bert dep failed."
        msg += "\n try to install sister by `pip install sister[bert]`."
        raise ImportError(msg)

    if lang == "en":
        tokenizer = AlbertTokenizer.from_pretrained("albert-base-v2")
        model = AlbertModel.from_pretrained("albert-base-v2")
    elif lang == "fr":
        tokenizer = CamembertTokenizer.from_pretrained("camembert-base")
        model = CamembertModel.from_pretrained("camembert-base")
    elif lang == "es":
        tokenizer = AutoTokenizer.from_pretrained("dccuchile/bert-base-spanish-wwm-uncased")
        model = AutoModel.from_pretrained("dccuchile/bert-base-spanish-wwm-uncased")
    elif lang == "ja":
        # BertModel added to the imports above; the original only imported AlbertModel,
        # CamembertModel, and AutoModel, so this branch would have raised a NameError.
        tokenizer = BertJapaneseTokenizer.from_pretrained("cl-tohoku/bert-base-japanese-whole-word-masking")
        model = BertModel.from_pretrained("cl-tohoku/bert-base-japanese-whole-word-masking")
    else:
        raise ValueError(f"Unsupported lang: {lang}")

    self.tokenizer = tokenizer
    self.model = model
def __init__(self):
    super().__init__()
    self.bert = AlbertModel.from_pretrained('albert-base-v2')
    self.tokenizer = AlbertTokenizer.from_pretrained('albert-base-v2')
    self.score_fc = nn.Linear(768, 11)
    self.regression_fc = nn.Linear(768, 1)
    self.sigmoid = nn.Sigmoid()
def __init__(
    self,
    dropout: float,
    num_class: int,
    ptrain_ver: str,
):
    super().__init__()

    # Check if `ptrain_ver` is supported.
    if ptrain_ver not in TeacherAlbert.allow_ptrain_ver:
        raise ValueError(
            f'`ptrain_ver` {ptrain_ver} is not supported.\n' +
            'Supported options:' +
            ''.join(
                list(
                    map(lambda option: f'\n\t--ptrain_ver {option}',
                        TeacherAlbert.allow_ptrain_ver.keys()))))

    # Load pre-trained ALBERT model.
    self.encoder = AlbertModel.from_pretrained(ptrain_ver)

    # Dropout layer between encoder and linear layer.
    self.dropout = nn.Dropout(dropout)

    # Linear layer projecting from `d_model` to `num_class`.
    self.linear_layer = nn.Linear(
        in_features=TeacherAlbert.allow_ptrain_ver[ptrain_ver],
        out_features=num_class)

    # Linear layer initialization.
    with torch.no_grad():
        nn.init.normal_(self.linear_layer.weight, mean=0.0, std=0.02)
        nn.init.zeros_(self.linear_layer.bias)
def __init__(self, max_len, hidden_layer=320, dropout=0.1,
             pretrained_model='bert', out_features=2):
    """
    Initialise the model.

    :param max_len: Maximum length of tokens in a sentence. If the sentence is too short,
        it should be zero padded
    :param hidden_layer: number of neurons to use in hidden dense layer
    :param dropout: dropout probability to use
    :param pretrained_model: type of the pre-trained model to use. Accepted strings are
        'bert' and 'albert'
    :param out_features: number of out features e.g. 2 if you want only positive and negative
    """
    super().__init__()
    if pretrained_model == 'bert':
        self.pretrained_model = BertModel.from_pretrained('bert-base-uncased')
    elif pretrained_model == 'albert':
        self.pretrained_model = AlbertModel.from_pretrained('albert-base-v2')
    self.dropout = nn.Dropout(dropout)
    self.fc1 = nn.Linear(max_len, hidden_layer)
    self.fc2 = nn.Linear(hidden_layer, out_features)
    nn.init.kaiming_normal_(self.fc1.weight)
    nn.init.kaiming_normal_(self.fc2.weight)
def test_model_from_pretrained(self):
    cache_dir = "/tmp/transformers_test/"
    for model_name in list(ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
        model = AlbertModel.from_pretrained(model_name, cache_dir=cache_dir)
        shutil.rmtree(cache_dir)
        self.assertIsNotNone(model)
def __init__(self, model_name_or_path: str, max_seq_length: int = 128,
             do_lower_case: Optional[bool] = None, model_args: Dict = {},
             tokenizer_args: Dict = {}):
    super(ALBERT, self).__init__()
    self.config_keys = ['max_seq_length', 'do_lower_case']
    self.do_lower_case = do_lower_case

    if max_seq_length > 510:
        logging.warning(
            "ALBERT only allows a max_seq_length of 510 (512 with special tokens). "
            "Value will be set to 510")
        max_seq_length = 510
    self.max_seq_length = max_seq_length

    if self.do_lower_case is not None:
        tokenizer_args['do_lower_case'] = do_lower_case

    self.albert = AlbertModel.from_pretrained(model_name_or_path, **model_args)
    # self.tokenizer = AlbertTokenizer.from_pretrained(model_name_or_path, **tokenizer_args)
    self.tokenizer = BertTokenizer.from_pretrained(model_name_or_path, **tokenizer_args)
def __init__(self, albert_path, dropout, n_class):
    super(AlbertClassifier, self).__init__()
    self.albert_path = albert_path
    self.n_class = n_class
    self.albert = AlbertModel.from_pretrained(self.albert_path)
    self.dropout = nn.Dropout(dropout)
    self.fc = nn.Linear(self.albert.config.hidden_size, n_class)
def __init__(self, pretrain_path, max_length):
    nn.Module.__init__(self)
    self.bert = AlbertModel.from_pretrained(pretrain_path)
    # Freeze the encoder and keep it in eval mode; it is used as a fixed feature extractor.
    for param in self.bert.parameters():
        param.requires_grad = False
    self.bert.eval()
    self.max_length = max_length
def __init__(self, batch_size, lstm_hid_dim, d_a, n_classes, is_train,
             label_input_ids, label_attention_masks):
    super(StructuredSelfAttentionBert, self).__init__()
    self.n_classes = n_classes
    if config.bert_model == 'bert':
        self.bert = BertModel.from_pretrained('bert-base-uncased')
    elif config.bert_model == 'albert':
        self.bert = AlbertModel.from_pretrained('albert-base-v1')
    else:
        raise Exception('Unsupported bert_model: expected "bert" or "albert".')
    for name, param in self.bert.named_parameters():
        param.requires_grad = is_train
    self.linear_label = torch.nn.Linear(config.bert_embedding_size, lstm_hid_dim)
    self.lstm = torch.nn.LSTM(input_size=config.bert_embedding_size,
                              hidden_size=lstm_hid_dim,
                              num_layers=1,
                              batch_first=True,
                              bidirectional=True)
    self.linear_first = torch.nn.Linear(lstm_hid_dim * 2, d_a)
    self.linear_second = torch.nn.Linear(d_a, n_classes)
    self.weight1 = torch.nn.Linear(lstm_hid_dim * 2, 1)
    self.weight2 = torch.nn.Linear(lstm_hid_dim * 2, 1)
    self.output_layer = torch.nn.Linear(config.bert_embedding_size, n_classes)
    self.embedding_dropout = torch.nn.Dropout(p=0.1)
    self.batch_size = batch_size
    self.lstm_hid_dim = lstm_hid_dim
    self.label_input_ids = label_input_ids
    self.label_attention_masks = label_attention_masks
def __init__(
    self,
    lang: str = "en",
):
    try:
        from transformers import (AlbertModel, AlbertTokenizer, BertConfig,
                                  BertJapaneseTokenizer, BertModel,
                                  CamembertModel, CamembertTokenizer)
    except ImportError:
        msg = "importing bert dep failed."
        msg += "\n try to install sister by `pip install sister[bert]`."
        raise ImportError(msg)

    if lang == "en":
        model_name = "albert-base-v2"
        tokenizer = AlbertTokenizer.from_pretrained(model_name)
        config = BertConfig.from_pretrained(model_name, output_hidden_states=True)
        model = AlbertModel.from_pretrained(model_name, config=config)
    elif lang == "fr":
        model_name = "camembert-base"
        tokenizer = CamembertTokenizer.from_pretrained(model_name)
        config = BertConfig.from_pretrained(model_name, output_hidden_states=True)
        model = CamembertModel.from_pretrained(model_name, config=config)
    elif lang == "ja":
        model_name = "cl-tohoku/bert-base-japanese-whole-word-masking"
        tokenizer = BertJapaneseTokenizer.from_pretrained(model_name)
        config = BertConfig.from_pretrained(model_name, output_hidden_states=True)
        model = BertModel.from_pretrained(model_name, config=config)

    self.tokenizer = tokenizer
    self.model = model
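# A minimal sketch (an assumption, not part of the original class): because the configs above
# set output_hidden_states=True, the loaded model returns per-layer hidden states that can be
# pooled into a sentence vector. `embed` is a hypothetical helper and assumes transformers v4+,
# where model outputs expose a `hidden_states` tuple (embeddings plus one tensor per layer).
import torch


def embed(tokenizer, model, sentence):
    inputs = tokenizer(sentence, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
    # Mean-pool the last layer's token states into one fixed-size vector.
    return outputs.hidden_states[-1].mean(dim=1).squeeze(0)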
def __init__(
    self,
    pretrained_model_name=None,
    config_filename=None,
    vocab_size=None,
    hidden_size=768,
    num_hidden_layers=12,
    num_attention_heads=12,
    intermediate_size=3072,
    hidden_act="gelu",
    max_position_embeddings=512,
):
    super().__init__()

    # Check that exactly one of pretrained_model_name, config_filename,
    # and vocab_size was passed.
    total = 0
    if pretrained_model_name is not None:
        total += 1
    if config_filename is not None:
        total += 1
    if vocab_size is not None:
        total += 1

    if total != 1:
        raise ValueError(
            "Only one of pretrained_model_name, vocab_size, "
            + "or config_filename should be passed into the "
            + "ALBERT constructor."
        )

    # TK: The following code checks the same once again.
    if vocab_size is not None:
        config = AlbertConfig(
            vocab_size_or_config_json_file=vocab_size,
            vocab_size=vocab_size,
            hidden_size=hidden_size,
            num_hidden_layers=num_hidden_layers,
            num_attention_heads=num_attention_heads,
            intermediate_size=intermediate_size,
            hidden_act=hidden_act,
            max_position_embeddings=max_position_embeddings,
        )
        model = AlbertModel(config)
    elif pretrained_model_name is not None:
        model = AlbertModel.from_pretrained(pretrained_model_name)
    elif config_filename is not None:
        config = AlbertConfig.from_json_file(config_filename)
        model = AlbertModel(config)
    else:
        raise ValueError(
            "Either pretrained_model_name, config_filename, or vocab_size "
            + "must be passed into the ALBERT constructor."
        )

    model.to(self._device)
    self.add_module("albert", model)
    self.config = model.config
    self._hidden_size = model.config.hidden_size
def __init__(self, num_classes, model_path='./albert_base'):
    super(AlbertCrf, self).__init__()
    self.albert = AlbertModel.from_pretrained(model_path)
    self.dropout = nn.Dropout(0.1)
    self.fc1 = nn.Linear(self.albert.config.hidden_size, 256)
    self.fc2 = nn.Linear(256, num_classes)
    self.crf = CRF(num_classes, batch_first=True)
def __init__(self):
    super().__init__()
    self.tokenizer = BertTokenizer.from_pretrained('./alberttiny')
    self.model = AlbertModel.from_pretrained('./alberttiny').to(Config.device)
    self.is_src = None
    # self.w2v = gensim.models.KeyedVectors.load_word2vec_format(
    #     '../news_comment/baike_26g_news_13g_novel_229g.bin', binary=True)
def __init__(self, drop_prob=0):
    super(BertQA, self).__init__()
    self.bert = AlbertModel.from_pretrained("albert-large-v2")
    # for QA
    self.ans_se = nn.Linear(1024, 2)
    # for Beer, ten-level sentiment
    self.sentiment = nn.Linear(1024, 1)
    self.sentiment_movie = nn.Linear(1024, 1)
    self.sigmoid = nn.Sigmoid()
def __init__(self, hidden_dim=768, num_tags=20):
    super(TagValueModel, self).__init__()
    self.albert = AlbertModel.from_pretrained("ALINEAR/albert-japanese-v2")
    self.dropout = nn.Dropout(0.1)
    self.tags = nn.Linear(768, num_tags)
    # self.tags_insentence = nn.Linear(768, 20)
    self.starts = nn.Linear(768, num_tags)
    self.ends = nn.Linear(768, num_tags)
    self.num_tags = num_tags
def __init__(self, config):
    super(Model, self).__init__()
    self.config = AlbertConfig.from_pretrained(config.albert_config_path)
    self.albert = AlbertModel.from_pretrained(config.albert_model_path,
                                              config=self.config)
    for param in self.albert.parameters():
        param.requires_grad = True
    self.fc = nn.Linear(config.hidden_size, config.num_classes)
def __init__(self, args, device, d_model=256, nhead=4, d_ff=1024, nlayers=2, dropout=0.5):
    super(Autoencoder, self).__init__()
    self.model_type = 'Transformer'
    self.d_model = d_model
    self.src_mask = None
    self.pos_encoder = PositionalEncoding(d_model, dropout)  # encoder's position
    self.pos_decoder = PositionalEncoding(d_model, dropout)  # decoder's position

    decoder_layers = TransformerDecoderLayer(d_model, nhead, d_ff, dropout)
    decoder_norm = nn.LayerNorm(d_model)
    self.transformer_decoder = TransformerDecoder(decoder_layers, nlayers, decoder_norm)

    # self.bert_encoder = BertModel.from_pretrained(args.PRETRAINED_MODEL_NAME,
    #                                               output_hidden_states=args.distill_2)
    if args.use_albert:
        self.bert_encoder = AlbertModel.from_pretrained("clue/albert_chinese_tiny")
        self.bert_embed = self.bert_encoder.embeddings
        # self.tgt_embed = self.bert_embed
        d_vocab = self.bert_encoder.config.vocab_size + 1
        self.tgt_embed = nn.Sequential(Embeddings(d_model, d_vocab),
                                       PositionalEncoding(d_model, dropout))
    elif args.use_tiny_bert:
        self.bert_encoder = AutoModel.from_pretrained("google/bert_uncased_L-2_H-256_A-4")
        self.bert_embed = self.bert_encoder.embeddings
        self.tgt_embed = self.bert_embed
    elif args.use_distil_bert:
        configuration = DistilBertConfig()
        self.bert_encoder = DistilBertModel(configuration)
        self.bert_embed = self.bert_encoder.embeddings
        self.tgt_embed = self.bert_embed
        # self.tgt_embed = self.bert.embeddings
    else:
        self.bert_encoder = BertModel.from_pretrained(
            args.PRETRAINED_MODEL_NAME, output_hidden_states=args.distill_2)
        self.bert_embed = self.bert_encoder.embeddings
        self.tgt_embed = self.bert_embed

    self.distill_2 = args.distill_2
    self.gru = nn.GRU(d_model, d_model, 1)
    self.lr = nn.Linear(d_model, self.bert_encoder.config.vocab_size + 1)
    self.sigmoid = nn.Sigmoid()
    self.device = device
    self.init_weights()
def load_pretrained_encoder(mpath,
                            config="albert_config.json",
                            model="albert_model.bin"):
    b_config = BC.from_pretrained(opt.join(mpath, config))
    encoder = AlbertModel.from_pretrained(opt.join(mpath, model), config=b_config)
    return encoder
def __init__(self, config, num_label):
    super(AlbertQA, self).__init__()
    self.albert = AlbertModel.from_pretrained('albert-xxlarge-v1')
    self.fc = nn.Linear(config.hidden_size, num_label)
    self.drop = nn.Dropout(config.hidden_dropout_prob)
    self.loss = nn.CrossEntropyLoss(reduction='sum')
    torch.nn.init.xavier_uniform_(self.fc.weight)
    torch.nn.init.constant_(self.fc.bias, 0.)
def __init__(self, bert_name, num_class, bert_type='bert', drop_out=0.1):
    super(Bert, self).__init__()
    if bert_type == 'bert':
        self.bert = BertModel.from_pretrained(bert_name)
    elif bert_type == 'albert':
        self.bert = AlbertModel.from_pretrained(bert_name)
    else:
        raise Exception('Please enter the correct bert type.')
    self.drop_out = nn.Dropout(p=drop_out)
    self.classifier = nn.Linear(self.bert.config.hidden_size, num_class)