def __init__(self, final_size, pooling, bert_path, model_type='distilbert', pool_activation=None):
    super().__init__()
    if model_type == 'distilbert':
        self.bert = DistilBertModel.from_pretrained(bert_path, return_dict=True)
    else:
        self.bert = BertModel.from_pretrained(bert_path, return_dict=True)
    self.pooler = BertPooler(self.bert.config, final_size, pooling, pool_activation)
def __init__(self): """ In the constructor we instantiate two nn.Linear modules and assign them as member variables. """ super(ContrastiveModel, self).__init__() self.original_transformer = DistilBertModel.from_pretrained( 'distilbert-base-uncased') self.translation_transformer = DistilBertModel.from_pretrained( 'distilbert-base-german-cased') self.original_linear = torch.nn.Linear(201 * 768, 256) self.translation_linear = torch.nn.Linear(201 * 768, 256) self.original_norm = torch.nn.BatchNorm1d(256) self.translation_norm = torch.nn.BatchNorm1d(256) self.final_linear = torch.nn.Linear(512, 1)
def get_encoder(self):
    if self.hparams.model_type == 'bert':
        encoder = BertModel.from_pretrained('bert-base-uncased')
    elif self.hparams.model_type == 'bert-cased':
        encoder = BertModel.from_pretrained('bert-base-cased')
    elif self.hparams.model_type == 'bert-large':
        encoder = BertModel.from_pretrained('bert-large-uncased')
    elif self.hparams.model_type == 'distilbert':
        encoder = DistilBertModel.from_pretrained(
            'distilbert-base-uncased')
    elif self.hparams.model_type == 'roberta':
        encoder = RobertaModel.from_pretrained('roberta-base')
    elif self.hparams.model_type == 'roberta-large':
        encoder = RobertaModel.from_pretrained('roberta-large')
    elif self.hparams.model_type == 'albert':
        encoder = AlbertModel.from_pretrained('albert-base-v2')
    elif self.hparams.model_type == 'albert-xxlarge':
        encoder = AlbertModel.from_pretrained('albert-xxlarge-v2')
    elif self.hparams.model_type == 'electra':
        encoder = ElectraModel.from_pretrained(
            'google/electra-base-discriminator')
    elif self.hparams.model_type == 'electra-large':
        encoder = ElectraModel.from_pretrained(
            'google/electra-large-discriminator')
    else:
        raise ValueError
    return encoder
def get_distilkobert_model(no_cuda=False):
    model = DistilBertModel.from_pretrained('monologg/distilkobert')
    device = "cuda" if torch.cuda.is_available() and not no_cuda else "cpu"
    model.to(device)
    return model
def __init__(self, vocab_size):
    super(DistilBertEncoder, self).__init__()
    self.bert = DistilBertModel.from_pretrained('distilbert-base-uncased')
    self.bert.resize_token_embeddings(vocab_size)
    for param in self.bert.parameters():
        param.requires_grad = False
def __init__(self):
    super(DistillBERTClass, self).__init__()
    self.l1 = DistilBertModel.from_pretrained(
        config.PRE_TRAINED_MODEL_NAME)
    self.pre_classifier = torch.nn.Linear(768, 768)  # matches DistilBERT's 768-d output
    self.dropout = torch.nn.Dropout(0.3)
    self.classifier = torch.nn.Linear(768, 25)  # sentiments combined into 25 targets
def __init__(self, config):
    super().__init__(config)
    self.bert = DistilBertModel.from_pretrained('monologg/distilkobert',
                                                output_hidden_states=True,
                                                output_attentions=True)
    self.seq_len = config.max_length
    self.linear = torch.nn.Linear(config.hidden_size, 7)
def __init__(self, extra_layers):
    super().__init__()
    self.distilbert = DistilBertModel.from_pretrained('distilbert-base-uncased')
    self.config = self.distilbert.config
    # Keep only the transformer stack, restricted to its last `extra_layers` blocks.
    self.distilbert = self.distilbert.transformer
    self.distilbert.layer = nn.ModuleList(self.distilbert.layer[6 - extra_layers:])
    self.qa_outputs = nn.Linear(768, 2)
    self.dropout = nn.Dropout(0.1)
def __init__(self, num_experts, unfreeze_gate):
    super().__init__()
    self.num_experts = num_experts
    self.distilbert = DistilBertModel.from_pretrained('distilbert-base-uncased')
    if not unfreeze_gate:
        self.freeze(model=self.distilbert, unfreeze_layers=[])
    self.pre_classifier = nn.Linear(768, num_experts * 64)  # dim, hidden
    self.classifier = nn.Linear(num_experts * 64, num_experts)  # hidden, output
def __init__(self, dropout=0.1):
    super(sentence_embeds_model, self).__init__()
    self.transformer = DistilBertModel.from_pretrained(
        'distilbert-base-uncased', dropout=dropout, output_hidden_states=True)
    self.embedding_size = 2 * self.transformer.config.hidden_size
def __init__(self):
    super().__init__()
    self.bert = DistilBertModel.from_pretrained("distilbert-base-uncased")
    self.pre_classifier = nn.Linear(768, 768)
    self.classifier = nn.Linear(768, 9)
    self.dropout = nn.Dropout(0.5)
def __init__(self, config):
    super().__init__(config)
    # self.distilbert = DistilBertModel(config)
    self.distilbert = DistilBertModel.from_pretrained(
        'distilbert-base-uncased')
    self.pre_classifier = nn.Linear(config.dim, config.dim)
    self.classifier = nn.Linear(config.dim, config.num_labels)
    self.dropout = nn.Dropout(config.seq_classif_dropout)
def __init__(self, model_name="distilbert-base-uncased", device="cuda"):
    super().__init__()
    self.device = device
    self.tokenizer = DistilBertTokenizer.from_pretrained(model_name)
    self.model = DistilBertModel.from_pretrained(model_name).to(self.device)
    self.linear = nn.Linear(self.model.config.dim,
                            self.model.config.num_labels).to(self.device)
    self.dropout = nn.Dropout(self.model.config.qa_dropout).to(self.device)
def __init__(self, config, num_labels=2):
    super(DistilBertForSequenceClassification, self).__init__()
    self.num_labels = num_labels
    self.config = config
    self.bert = DistilBertModel.from_pretrained('distilbert-base-uncased',
                                                output_hidden_states=False)
    self.dropout = nn.Dropout(config.dropout)
    self.classifier = nn.Linear(config.hidden_size, num_labels)
    nn.init.xavier_normal_(self.classifier.weight)
def __init__(self, dropout):
    super(DISTILBertModel, self).__init__()
    self.distilbert = DistilBertModel.from_pretrained(
        config.PATHS['distilbert'], config=DistilBertConfig())
    self.fc = nn.Linear(768, 2)
    self.dropout = nn.Dropout(dropout)
def __init__(self, hparams):
    """DS-DST belief tracker.

    Args:
        hparams: hyperparameters.

    Inputs: turn_inputs, turn_context, turn_span, slot_idx, train
        turn_inputs: Input of a turn including user utterance, system response, belief, gate, action.
        turn_context: Stacked concatenation of user utterance and system response.
        turn_span: Index of value spans in the context.
        slot_idx: Index of the slot in the ontology.
        train: Whether to train or not. Default: True

    Outputs: loss if train, else joint_acc, slot_acc
        loss: Sum of gate loss, span loss and value loss.
    """
    super(DST, self).__init__()
    self.context_encoder = DistilBertModel.from_pretrained(
        "distilbert-base-uncased")  # fine-tuned
    self.context_encoder.train()
    self.slot_encoder = DistilBertModel.from_pretrained(
        "distilbert-base-uncased").requires_grad_(False)
    self.value_encoder = DistilBertModel.from_pretrained(
        "distilbert-base-uncased").requires_grad_(False)  # parameters fixed
    self.tokenizer = DistilBertTokenizerFast.from_pretrained(
        "distilbert-base-uncased")
    self.hidden_size = self.context_encoder.embeddings.word_embeddings.embedding_dim  # 768
    self.linear_gate = nn.Linear(self.hidden_size * 2, 3)  # none, don't care, prediction
    self.linear_span = nn.Linear(self.hidden_size, 2)  # start, end
    self.value_ontology = json.load(
        open(os.path.join(hparams.data_path, "ontology_processed.json"), "r"))
    self.gate_loss_weight = torch.tensor([0.5, 1.0, 1.0])
    self.gate_criterion = torch.nn.NLLLoss(weight=self.gate_loss_weight)
    # self.context_attention = SelfAttention(self.hidden_size, hparams.dropout)
    # self.context_attention = copy.deepcopy(self.context_encoder.transformer.layer[-1])
    # self.context_attention.train()
    self.margin = hparams.margin
    self.use_span = hparams.use_span  # default False
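# Illustrative only: the gate head above maps hidden_size * 2 -> 3, and the criterion
# is NLLLoss, which expects log-probabilities. A hedged sketch of how the gate loss
# might be computed (all tensor and argument names are assumptions; the real DST
# forward pass is not shown above).
def score_gates(self, context_ids, context_mask, slot_ids, slot_mask, gate_labels):
    # [CLS] vectors from the trainable context encoder and the frozen slot encoder.
    context_cls = self.context_encoder(context_ids, attention_mask=context_mask)[0][:, 0]
    slot_cls = self.slot_encoder(slot_ids, attention_mask=slot_mask)[0][:, 0]
    # hidden_size * 2 -> 3 logits: none, don't care, prediction.
    gate_logits = self.linear_gate(torch.cat([context_cls, slot_cls], dim=-1))
    # NLLLoss expects log-probabilities, so apply log_softmax before the criterion.
    return self.gate_criterion(torch.log_softmax(gate_logits, dim=-1), gate_labels)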
def generated_word_embedding(self, labels_csv_path):
    save_features_path = self.data_path
    if not os.path.exists(os.path.join(labels_csv_path)):
        print("The file {} does not exist, please double-check it.".format(
            labels_csv_path))
    else:
        csv_file_name = labels_csv_path.split('/')[-1].split('.')[0]
        path_to_save_wb = csv_file_name + '_WE'
        if not os.path.exists(
                os.path.join(save_features_path, path_to_save_wb)):
            os.mkdir(os.path.join(save_features_path, path_to_save_wb))
        path_to_save_features = os.path.join(save_features_path, path_to_save_wb)
        data = pd.read_csv(labels_csv_path)
        class_id = data.loc[:, ['labels_id']].values.squeeze()
        class_name = data.loc[:, ['labels']].values.squeeze()
        tokenizer = DistilBertTokenizer.from_pretrained(
            'distilbert-base-uncased')
        model = DistilBertModel.from_pretrained('distilbert-base-uncased')
        model.eval()
        model.cuda()
        device = torch.device("cuda")
        assert len(class_id) == len(class_name)
        f = 0
        for i in range(len(class_id)):
            label = class_id[i]
            feat = ()
            if f >= 0:
                for sent in class_name:
                    inputs = tokenizer.encode_plus(
                        sent,
                        add_special_tokens=True,
                        return_tensors='pt',
                    )
                    input_ids2 = torch.tensor(
                        tokenizer.encode(sent)).unsqueeze(0)  # Batch size 1
                    input_ids2 = input_ids2.to(device)
                    with torch.no_grad():
                        outputs2 = model(input_ids2)
                    o2 = outputs2[0].to('cpu').numpy()
                    feat += tuple(o2)
            scio.savemat(
                path_to_save_features + '/' + str(label) + '_' +
                str(class_name[i]) + '.mat', {
                    'feat_v': feat,
                    'GT': label
                })
            f += 1
        print('Finished generating word embeddings from "{}.csv", '
              'stored in "{}"'.format(csv_file_name, path_to_save_features))
def getDistilBertEmbeddings(self):
    model = DistilBertModel.from_pretrained('distilbert-base-uncased')
    tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
    model.eval()
    tokens_tensor, segments_tensors = self.getIndexs(tokenizer)
    with torch.no_grad():
        last_hidden_states = model(tokens_tensor, attention_mask=segments_tensors)
    features = last_hidden_states[0][:, 0, :].numpy()
    features = np.reshape(features, features.shape[1])
    return features.tolist()
def __init__(self):
    super(DISTILBERT_SQUAD, self).__init__()
    self.distilbert_model = DistilBertModel.from_pretrained(
        'distilbert-base-uncased')
    self.fc_layers = nn.Sequential(nn.Linear(768, 256),
                                   nn.ReLU(),
                                   nn.Linear(256, 2))
    self.criterion = nn.CrossEntropyLoss()
def from_pretrained(cls, cfg, **kwargs):
    """
    Load a pretrained model from https://huggingface.co/models

    :param cfg: general hydra config
    :return: DistilBertModel object
    """
    model = DistilBertModel.from_pretrained(
        cfg.model.model_path,
        num_labels=cfg.data_params.n_classes
    )
    return cls(model, cfg)
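# Hedged usage sketch: the classmethod above only reads cfg.model.model_path and
# cfg.data_params.n_classes, so a minimal OmegaConf object (Hydra's config backend)
# is enough to exercise it. The wrapper class name below is a placeholder.
from omegaconf import OmegaConf

cfg = OmegaConf.create({
    "model": {"model_path": "distilbert-base-uncased"},
    "data_params": {"n_classes": 2},
})
wrapper = SomeTextClassifier.from_pretrained(cfg)  # SomeTextClassifier stands in for the defining class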
def get_model_and_tokenizer(model_name, device, random_weights=False):
    if model_name.startswith('xlnet'):
        model = XLNetModel.from_pretrained(
            model_name, output_hidden_states=True).to(device)
        tokenizer = XLNetTokenizer.from_pretrained(model_name)
        sep = u'▁'
        emb_dim = 1024 if "large" in model_name else 768
    elif model_name.startswith('gpt2'):
        model = GPT2Model.from_pretrained(model_name,
                                          output_hidden_states=True).to(device)
        tokenizer = GPT2Tokenizer.from_pretrained(model_name)
        sep = 'Ġ'
        sizes = {
            "gpt2": 768,
            "gpt2-medium": 1024,
            "gpt2-large": 1280,
            "gpt2-xl": 1600
        }
        emb_dim = sizes[model_name]
    elif model_name.startswith('xlm'):
        model = XLMModel.from_pretrained(model_name,
                                         output_hidden_states=True).to(device)
        tokenizer = XLMTokenizer.from_pretrained(model_name)
        sep = '</w>'
    elif model_name.startswith('bert'):
        model = BertModel.from_pretrained(model_name,
                                          output_hidden_states=True).to(device)
        tokenizer = BertTokenizer.from_pretrained(model_name)
        sep = '##'
        emb_dim = 1024 if "large" in model_name else 768
    elif model_name.startswith('distilbert'):
        model = DistilBertModel.from_pretrained(
            model_name, output_hidden_states=True).to(device)
        tokenizer = DistilBertTokenizer.from_pretrained(model_name)
        sep = '##'
        emb_dim = 768
    elif model_name.startswith('roberta'):
        model = RobertaModel.from_pretrained(
            model_name, output_hidden_states=True).to(device)
        tokenizer = RobertaTokenizer.from_pretrained(model_name)
        sep = 'Ġ'
        emb_dim = 1024 if "large" in model_name else 768
    else:
        print('Unrecognized model name:', model_name)
        sys.exit()
    if random_weights:
        print('Randomizing weights')
        model.init_weights()
    return model, tokenizer, sep, emb_dim
def calculate_BERT_representation(input_text):
    tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
    model = DistilBertModel.from_pretrained("distilbert-base-uncased")
    input_ids = torch.tensor(tokenizer.encode(input_text)).unsqueeze(0)
    outputs = model(input_ids)
    last_hidden_states = outputs[0]
    # Average the token embeddings, excluding the [CLS] and [SEP] tokens.
    mean_representation = torch.mean(last_hidden_states.squeeze(0)[1:-1], dim=0)
    return mean_representation
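# Hedged usage example: the function above mean-pools the token embeddings between
# [CLS] and [SEP], so for the base model it returns a 768-dimensional vector.
vector = calculate_BERT_representation("DistilBERT gives a compact sentence representation.")
print(vector.shape)  # torch.Size([768])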
def __init__(self):
    gpu = torch.cuda.is_available()
    self.device = torch.device("cuda" if gpu else "cpu")
    self.device = "cpu"  # override: force CPU
    print("using device: {}".format(self.device))
    model_name = "distilbert-base-uncased"
    self.model = DistilBertModel.from_pretrained(
        model_name, output_attentions=True).to(self.device)
    self.tokenizer = DistilBertTokenizer.from_pretrained(model_name)
def download_distilbert_base():
    file = '../input/distilbert-base-uncased'
    config = DistilBertConfig.from_pretrained('distilbert-base-uncased')
    config.save_pretrained(file)
    model = DistilBertModel.from_pretrained('distilbert-base-uncased')
    model.save_pretrained(file)
    tkn = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
    tkn.save_pretrained(file)
def __init__(self, pre_trained: str, class_count: int, sent_count: int):
    super().__init__()
    self.sent_count = sent_count
    self.bert = DistilBertModel.from_pretrained(pre_trained)
    emb_size = self.bert.config.dim
    self.class_embs = Parameter(torch.randn(class_count, emb_size))
    self.multi_weight = Parameter(torch.randn(class_count, emb_size))
    self.multi_bias = Parameter(torch.randn(class_count))
def test_inference_no_head_absolute_embedding(self):
    model = DistilBertModel.from_pretrained("distilbert-base-uncased")
    input_ids = torch.tensor([[0, 345, 232, 328, 740, 140, 1695, 69, 6078, 1588, 2]])
    attention_mask = torch.tensor([[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])
    output = model(input_ids, attention_mask=attention_mask)[0]
    expected_shape = torch.Size((1, 11, 768))
    self.assertEqual(output.shape, expected_shape)
    expected_slice = torch.tensor(
        [[[-0.1639, 0.3299, 0.1648],
          [-0.1746, 0.3289, 0.1710],
          [-0.1884, 0.3357, 0.1810]]]
    )
    self.assertTrue(torch.allclose(output[:, 1:4, 1:4], expected_slice, atol=1e-4))
def __init__(self, model='bert'):
    super().__init__()
    if model == 'bert':
        self.model = BertModel.from_pretrained('bert-base-uncased')
        self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        # TODO: need to save to prevent downloading again
    elif model == 'distilbert':
        self.model = DistilBertModel.from_pretrained(
            'distilbert-base-uncased')
        self.tokenizer = DistilBertTokenizer.from_pretrained(
            'distilbert-base-uncased')
def __init__(self, distilbert_config, args):
    super(DistilBertClassifier, self).__init__(distilbert_config)
    self.distilbert = DistilBertModel.from_pretrained(
        args.model_name_or_path, config=distilbert_config)  # load pretrained DistilBERT
    self.num_labels = distilbert_config.num_labels
    self.label_classifier = FCLayer(distilbert_config.hidden_size,
                                    distilbert_config.num_labels,
                                    args.dropout_rate,
                                    use_activation=False)
def get_model_and_tokenizer(model_name, device="cpu", random_weights=False, model_path=None): """ model_path: if given, initialize from path instead of official repo """ init_model = model_name if model_path: print("Initializing model from local path:", model_path) init_model = model_path if model_name.startswith("xlnet"): model = XLNetModel.from_pretrained( init_model, output_hidden_states=True).to(device) tokenizer = XLNetTokenizer.from_pretrained(init_model) sep = u"▁" elif model_name.startswith("gpt2"): model = GPT2Model.from_pretrained(init_model, output_hidden_states=True).to(device) tokenizer = GPT2Tokenizer.from_pretrained(init_model) sep = "Ġ" elif model_name.startswith("xlm"): model = XLMModel.from_pretrained(init_model, output_hidden_states=True).to(device) tokenizer = XLMTokenizer.from_pretrained(init_model) sep = "</w>" elif model_name.startswith("bert"): model = BertModel.from_pretrained(init_model, output_hidden_states=True).to(device) tokenizer = BertTokenizer.from_pretrained(init_model) sep = "##" elif model_name.startswith("distilbert"): model = DistilBertModel.from_pretrained( init_model, output_hidden_states=True).to(device) tokenizer = DistilBertTokenizer.from_pretrained(init_model) sep = "##" elif model_name.startswith("roberta"): model = RobertaModel.from_pretrained( model_name, output_hidden_states=True).to(device) tokenizer = RobertaTokenizer.from_pretrained(model_name) sep = "Ġ" else: print("Unrecognized model name:", model_name) sys.exit() if random_weights: print("Randomizing weights") model.init_weights() return model, tokenizer, sep
def __init__(self, pretrained=True, **kwargs):
    super().__init__()
    hidden_dimension = 32
    if pretrained:
        self.bert = DistilBertModel.from_pretrained(
            "distilbert-base-uncased")
    else:
        self.bert = DistilBertModel(DistilBertConfig())
    self.tokenizer = DistilBertTokenizer.from_pretrained(
        "distilbert-base-uncased")
    self.pre_classifier = nn.Linear(self.bert.config.dim, hidden_dimension)
    self.classifier = nn.Linear(hidden_dimension, 1)
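# Illustrative only: the constructor above defines a 768 -> 32 -> 1 head but no
# forward method is shown; a typical CLS-pooled wiring might look like this
# (the argument names and the ReLU between the two layers are assumptions).
def forward(self, input_ids, attention_mask=None):
    hidden = self.bert(input_ids, attention_mask=attention_mask)[0]  # (batch, seq, 768)
    pooled = hidden[:, 0]                              # take the [CLS] token representation
    pooled = torch.relu(self.pre_classifier(pooled))   # 768 -> 32 with an assumed ReLU
    return self.classifier(pooled)                     # 32 -> 1 logit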