Example #1
 def __init__(self, final_size, pooling, bert_path, model_type='distilbert', pool_activation=None): 
     super().__init__()
     if model_type == 'distilbert':
         self.bert = DistilBertModel.from_pretrained(bert_path, return_dict=True)
     else:
         self.bert = BertModel.from_pretrained(bert_path, return_dict=True)
     self.pooler = BertPooler(self.bert.config, final_size, pooling, pool_activation)
Example #2
    def __init__(self):
        """
        In the constructor we instantiate the two DistilBERT encoders, the linear
        projection and normalization layers, and the final scoring layer, and
        assign them as member variables.
        """
        super(ContrastiveModel, self).__init__()
        self.original_transformer = DistilBertModel.from_pretrained(
            'distilbert-base-uncased')
        self.translation_transformer = DistilBertModel.from_pretrained(
            'distilbert-base-german-cased')
        self.original_linear = torch.nn.Linear(201 * 768, 256)
        self.translation_linear = torch.nn.Linear(201 * 768, 256)
        self.original_norm = torch.nn.BatchNorm1d(256)
        self.translation_norm = torch.nn.BatchNorm1d(256)

        self.final_linear = torch.nn.Linear(512, 1)
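A forward pass is not included in this example; a hedged sketch consistent with the shapes above (an assumption, not the original code: it presumes both inputs are padded to 201 tokens) might look like:

    def forward(self, original_ids, original_mask, translation_ids, translation_mask):
        # encode both sides; [0] is the last hidden state, shape (batch, 201, 768)
        orig = self.original_transformer(original_ids, attention_mask=original_mask)[0]
        trans = self.translation_transformer(translation_ids, attention_mask=translation_mask)[0]
        # flatten, project to 256 dimensions, and normalize each side
        orig = self.original_norm(self.original_linear(orig.flatten(start_dim=1)))
        trans = self.translation_norm(self.translation_linear(trans.flatten(start_dim=1)))
        # concatenate the two 256-d vectors and score the pair
        return self.final_linear(torch.cat([orig, trans], dim=1))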
Example #3
 def get_encoder(self):
     if self.hparams.model_type == 'bert':
         encoder = BertModel.from_pretrained('bert-base-uncased')
     elif self.hparams.model_type == 'bert-cased':
         encoder = BertModel.from_pretrained('bert-base-cased')
     elif self.hparams.model_type == 'bert-large':
         encoder = BertModel.from_pretrained('bert-large-uncased')
     elif self.hparams.model_type == 'distilbert':
         encoder = DistilBertModel.from_pretrained(
             'distilbert-base-uncased')
     elif self.hparams.model_type == 'roberta':
         encoder = RobertaModel.from_pretrained('roberta-base')
     elif self.hparams.model_type == 'roberta-large':
         encoder = RobertaModel.from_pretrained('roberta-large')
     elif self.hparams.model_type == 'albert':
         encoder = AlbertModel.from_pretrained('albert-base-v2')
     elif self.hparams.model_type == 'albert-xxlarge':
         encoder = AlbertModel.from_pretrained('albert-xxlarge-v2')
     elif self.hparams.model_type == 'electra':
         encoder = ElectraModel.from_pretrained(
             'google/electra-base-discriminator')
     elif self.hparams.model_type == 'electra-large':
         encoder = ElectraModel.from_pretrained(
             'google/electra-large-discriminator')
     else:
         raise ValueError(f"Unknown model_type: {self.hparams.model_type}")
     return encoder
Example #4
def get_distilkobert_model(no_cuda=False):
    model = DistilBertModel.from_pretrained('monologg/distilkobert')

    device = "cuda" if torch.cuda.is_available() and not no_cuda else "cpu"
    model.to(device)

    return model
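A possible usage sketch (the token IDs below are purely illustrative; in practice they come from the matching DistilKoBERT tokenizer, which this example does not show):

model = get_distilkobert_model()
input_ids = torch.tensor([[2, 1189, 5445, 3]])      # hypothetical token IDs
device = next(model.parameters()).device            # reuse the device the model was moved to
hidden_states = model(input_ids.to(device))[0]      # last hidden state, shape (1, 4, 768)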
Example #5
    def __init__(self, vocab_size):
        super(DistilBertEncoder, self).__init__()
        self.bert = DistilBertModel.from_pretrained('distilbert-base-uncased')
        self.bert.resize_token_embeddings(vocab_size)

        for param in self.bert.parameters():
            param.requires_grad = False
Example #6
 def __init__(self):
     super(DistillBERTClass, self).__init__()
     self.l1 = DistilBertModel.from_pretrained(
         config.PRE_TRAINED_MODEL_NAME)
     self.pre_classifier = torch.nn.Linear(768, 768)  # O/P of the bert
     self.dropout = torch.nn.Dropout(0.3)  # Just a dropout
     self.classifier = torch.nn.Linear(
         768, 25)  # output layer over the combined sentiment targets
Example #7
    def __init__(self, config):
        super().__init__(config)

        self.bert = DistilBertModel.from_pretrained('monologg/distilkobert',
                                                    output_hidden_states=True,
                                                    output_attentions=True)
        self.seq_len = config.max_length
        self.linear = torch.nn.Linear(config.hidden_size, 7)
Example #8
 def __init__(self, extra_layers):
     super().__init__()
     self.distilbert = DistilBertModel.from_pretrained('distilbert-base-uncased')
     self.config = self.distilbert.config
     self.distilbert = self.distilbert.transformer
     self.distilbert.layer = nn.ModuleList(self.distilbert.layer[6-extra_layers:])  # keep only the last extra_layers of DistilBERT's 6 transformer blocks
     self.qa_outputs = nn.Linear(768, 2)
     self.dropout = nn.Dropout(0.1)
Example #9
 def __init__(self, num_experts, unfreeze_gate):
     super().__init__()
     self.num_experts = num_experts
     self.distilbert = DistilBertModel.from_pretrained('distilbert-base-uncased')
     if not unfreeze_gate:
         self.freeze(model=self.distilbert,unfreeze_layers=[])
     self.pre_classifier = nn.Linear(768, num_experts*64) # dim, hidden
     self.classifier = nn.Linear(num_experts*64, num_experts) # hidden, output
Example #10
    def __init__(self, dropout=0.1):
        super(sentence_embeds_model, self).__init__()

        self.transformer = DistilBertModel.from_pretrained(
            'distilbert-base-uncased',
            dropout=dropout,
            output_hidden_states=True)
        self.embedding_size = 2 * self.transformer.config.hidden_size
Example #11
    def __init__(self):
        super().__init__()

        self.bert = DistilBertModel.from_pretrained("distilbert-base-uncased")
        self.pre_classifier = nn.Linear(768, 768)
        self.classifier = nn.Linear(768, 9)

        self.dropout = nn.Dropout(0.5)
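The forward pass is not shown; a hedged sketch of how such a head is commonly wired (classifying the first-token representation) could be:

    def forward(self, input_ids, attention_mask):
        hidden = self.bert(input_ids, attention_mask=attention_mask)[0]  # (batch, seq_len, 768)
        cls = hidden[:, 0]                                               # first-token vector
        x = torch.relu(self.pre_classifier(cls))
        return self.classifier(self.dropout(x))                          # logits over the 9 classes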
Example #12
 def __init__(self, config):
     super().__init__(config)
     # self.distilbert = DistilBertModel(config)
     self.distilbert = DistilBertModel.from_pretrained(
         'distilbert-base-uncased')
     self.pre_classifier = nn.Linear(config.dim, config.dim)
     self.classifier = nn.Linear(config.dim, config.num_labels)
     self.dropout = nn.Dropout(config.seq_classif_dropout)
Example #13
 def __init__(self, model_name="distilbert-base-uncased", device="cuda"):
     super().__init__()
     self.device = device
     self.tokenizer = DistilBertTokenizer.from_pretrained(model_name)
     self.model = DistilBertModel.from_pretrained(model_name).to(
         self.device)
     self.linear = nn.Linear(self.model.config.dim,
                             self.model.config.num_labels).to(self.device)
     self.dropout = nn.Dropout(self.model.config.qa_dropout).to(self.device)
Example #14
 def __init__(self, config, num_labels=2):
     super(DistilBertForSequenceClassification, self).__init__()
     self.num_labels = num_labels
     self.config = config
     self.bert = DistilBertModel.from_pretrained('distilbert-base-uncased',
                                                 output_hidden_states=False)
     self.dropout = nn.Dropout(config.dropout)
     self.classifier = nn.Linear(config.hidden_size, num_labels)
     nn.init.xavier_normal_(self.classifier.weight)
Example #15
    def __init__(self, dropout):
        super(DISTILBertModel, self).__init__()

        self.distilbert = DistilBertModel.from_pretrained(
            config.PATHS['distilbert'],
            config=DistilBertConfig())

        self.fc = nn.Linear(768, 2)
        self.dropout = nn.Dropout(dropout)
Example #16
    def __init__(self, hparams):
        """DS-DST belief tracker.

        Args:
            hparams: hyperparameters.

        Inputs: turn_inputs, turn_context, turn_span, slot_idx, train
            turn_inputs: Input of a turn, including user utterance, system response, belief, gate, action.
            turn_context: Stacked concatenation of the user utterance and system response.
            turn_span: Index of value spans in the context.
            slot_idx: Index of the slot in the ontology.
            train: Whether to train. Default: True

        Outputs:
            loss if train, otherwise joint_acc and slot_acc.
            loss: Sum of the gate loss, span loss and value loss.
        """

        super(DST, self).__init__()
        self.context_encoder = DistilBertModel.from_pretrained(
            "distilbert-base-uncased")  # use fine-tuning
        self.context_encoder.train()
        self.slot_encoder = DistilBertModel.from_pretrained(
            "distilbert-base-uncased").requires_grad_(False)
        self.value_encoder = DistilBertModel.from_pretrained(
            "distilbert-base-uncased").requires_grad_(False)  # fix parameter
        self.tokenizer = DistilBertTokenizerFast.from_pretrained(
            "distilbert-base-uncased")
        self.hidden_size = self.context_encoder.embeddings.word_embeddings.embedding_dim  # 768
        self.linear_gate = nn.Linear(self.hidden_size * 2,
                                     3)  # none, don't care, prediction
        self.linear_span = nn.Linear(self.hidden_size, 2)  # start, end
        self.value_ontology = json.load(
            open(os.path.join(hparams.data_path, "ontology_processed.json"),
                 "r"))
        self.gate_loss_weight = torch.tensor([0.5, 1.0, 1.0])
        self.gate_criterion = torch.nn.NLLLoss(weight=self.gate_loss_weight)
        # self.context_attention = SelfAttention(self.hidden_size, hparams.dropout)
        # self.context_attention = copy.deepcopy(self.context_encoder.transformer.layer[-1])
        # self.context_attention.train()

        self.margin = hparams.margin
        self.use_span = hparams.use_span  # default False
Example #17
    def generated_word_embedding(self, labels_csv_path):
        save_features_path = self.data_path
        if not os.path.exists(os.path.join(labels_csv_path)):
            print("It not existed {} file, please double check it.".format(
                labels_csv_path))
        else:
            csv_file_name = labels_csv_path.split('/')[-1].split('.')[0]
            path_to_save_wb = csv_file_name + '_WE'
            if not os.path.exists(
                    os.path.join(save_features_path, path_to_save_wb)):
                os.mkdir(os.path.join(save_features_path, path_to_save_wb))
            path_to_save_features = os.path.join(save_features_path,
                                                 path_to_save_wb)

            data = pd.read_csv(labels_csv_path)
            class_id = data.loc[:, ['labels_id']].values.squeeze()
            class_name = data.loc[:, ['labels']].values.squeeze()

            tokenizer = DistilBertTokenizer.from_pretrained(
                'distilbert-base-uncased')
            model = DistilBertModel.from_pretrained('distilbert-base-uncased')
            model.eval()
            model.cuda()
            device = torch.device("cuda")

            assert len(class_id) == len(class_name)

            f = 0
            for i in range(len(class_id)):
                label = class_id[i]
                feat = ()
                if f >= 0:
                    for sent in class_name:
                        inputs = tokenizer.encode_plus(
                            sent,
                            add_special_tokens=True,
                            return_tensors='pt',
                        )
                        input_ids2 = torch.tensor(
                            tokenizer.encode(sent)).unsqueeze(
                                0)  # Batch size 1
                        input_ids2 = input_ids2.to(device)
                        with torch.no_grad():
                            outputs2 = model(input_ids2)
                            o2 = outputs2[0].to('cpu').numpy()
                        feat += tuple(o2)
                    scio.savemat(
                        path_to_save_features + '/' + str(label) + '_' +
                        str(class_name[i]) + '.mat', {
                            'feat_v': feat,
                            'GT': label
                        })
                f += 1
            print('Finished generating word embeddings from "{}.csv", '
                  'stored in "{}"'.format(csv_file_name,
                                          path_to_save_features))
Example #18
	def getDistilBertEmbeddings(self):
		model = DistilBertModel.from_pretrained('distilbert-base-uncased')
		tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
		model.eval()
		tokens_tensor, segments_tensors = self.getIndexs(tokenizer)
		with torch.no_grad():
			last_hidden_states = model(tokens_tensor, attention_mask=segments_tensors)
		features = last_hidden_states[0][:,0,:].numpy()
		features = np.reshape(features,features.shape[1])
		return(features.tolist())
Example #19
    def __init__(self):
        super(DISTILBERT_SQUAD, self).__init__()

        self.distilbert_model = DistilBertModel.from_pretrained(
            'distilbert-base-uncased')

        self.fc_layers = nn.Sequential(nn.Linear(768, 256), nn.ReLU(),
                                       nn.Linear(256, 2))

        self.criterion = nn.CrossEntropyLoss()
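The forward pass is not part of this example; a hedged sketch consistent with the head above, splitting the per-token logits into start and end scores, might be:

    def forward(self, input_ids, attention_mask):
        hidden = self.distilbert_model(input_ids, attention_mask=attention_mask)[0]  # (batch, seq_len, 768)
        logits = self.fc_layers(hidden)                                              # (batch, seq_len, 2)
        start_logits, end_logits = logits.split(1, dim=-1)
        return start_logits.squeeze(-1), end_logits.squeeze(-1)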
Example #20
 def from_pretrained(cls, cfg, **kwargs):
     """
     Load pretrained model from https://huggingface.co/models
     :param cfg: general hydra config
     :return: instance of this class wrapping the loaded DistilBertModel
     """
     model = DistilBertModel.from_pretrained(
         cfg.model.model_path,
         num_labels=cfg.data_params.n_classes
     )
     return cls(model, cfg)
Example #21
def get_model_and_tokenizer(model_name, device, random_weights=False):

    if model_name.startswith('xlnet'):
        model = XLNetModel.from_pretrained(
            model_name, output_hidden_states=True).to(device)
        tokenizer = XLNetTokenizer.from_pretrained(model_name)
        sep = u'▁'
        emb_dim = 1024 if "large" in model_name else 768
    elif model_name.startswith('gpt2'):
        model = GPT2Model.from_pretrained(model_name,
                                          output_hidden_states=True).to(device)
        tokenizer = GPT2Tokenizer.from_pretrained(model_name)
        sep = 'Ġ'
        sizes = {
            "gpt2": 768,
            "gpt2-medium": 1024,
            "gpt2-large": 1280,
            "gpt2-xl": 1600
        }
        emb_dim = sizes[model_name]
    elif model_name.startswith('xlm'):
        model = XLMModel.from_pretrained(model_name,
                                         output_hidden_states=True).to(device)
        tokenizer = XLMTokenizer.from_pretrained(model_name)
        sep = '</w>'
        emb_dim = model.config.emb_dim
    elif model_name.startswith('bert'):
        model = BertModel.from_pretrained(model_name,
                                          output_hidden_states=True).to(device)
        tokenizer = BertTokenizer.from_pretrained(model_name)
        sep = '##'
        emb_dim = 1024 if "large" in model_name else 768
    elif model_name.startswith('distilbert'):
        model = DistilBertModel.from_pretrained(
            model_name, output_hidden_states=True).to(device)
        tokenizer = DistilBertTokenizer.from_pretrained(model_name)
        sep = '##'
        emb_dim = 768
    elif model_name.startswith('roberta'):
        model = RobertaModel.from_pretrained(
            model_name, output_hidden_states=True).to(device)
        tokenizer = RobertaTokenizer.from_pretrained(model_name)
        sep = 'Ġ'
        emb_dim = 1024 if "large" in model_name else 768
    else:
        print('Unrecognized model name:', model_name)
        sys.exit()

    if random_weights:
        print('Randomizing weights')
        model.init_weights()

    return model, tokenizer, sep, emb_dim
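A typical call, using the function as defined above:

model, tokenizer, sep, emb_dim = get_model_and_tokenizer('distilbert-base-uncased', 'cpu')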
Example #22
def calculate_BERT_representation(input_text):
    tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
    model = DistilBertModel.from_pretrained("distilbert-base-uncased")
    input_ids = torch.tensor(tokenizer.encode(input_text)).unsqueeze(0)
    outputs = model(input_ids)
    last_hidden_states = outputs[0]

    mean_representation = torch.mean(last_hidden_states.squeeze(0)[1:-1],
                                     dim=0)

    return mean_representation
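Usage sketch; the function returns the mean of the token embeddings, excluding the special tokens:

sentence_vector = calculate_BERT_representation("a short example sentence")
print(sentence_vector.shape)  # torch.Size([768])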
Example #23
    def __init__(self):
        gpu = torch.cuda.is_available()
        self.device = torch.device("cuda" if gpu else "cpu")
        self.device = "cpu"
        print("using device: {}".format(self.device))

        model_name = "distilbert-base-uncased"
        self.model = DistilBertModel.from_pretrained(
            model_name, output_attentions=True).to(self.device)

        self.tokenizer = DistilBertTokenizer.from_pretrained(model_name)
Example #24
def download_distilbert_base():
    file = '../input/distilbert-base-uncased'

    config = DistilBertConfig.from_pretrained('distilbert-base-uncased')
    config.save_pretrained(file)
    
    model = DistilBertModel.from_pretrained('distilbert-base-uncased')
    model.save_pretrained(file)

    tkn = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
    tkn.save_pretrained(file)
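Once saved, the files can be reloaded offline from the same directory, e.g.:

model = DistilBertModel.from_pretrained('../input/distilbert-base-uncased')
tokenizer = DistilBertTokenizer.from_pretrained('../input/distilbert-base-uncased')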
Example #25
    def __init__(self, pre_trained: str, class_count: int, sent_count: int):
        super().__init__()

        self.sent_count = sent_count

        self.bert = DistilBertModel.from_pretrained(pre_trained)

        emb_size = self.bert.config.dim
        self.class_embs = Parameter(torch.randn(class_count, emb_size))
        self.multi_weight = Parameter(torch.randn(class_count, emb_size))
        self.multi_bias = Parameter(torch.randn(class_count))
Example #26
    def test_inference_no_head_absolute_embedding(self):
        model = DistilBertModel.from_pretrained("distilbert-base-uncased")
        input_ids = torch.tensor([[0, 345, 232, 328, 740, 140, 1695, 69, 6078, 1588, 2]])
        attention_mask = torch.tensor([[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])
        output = model(input_ids, attention_mask=attention_mask)[0]
        expected_shape = torch.Size((1, 11, 768))
        self.assertEqual(output.shape, expected_shape)
        expected_slice = torch.tensor(
            [[[-0.1639, 0.3299, 0.1648], [-0.1746, 0.3289, 0.1710], [-0.1884, 0.3357, 0.1810]]]
        )

        self.assertTrue(torch.allclose(output[:, 1:4, 1:4], expected_slice, atol=1e-4))
Example #27
    def __init__(self, model='bert'):
        super().__init__()

        if model == 'bert':
            self.model = BertModel.from_pretrained('bert-base-uncased')
            self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
            # TODO: need to save to prevent downloading again
        elif model == 'distilbert':
            self.model = DistilBertModel.from_pretrained(
                'distilbert-base-uncased')
            self.tokenizer = DistilBertTokenizer.from_pretrained(
                'distilbert-base-uncased')
Example #28
    def __init__(self, distilbert_config, args):
        super(DistilBertClassifier, self).__init__(distilbert_config)
        self.distilbert = DistilBertModel.from_pretrained(
            args.model_name_or_path,
            config=distilbert_config)  # Load pretrained distilbert

        self.num_labels = distilbert_config.num_labels

        self.label_classifier = FCLayer(distilbert_config.hidden_size,
                                        distilbert_config.num_labels,
                                        args.dropout_rate,
                                        use_activation=False)
Example #29
def get_model_and_tokenizer(model_name,
                            device="cpu",
                            random_weights=False,
                            model_path=None):
    """
    model_path: if given, initialize from path instead of official repo
    """

    init_model = model_name
    if model_path:
        print("Initializing model from local path:", model_path)
        init_model = model_path

    if model_name.startswith("xlnet"):
        model = XLNetModel.from_pretrained(
            init_model, output_hidden_states=True).to(device)
        tokenizer = XLNetTokenizer.from_pretrained(init_model)
        sep = u"▁"
    elif model_name.startswith("gpt2"):
        model = GPT2Model.from_pretrained(init_model,
                                          output_hidden_states=True).to(device)
        tokenizer = GPT2Tokenizer.from_pretrained(init_model)
        sep = "Ġ"
    elif model_name.startswith("xlm"):
        model = XLMModel.from_pretrained(init_model,
                                         output_hidden_states=True).to(device)
        tokenizer = XLMTokenizer.from_pretrained(init_model)
        sep = "</w>"
    elif model_name.startswith("bert"):
        model = BertModel.from_pretrained(init_model,
                                          output_hidden_states=True).to(device)
        tokenizer = BertTokenizer.from_pretrained(init_model)
        sep = "##"
    elif model_name.startswith("distilbert"):
        model = DistilBertModel.from_pretrained(
            init_model, output_hidden_states=True).to(device)
        tokenizer = DistilBertTokenizer.from_pretrained(init_model)
        sep = "##"
    elif model_name.startswith("roberta"):
        model = RobertaModel.from_pretrained(
            init_model, output_hidden_states=True).to(device)
        tokenizer = RobertaTokenizer.from_pretrained(init_model)
        sep = "Ġ"
    else:
        print("Unrecognized model name:", model_name)
        sys.exit()

    if random_weights:
        print("Randomizing weights")
        model.init_weights()

    return model, tokenizer, sep
Example #30
    def __init__(self, pretrained=True, **kwargs):
        super().__init__()
        hidden_dimension = 32

        if pretrained:
            self.bert = DistilBertModel.from_pretrained(
                "distilbert-base-uncased")
        else:
            self.bert = DistilBertModel(DistilBertConfig())
        self.tokenizer = DistilBertTokenizer.from_pretrained(
            "distilbert-base-uncased")
        self.pre_classifier = nn.Linear(self.bert.config.dim, hidden_dimension)
        self.classifier = nn.Linear(hidden_dimension, 1)