Example #1
def loadNet(modelURL,
            numClasses,
            unfreezePretrain=False,
            fromHuggingFace=False):

    if not fromHuggingFace:
        pretrainedNet = hub.KerasLayer(modelURL,
                                       input_shape=(IMG_SIZE, IMG_SIZE,
                                                    CHANNELS))
        pretrainedNet.trainable = unfreezePretrain  # keep the pretrained network frozen unless unfreezePretrain is True

    else:
        # config = AutoConfig.from_pretrained(modelURL, config.num_labels = numClasses, config.seq_classif_dropout = 0) #distil
        # tokenizer = AutoTokenizer.from_pretrained('distilbert-base-uncased')
        config = DistilBertConfig(
            num_labels=numClasses)  # , seq_classif_dropout = 0.99

        print(f'Number of Classes: {numClasses}')
        # config.num_labels = numClasses
        # config.seq_classif_dropout = 0
        print(config)
        pretrainedNet = TFDistilBertForSequenceClassification.from_pretrained(
            modelURL, config=config)  # TFBertForSequenceClassification
        pretrainedNet.layers[0].trainable = unfreezePretrain

    return pretrainedNet
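A minimal usage sketch for the Hugging Face branch of loadNet above; the model name and label count are illustrative, and the transformers imports are assumed to be available at module level in the original file.

# Hypothetical call: build a DistilBERT sequence classifier with 3 labels
# and keep the pretrained transformer block frozen.
net = loadNet('distilbert-base-uncased',
              numClasses=3,
              unfreezePretrain=False,
              fromHuggingFace=True)
print(net.config.num_labels)    # 3
print(net.layers[0].trainable)  # False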
Example #2
def test(informal):
    if torch.cuda.is_available():
        device = torch.device('cuda:3')
        print(f'Using GPU device: {device}')
    else:
        device = torch.device('cpu')
        print(f'GPU is not available, using CPU device {device}')

    test_config = {'batch_size': 5, 'epoch': 29, 'save_dir': './checkpoints/'}

    test_dataset = FormalDataset(informal)
    dataloader = DataLoader(test_dataset,
                            batch_size=test_config['batch_size'],
                            shuffle=False,
                            num_workers=4,
                            drop_last=False)
    config = DistilBertConfig()
    model = DistilBertForMaskedLM(config)
    load_model(test_config['epoch'], model, test_config['save_dir'])
    model.to(device)
    model.eval()
    with torch.no_grad():
        for i, batch in tqdm(enumerate(dataloader)):
            inp = batch['input_ids'].to(device)
            attn = batch['attention_mask'].to(device)
            logits = model(input_ids=inp, attention_mask=attn)[0]
            preds = decode_text(test_dataset.tokenizer, logits)
            with open('test_pred.txt', 'a') as res_file:
                for seq in preds:
                    res_file.write(seq + '\n')
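The decode_text helper is not shown in the example; a minimal sketch of what such a function typically does (argmax over the vocabulary, then detokenize), given here as an assumption rather than the original implementation.

def decode_text(tokenizer, logits):
    # logits: (batch, seq_len, vocab_size) -> most likely token id per position
    pred_ids = logits.argmax(dim=-1)
    return [tokenizer.decode(ids, skip_special_tokens=True) for ids in pred_ids]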
Example #3
def classify(text):
    print('start')

    path = settings.MEDIA_ROOT + "\\distilbert.bin"
    MODEL_PATH = 'distilbert-base-uncased'
    tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)

    encode = tokenizer.encode_plus(
        text,
        add_special_tokens=True,
        max_length=192,
        padding='max_length',
        truncation=True,
    )
    device = torch.device('cpu')
    tokens = encode['input_ids']
    tokens = torch.tensor(tokens, dtype=torch.long).unsqueeze(0)
    tokens = tokens.to(device)
    config = DistilBertConfig()
    model = Bert(DistilBertModel(config))

    model.load_state_dict(torch.load(path, map_location=device))
    model.to(device)

    output = model(tokens)
    output = output.cpu().detach().numpy()

    print(output)
    output = 0.0 if output < 0.5 else 1.0
    return output
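The Bert wrapper class used above is not shown; a minimal sketch of a single-output DistilBERT head that would be consistent with the 0.5 threshold applied to the output (an assumption, not the original class).

import torch
import torch.nn as nn

class Bert(nn.Module):
    def __init__(self, backbone):
        super().__init__()
        self.backbone = backbone                      # a DistilBertModel
        self.out = nn.Linear(backbone.config.dim, 1)  # single score

    def forward(self, input_ids):
        hidden = self.backbone(input_ids)[0]          # (batch, seq, dim)
        return torch.sigmoid(self.out(hidden[:, 0]))  # probability from the [CLS] position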
Example #4
def extract_embeddings_for_other_clf():
    distil_bert = "distilbert-base-uncased"

    config = DistilBertConfig(dropout=0.2, attention_dropout=0.2)
    config.output_hidden_states = False
    transformer_model = TFDistilBertModel.from_pretrained(distil_bert,
                                                          config=config)

    input_ids_in = tf.keras.layers.Input(shape=(25, ),
                                         name="input_token",
                                         dtype="int32")
    input_masks_in = tf.keras.layers.Input(shape=(25, ),
                                           name="masked_token",
                                           dtype="int32")

    embedding_layer = transformer_model(input_ids_in,
                                        attention_mask=input_masks_in)[0]
    cls_token = embedding_layer[:, 0, :]
    X = tf.keras.layers.BatchNormalization()(cls_token)
    X = tf.keras.layers.Dense(192, activation="relu")(X)
    X = tf.keras.layers.Dropout(0.2)(X)
    X = tf.keras.layers.Dense(3, activation="softmax")(X)
    model = tf.keras.Model(inputs=[input_ids_in, input_masks_in], outputs=X)

    for layer in model.layers[:3]:
        layer.trainable = False

    return model
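A hypothetical usage sketch for the model built above; the tokenizer name is an assumption, and max_length=25 matches the Input shapes.

from transformers import DistilBertTokenizerFast

tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')
enc = tokenizer(['an example sentence'],
                padding='max_length', max_length=25,
                truncation=True, return_tensors='tf')
model = extract_embeddings_for_other_clf()
probs = model.predict([enc['input_ids'], enc['attention_mask']])  # shape (1, 3)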
Example #5
    def __init__(self, config: Dict):
        super().__init__()

        self.config = config
        self.model_config = DistilBertConfig(**self.config["model"])
        self.model = DistilBertModel(self.model_config)
        self.criterion = nn.CosineEmbeddingLoss(margin=0.0, reduction='mean')
Example #6
    def __init__(self,
                 embedding_size,
                 projection_size,
                 n_layers,
                 emo_dict,
                 dropout=0.1):
        super(context_classifier_model, self).__init__()

        self.projection_size = projection_size
        self.projection = torch.nn.Linear(embedding_size, projection_size)
        self.position_embeds = torch.nn.Embedding(3, projection_size)
        self.norm = torch.nn.LayerNorm(projection_size)
        self.drop = torch.nn.Dropout(dropout)

        context_config = DistilBertConfig(dropout=dropout,
                                          dim=projection_size,
                                          hidden_dim=4 * projection_size,
                                          n_layers=n_layers,
                                          n_heads=1,
                                          num_labels=4)

        self.context_transformer = DistilBertForSequenceClassification(
            context_config)
        self.others_label = emo_dict['others']
        self.bin_loss_fct = torch.nn.BCEWithLogitsLoss()
Example #7
    def prepare_config_and_inputs(self):
        input_ids = ids_tensor([self.batch_size, self.seq_length],
                               self.vocab_size)

        input_mask = None
        if self.use_input_mask:
            input_mask = ids_tensor([self.batch_size, self.seq_length],
                                    vocab_size=2)

        sequence_labels = None
        token_labels = None
        choice_labels = None
        if self.use_labels:
            sequence_labels = ids_tensor([self.batch_size],
                                         self.type_sequence_label_size)
            token_labels = ids_tensor([self.batch_size, self.seq_length],
                                      self.num_labels)
            choice_labels = ids_tensor([self.batch_size], self.num_choices)

        config = DistilBertConfig(
            vocab_size=self.vocab_size,
            dim=self.hidden_size,
            n_layers=self.num_hidden_layers,
            n_heads=self.num_attention_heads,
            hidden_dim=self.intermediate_size,
            hidden_act=self.hidden_act,
            dropout=self.hidden_dropout_prob,
            attention_dropout=self.attention_probs_dropout_prob,
            max_position_embeddings=self.max_position_embeddings,
            initializer_range=self.initializer_range,
        )

        return config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
Example #8
def create_model(model_config: CommentClassifierConfig,
                 saved_weights_path: str = None,
                 max_seq_length: int = MAX_SEQ_LENGTH) -> tf.keras.Model:
    """
    :param model_config:       CommentClassifierConfig
    :param saved_weights_path: If defined, model weights will be loaded
                               from the provided checkpoint path
    :param max_seq_length:     Maximum length of the tokenized input to BERT
    :return:
        Model for text classification using DistilBert transformers
    """
    # Load pre-trained DistilBERT
    bert_config = DistilBertConfig(
        dropout=model_config.bert_dropout,
        attention_dropout=model_config.bert_attention_dropout,
        num_labels=NUM_CLASSES)
    bert_config.output_hidden_states = False
    transformer_model = TFDistilBertModel.from_pretrained(MODEL_NAME,
                                                          config=bert_config)

    input_ids_in = tf.keras.layers.Input(shape=(max_seq_length, ),
                                         name='input_token',
                                         dtype='int32')
    input_masks_in = tf.keras.layers.Input(shape=(max_seq_length, ),
                                           name='masked_token',
                                           dtype='int32')

    embedding_layer = transformer_model(input_ids_in,
                                        attention_mask=input_masks_in)[0]

    x = tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(
            model_config.lstm_units,
            return_sequences=True,
            dropout=model_config.lstm_dropout,
            recurrent_dropout=model_config.lstm_recurrent_dropout))(
                embedding_layer)

    x = tf.keras.layers.GlobalMaxPool1D()(x)
    x = tf.keras.layers.Dense(
        model_config.hidden_layer_dim,
        activation=model_config.hidden_layer_activation)(x)

    x = tf.keras.layers.Dropout(model_config.final_layer_dropout)(x)
    x = tf.keras.layers.Dense(
        NUM_CLASSES, activation=model_config.final_layer_activation)(x)

    model = tf.keras.Model(inputs=[input_ids_in, input_masks_in], outputs=x)

    # Use transfer learning only - do not train BERT again
    for layer in model.layers[:3]:
        layer.trainable = False

    # Load weights from a checkpoint, but allow partial matching
    # (e.g. due to a change in the optimizer)
    if saved_weights_path is not None:
        model.load_weights(saved_weights_path).expect_partial()

    return model
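A hypothetical training sketch for the classifier returned by create_model; model_config, train_ids, train_masks, and train_labels are assumed to exist, and sparse_categorical_crossentropy assumes integer class labels with a softmax final activation.

model = create_model(model_config)
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
model.fit([train_ids, train_masks], train_labels,
          batch_size=32, epochs=3, validation_split=0.1)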
Example #9
    def __init__(self,
                 args,
                 device,
                 d_model=256,
                 nhead=4,
                 d_ff=1024,
                 nlayers=2,
                 dropout=0.5):
        super(Autoencoder, self).__init__()
        self.model_type = 'Transformer'
        self.d_model = d_model

        self.src_mask = None
        self.pos_encoder = PositionalEncoding(d_model,
                                              dropout)  # encoder's position
        self.pos_decoder = PositionalEncoding(d_model,
                                              dropout)  # decoder's position

        decoder_layers = TransformerDecoderLayer(d_model, nhead, d_ff, dropout)
        decoder_norm = nn.LayerNorm(d_model)
        self.transformer_decoder = TransformerDecoder(decoder_layers, nlayers,
                                                      decoder_norm)

        # self.bert_encoder = BertModel.from_pretrained(args.PRETRAINED_MODEL_NAME, output_hidden_states=args.distill_2)
        if args.use_albert:
            self.bert_encoder = AlbertModel.from_pretrained(
                "clue/albert_chinese_tiny")
            self.bert_embed = self.bert_encoder.embeddings
            # self.tgt_embed = self.bert_embed
            d_vocab = self.bert_encoder.config.vocab_size + 1
            self.tgt_embed = nn.Sequential(
                Embeddings(d_model, d_vocab),
                PositionalEncoding(d_model, dropout))
        elif args.use_tiny_bert:
            self.bert_encoder = AutoModel.from_pretrained(
                "google/bert_uncased_L-2_H-256_A-4")
            self.bert_embed = self.bert_encoder.embeddings
            self.tgt_embed = self.bert_embed
        elif args.use_distil_bert:
            configuration = DistilBertConfig()
            self.bert_encoder = DistilBertModel(configuration)
            self.bert_embed = self.bert_encoder.embeddings
            self.tgt_embed = self.bert_embed
        # self.tgt_embed = self.bert.embeddings
        else:
            self.bert_encoder = BertModel.from_pretrained(
                args.PRETRAINED_MODEL_NAME,
                output_hidden_states=args.distill_2)
            self.bert_embed = self.bert_encoder.embeddings
            self.tgt_embed = self.bert_embed

        self.distill_2 = args.distill_2
        self.gru = nn.GRU(d_model, d_model, 1)
        self.lr = nn.Linear(d_model, self.bert_encoder.config.vocab_size + 1)
        self.sigmoid = nn.Sigmoid()
        self.device = device
        self.init_weights()
Example #10
    def __init__(self, dropout):
        super(DISTILBertModel, self).__init__()

        self.distilbert = DistilBertModel.from_pretrained(
            config.PATHS['distilbert'],
            config=DistilBertConfig())

        self.fc = nn.Linear(768, 2)
        self.dropout = nn.Dropout(dropout)
Example #11
def main():
    # Parse the command-line arguments (generated automatically from the docstring at the top of this file)
    args = docopt(__doc__)
    pprint(args)

    # Read the parameters
    lr = float(args['--lr'])
    seq_len = int(args['--seq_len'])
    max_epoch = int(args['--max_epoch'])
    batch_size = int(args['--batch_size'])
    num_train = int(args['--num_train'])
    num_valid = int(args['--num_valid'])

    # Select the model
    pretrained_weights = 'distilbert-base-uncased'
    tokenizer = DistilBertTokenizer.from_pretrained(pretrained_weights)
    config = DistilBertConfig(num_labels=4)
    model = DistilBertForSequenceClassification.from_pretrained(
        pretrained_weights, config=config)

    # Select the device to use
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)

    # Load the data and build the datasets
    encoder = TwinPhraseEncoder(tokenizer, seq_len)

    train_dataset = WordnetDataset(mode='train',
                                   num_data=num_train,
                                   transform=encoder)
    valid_dataset = WordnetDataset(mode='valid',
                                   num_data=num_valid,
                                   transform=encoder)
    train_loader = data.DataLoader(train_dataset, batch_size, shuffle=True)
    valid_loader = data.DataLoader(valid_dataset, batch_size, shuffle=True)

    # Define the optimizer
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # Training loop
    for epoch in range(1, max_epoch + 1):
        print('=' * 27 + f' Epoch {epoch:0>2} ' + '=' * 27)
        # Training
        loss, accu = train_model(model, optimizer, train_loader, device)
        print(
            f'|  Training    |  loss-avg : {loss:>8.6f}  |  accuracy : {accu:>8.3%}  |'
        )
        # Validation
        loss, accu = valid_model(model, optimizer, valid_loader, device)
        print(
            f'|  Validation  |  loss-avg : {loss:>8.6f}  |  accuracy : {accu:>8.3%}  |'
        )
        # Save the model
        torch.save(model.state_dict(), '../result/bert.pkl')
Example #12
 def __init__(self, tokenizer, device):
     super(VQAModel, self).__init__()
     self.tokenizer = tokenizer
     self.device = device
     self.image_embedding = torch.nn.Sequential(
         *(list(models.resnet152(pretrained=True).children())[:-1]))
     configuration = DistilBertConfig()
     self.question_embedding = DistilBertModel(configuration)
     self.linear1 = torch.nn.Linear(2816, 1024)
     self.linear2 = torch.nn.Linear(1024, 512)
     self.linear3 = torch.nn.Linear(512, 2)
Example #13
 def __init__(
         self,
         pretrainString='distilbert-base-uncased-finetuned-sst-2-english'):
     super().__init__()
     configuration = DistilBertConfig(dropout=0.25, num_labels=7)
     # from_pretrained is a classmethod; pass the custom configuration explicitly
     # so its settings are applied to the loaded model
     self.bert = DistilBertModel.from_pretrained(pretrainString,
                                                 config=configuration)
     self.pre_classifier = nn.Linear(configuration.dim, configuration.dim)
     self.classifier = nn.Linear(configuration.dim,
                                 configuration.num_labels)
     self.dropout = nn.Dropout(configuration.seq_classif_dropout)
     self.to(device)
Example #14
 def test_TFDistilBertForQuestionAnswering(self):
     from transformers import DistilBertConfig, TFDistilBertForQuestionAnswering
     keras.backend.clear_session()
     # pretrained_weights = 'distilbert-base-uncased'
     tokenizer_file = 'distilbert_distilbert-base-uncased.pickle'
     tokenizer = self._get_tokenzier(tokenizer_file)
     text, inputs, inputs_onnx = self._prepare_inputs(tokenizer)
     config = DistilBertConfig()
     model = TFDistilBertForQuestionAnswering(config)
     predictions = model.predict(inputs)
     onnx_model = keras2onnx.convert_keras(model, model.name)
     self.assertTrue(run_onnx_runtime(onnx_model.graph.name, onnx_model, inputs_onnx, predictions, self.model_files))
Example #15
 def get_config(self):
     return DistilBertConfig(
         vocab_size=self.vocab_size,
         dim=self.hidden_size,
         n_layers=self.num_hidden_layers,
         n_heads=self.num_attention_heads,
         hidden_dim=self.intermediate_size,
         hidden_act=self.hidden_act,
         dropout=self.hidden_dropout_prob,
         attention_dropout=self.attention_probs_dropout_prob,
         max_position_embeddings=self.max_position_embeddings,
         initializer_range=self.initializer_range,
     )
Example #16
    def __init__(self, pretrained=True, **kwargs):
        super().__init__()
        hidden_dimension = 32

        if pretrained:
            self.bert = DistilBertModel.from_pretrained(
                "distilbert-base-uncased")
        else:
            self.bert = DistilBertModel(DistilBertConfig())
        self.tokenizer = DistilBertTokenizer.from_pretrained(
            "distilbert-base-uncased")
        self.pre_classifier = nn.Linear(self.bert.config.dim, hidden_dimension)
        self.classifier = nn.Linear(hidden_dimension, 1)
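The forward pass is not part of the snippet above; a minimal sketch of how such a head is typically applied to the [CLS] hidden state (an assumption, not the author's code).

import torch

def forward_sketch(module, texts):
    enc = module.tokenizer(texts, padding=True, truncation=True,
                           return_tensors='pt')
    hidden = module.bert(**enc)[0]                  # (batch, seq, dim)
    cls = hidden[:, 0]                              # [CLS] representation
    score = module.classifier(torch.relu(module.pre_classifier(cls)))
    return torch.sigmoid(score)                     # one probability per text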
Example #17
    def __init__(self, bert_type='bertbase'):
        super(Bert, self).__init__()
        self.bert_type = bert_type

        if bert_type == 'bertbase':
            configuration = BertConfig()
            self.model = BertModel(configuration)
        elif bert_type == 'distilbert':
            configuration = DistilBertConfig()
            self.model = DistilBertModel(configuration)           
        elif bert_type == 'mobilebert':
            configuration = MobileBertConfig.from_pretrained('checkpoints/mobilebert')
            self.model = MobileBertModel(configuration)  
Example #18
 def __init__(self, hidden_size, num_labels, drop_prob, freeze, use_img,
              img_size):
     super(DistilBERT, self).__init__()
     self.img_size = img_size
     self.use_img = use_img
     config = DistilBertConfig(vocab_size=119547)
     self.distilbert = DistilBertModel(config)
     for param in self.distilbert.parameters():
         param.requires_grad = not freeze
     self.classifier = layers.DistilBERTClassifier(hidden_size,
                                                   num_labels,
                                                   drop_prob=drop_prob,
                                                   use_img=use_img,
                                                   img_size=img_size)
Example #19
    def __init__(self,
                 model_name=CFG.text_encoder_model,
                 pretrained=CFG.pretrained,
                 trainable=CFG.trainable):
        super().__init__()
        if pretrained:
            self.model = DistilBertModel.from_pretrained(model_name)
        else:
            self.model = DistilBertModel(config=DistilBertConfig())

        for p in self.model.parameters():
            p.requires_grad = trainable

        # we are using the CLS token hidden representation as the sentence's embedding
        self.target_token_idx = 0
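The forward method is not shown; a likely sketch given that target_token_idx selects the CLS hidden state (an assumption based on the comment above).

def encode_text(text_encoder, input_ids, attention_mask):
    # Run the DistilBERT backbone and return the CLS-token hidden state
    # as the sentence embedding.
    output = text_encoder.model(input_ids=input_ids,
                                attention_mask=attention_mask)
    return output.last_hidden_state[:, text_encoder.target_token_idx, :]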
Example #20
 def __init__(
     self,
     module,
     hidden_size=None,
     dropout=0.5,
 ):
     super(BertForMalwareDetection, self).__init__()
     assert module.__name__ in {
         "RNN",
         "GRU",
         "LSTM",
     }, "`module` must be a `torch.nn` recurrent layer"
     self.bert = DistilBertModel(DistilBertConfig(vocab_size=257))
     bert_hidden_size = self.bert.config.hidden_size
     hidden_size = bert_hidden_size if hidden_size is None else hidden_size
     self.rnn = module(input_size=bert_hidden_size, hidden_size=hidden_size)
     self.dropout = nn.Dropout(dropout)
     self.fc = nn.Linear(hidden_size, 1)
Example #21
def classify_with_pre_trained():
    # model = "neuralmind/bert-base-portuguese-cased"

    config = DistilBertConfig(num_labels=3)
    config.output_hidden_states = False
    transformer_model = TFDistilBertForSequenceClassification.from_pretrained(
        distil_bert, config=config)

    input_ids = tf.keras.layers.Input(shape=(128, ),
                                      name="input_token",
                                      dtype="int32")
    input_masks_ids = tf.keras.layers.Input(shape=(128, ),
                                            name="masked_token",
                                            dtype="int32")
    X = transformer_model(input_ids, attention_mask=input_masks_ids)[0]
    model = tf.keras.Model(inputs=[input_ids, input_masks_ids], outputs=X)

    return model
Example #22
 def __init__(self):
     super(DiffEval, self).__init__()
     self.h_dim = 256
     configuration = DistilBertConfig(vocab_size=28996,
                                      output_hidden_states=True)
     self.lm = DistilBertModel.from_pretrained('distilbert-base-cased',
                                               config=configuration)
     self.img_embedder = ImageEmbedding()
     self.img_diff = nn.Sequential(
         nn.Linear(2048 * 2, 2 * self.h_dim),
         nn.ReLU(),
     )
     self.mlp = nn.Sequential(
         nn.Linear(2 * self.h_dim + 768, 2 * self.h_dim),
         nn.ReLU(),
         nn.Linear(2 * self.h_dim, self.h_dim),
         nn.ReLU(),
         nn.Linear(self.h_dim, 2),
     )
Example #23
 def __call_model_torch(self):
     if self.model_to_use.lower() == 'bert':
         self.config = BertConfig(num_labels=2)
         self.model = BertForSequenceClassification.from_pretrained(
             'bert-base-uncased', config=self.config)
     elif self.model_to_use.lower() == 'albert':
         self.config = AlbertConfig(num_labels=2)
         self.model = AlbertForSequenceClassification.from_pretrained(
             'albert-base-v1', config=self.config)
     elif self.model_to_use.lower() == 'electra':
         self.config = ElectraConfig(num_labels=2)
         self.model = ElectraForSequenceClassification.from_pretrained(
             'google/electra-small-discriminator', config=self.config)
     elif self.model_to_use.lower() == 'distilbert':
         self.config = DistilBertConfig(num_labels=2)
         self.model = DistilBertForSequenceClassification.from_pretrained(
             'distilbert-base-uncased', config=self.config)
     else:
          print('Model not available yet.')
Example #24
 def __call_model_tf(self):
     if self.model_to_use.lower() == 'bert':
         self.config = BertConfig(num_labels=2)
         self.model = TFBertForSequenceClassification.from_pretrained(
             'bert-base-uncased', config=self.config)
     elif self.model_to_use.lower() == 'albert':
         self.config = AlbertConfig(num_labels=2)
         self.model = TFAlbertForSequenceClassification.from_pretrained(
             'albert-base-v1', config=self.config)
     elif self.model_to_use.lower() == 'electra':
          print(
              'Electra is not available for sequence classification with TensorFlow yet.'
          )
     elif self.model_to_use.lower() == 'distilbert':
         self.config = DistilBertConfig(num_labels=2)
         self.model = TFDistilBertForSequenceClassification.from_pretrained(
             'distilbert-base-uncased', config=self.config)
     else:
          print('Model not available yet.')
Example #25
def loadModel(filepath):
    """
    -Function to load model with saved states(parameters)
    -Args:
        filpath (str): path to the saved model
    """
    # load saved model dictionary
    saved = torch.load(filepath, map_location='cpu')
    state_dict = saved['state_dict']
    # load the numerical decoding for the flair category

    # initialize the model
    config = DistilBertConfig(num_labels=9)
    model = DistilBertForSequenceClassification(config)
    # loading the trained parameters with model
    model.load_state_dict(state_dict)

    cat_dict = saved['category']
    return model, cat_dict
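A hypothetical inference sketch using the model and category dictionary returned by loadModel; the tokenizer name, the checkpoint path, and the structure of cat_dict (class index to flair name) are assumptions.

import torch
from transformers import DistilBertTokenizerFast

model, cat_dict = loadModel('flair_classifier.pt')  # hypothetical path
model.eval()
tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')

enc = tokenizer('What game should I buy next?', return_tensors='pt',
                truncation=True)
with torch.no_grad():
    logits = model(**enc).logits
print(cat_dict[logits.argmax(dim=-1).item()])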
Example #26
def create_model(max_seq_len, classes):
    config = DistilBertConfig(dropout=0.2, attention_dropout=0.2)
    config.output_hidden_states = False
    tfm = TFDistilBertModel.from_pretrained('./MODEL/uncased/', config=config)
    input_ids = keras.layers.Input(shape=(max_seq_len,), dtype='int32', name="input_ids")
    bert_output = tfm(input_ids)[0]

    cls_out = keras.layers.Lambda(lambda seq: seq[:, 0, :])(bert_output)
    cls_out = keras.layers.Dropout(0.5)(cls_out)
    logits = keras.layers.Dense(units=768, activation="tanh")(cls_out)
    logits = keras.layers.Dropout(0.5)(logits)
    logits = keras.layers.Dense(units=512, activation="tanh")(logits)
    logits = keras.layers.Dropout(0.5)(logits)
    logits = keras.layers.Dense(units=256, activation="tanh")(logits)
    logits = keras.layers.Dropout(0.5)(logits)
    logits = keras.layers.Dense(units=len(classes), activation="softmax")(logits)

    model = keras.Model(inputs=input_ids, outputs=logits)
    model.build(input_shape=(None, max_seq_len))

    return model
Example #27
def get_plm_resources(plm, vocab_len):
    """load PLM resources such as model, tokenizer and config"""
    if plm == 'bert':
        bert_model = BertModel.from_pretrained('bert-base-uncased')
        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        bert_config = BertConfig(vocab_size_or_config_json_file=vocab_len)
    elif plm == 'roberta':
        bert_model = RobertaModel.from_pretrained('roberta-base')
        tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
        bert_config = RobertaConfig(vocab_size_or_config_json_file=vocab_len)
    elif plm == 'xlnet':
        bert_model = XLNetModel.from_pretrained('xlnet-base-cased')
        tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased')
        bert_config = XLNetConfig(vocab_size_or_config_json_file=vocab_len)
    elif plm == 'distilbert':
        bert_model = DistilBertModel.from_pretrained('distilbert-base-uncased')
        tokenizer = DistilBertTokenizer.from_pretrained(
            'distilbert-base-uncased')
        bert_config = DistilBertConfig(
            vocab_size_or_config_json_file=vocab_len)
    return bert_model, tokenizer, bert_config
Example #28
    def __init__(self, cfg: DictConfig):

        super().__init__(cfg)

        config = DistilBertConfig()
        config.sinusoidal_pos_embds = True

        self.module = DistilBertModel.from_pretrained(
            'distilbert-base-uncased', config=config)

        self.tokenizer = DistilBertTokenizer.from_pretrained(
            'distilbert-base-uncased')

        # Add in our own positional encodings
        embedding_layer = PositionalBertEmbeddings(self.module.config)
        self.module.embeddings = embedding_layer

        if self.is_frozen:
            for param in self.module.parameters():
                param.requires_grad = False

        layer = nn.TransformerEncoderLayer(self.hidden_size, self.num_heads)
        self.unit = nn.TransformerEncoder(layer, num_layers=self.num_layers)
Example #29
    def prepare_config_and_inputs(self):
        input_ids = ids_tensor([self.batch_size, self.seq_length],
                               self.vocab_size)

        attention_mask = None
        if self.use_attention_mask:
            attention_mask = random_attention_mask(
                [self.batch_size, self.seq_length])

        config = DistilBertConfig(
            vocab_size=self.vocab_size,
            dim=self.hidden_size,
            n_layers=self.num_hidden_layers,
            n_heads=self.num_attention_heads,
            hidden_dim=self.intermediate_size,
            hidden_act=self.hidden_act,
            dropout=self.hidden_dropout_prob,
            attention_dropout=self.attention_probs_dropout_prob,
            max_position_embeddings=self.max_position_embeddings,
            initializer_range=self.initializer_range,
            tie_weights_=True,
        )

        return config, input_ids, attention_mask
Example #30
import torch
from transformers import DistilBertConfig
from transformers import DistilBertTokenizerFast
from transformers import DistilBertForMaskedLM, DistilBertModel
from transformers import LineByLineTextDataset
from transformers import Trainer, TrainingArguments
from transformers import DataCollatorForLanguageModeling
from pathlib import Path
from tokenizers import ByteLevelBPETokenizer
import os
import torch
print(torch.cuda.is_available())
from transformers import DistilBertConfig

config = DistilBertConfig()

from transformers import DistilBertTokenizerFast

tokenizer = DistilBertTokenizerFast.from_pretrained("distilbert-base-uncased")

from transformers import DistilBertForMaskedLM

model = DistilBertForMaskedLM(config=config)

model.num_parameters()

from transformers import LineByLineTextDataset

dataset = LineByLineTextDataset(
    tokenizer=tokenizer,