Example no. 1
 def get_encoder(self):
     if self.hparams.model_type == 'bert':
         encoder = BertModel.from_pretrained('bert-base-uncased')
     elif self.hparams.model_type == 'bert-cased':
         encoder = BertModel.from_pretrained('bert-base-cased')
     elif self.hparams.model_type == 'bert-large':
         encoder = BertModel.from_pretrained('bert-large-uncased')
     elif self.hparams.model_type == 'distilbert':
         encoder = DistilBertModel.from_pretrained(
             'distilbert-base-uncased')
     elif self.hparams.model_type == 'roberta':
         encoder = RobertaModel.from_pretrained('roberta-base')
     elif self.hparams.model_type == 'roberta-large':
         encoder = RobertaModel.from_pretrained('roberta-large')
     elif self.hparams.model_type == 'albert':
         encoder = AlbertModel.from_pretrained('albert-base-v2')
     elif self.hparams.model_type == 'albert-xxlarge':
         encoder = AlbertModel.from_pretrained('albert-xxlarge-v2')
     elif self.hparams.model_type == 'electra':
         encoder = ElectraModel.from_pretrained(
             'google/electra-base-discriminator')
     elif self.hparams.model_type == 'electra-large':
         encoder = ElectraModel.from_pretrained(
             'google/electra-large-discriminator')
     else:
         raise ValueError(f'unknown model_type: {self.hparams.model_type}')
     return encoder
Example no. 2
    def __init__(self, num_labels: int,
                 top_comment_pretrained_model_name_or_path: Path,
                 post_pretrained_model_name_or_path: Path,
                 classifier_dropout_prob: float, meta_data_size: int,
                 subreddit_pretrained_path: Path,
                 num_subreddit_embeddings: int,
                 subreddit_embeddings_size: int):
        super(AlbertForEigenmetricRegression, self).__init__()
        self.num_labels = num_labels
        self.construct_param_dict = \
            OrderedDict({"num_labels": num_labels,
                         "top_comment_pretrained_model_name_or_path": str(top_comment_pretrained_model_name_or_path),
                         "post_pretrained_model_name_or_path": str(post_pretrained_model_name_or_path),
                         "classifier_dropout_prob": classifier_dropout_prob,
                         "meta_data_size": meta_data_size,
                         "subreddit_pretrained_path": str(subreddit_pretrained_path),
                         "num_subreddit_embeddings": num_subreddit_embeddings,
                         "subreddit_embeddings_size": subreddit_embeddings_size})

        # load two pretrained ALBERT models: one for the top comment and the other for the post
        self.top_comment_albert = AlbertModel.from_pretrained(
            top_comment_pretrained_model_name_or_path)
        self.post_albert = AlbertModel.from_pretrained(
            post_pretrained_model_name_or_path)
        # dropout layer
        self.dropout = nn.Dropout(classifier_dropout_prob)
        # the final classifier layer
        self.hidden_dimension = \
            self.top_comment_albert.config.hidden_size + self.post_albert.config.hidden_size \
            + meta_data_size + subreddit_embeddings_size
        self.classifier = nn.Linear(self.hidden_dimension, num_labels)
        self._init_weights(self.classifier)  # initialize the classifier
        # subreddit embeddings
        subreddit_embeddings = None
        if subreddit_pretrained_path:
            embeddings = torch.load(subreddit_pretrained_path)
            subreddit_embeddings = nn.Embedding.from_pretrained(embeddings,
                                                                freeze=False)
            logger.info(
                f"Loaded subreddit embeddings from {subreddit_pretrained_path}"
            )
        elif num_subreddit_embeddings is not None and subreddit_embeddings_size is not None:
            subreddit_embeddings = nn.Embedding(num_subreddit_embeddings,
                                                subreddit_embeddings_size)
            # these numbers are gained from the pretrained embeddings
            torch.nn.init.normal_(subreddit_embeddings.weight,
                                  mean=0,
                                  std=0.49)
            logger.info(f"Initialized subreddit embeddings")
        self.subreddit_embeddings = subreddit_embeddings
        self.is_feature_extractor = False
Example no. 3
    def __init__(self, n_outputs, size, pretrained_model_path=False):
        super(Albert, self).__init__()
        self.n_outputs = n_outputs
        self.size = size
        self.pretrained_model_path = pretrained_model_path

        if self.pretrained_model_path is False:
            self.huggingface_model = AlbertModel.from_pretrained(
                f"albert-{size}-v2")
        else:
            self.huggingface_model = AlbertModel.from_pretrained(
                pretrained_model_path)
        self.dropout = nn.Dropout(0.1)  # hard coding
        self.out_proj = nn.Linear(self.huggingface_model.config.hidden_size,
                                  n_outputs)
Example no. 4
def getBertModel():
    # tokenizer = DistilBertTokenizer.from_pretrained(CONF['BERT_MODEL_PATH'])
    # model = DistilBertModel.from_pretrained(CONF['BERT_MODEL_PATH'])

    tokenizer = BertTokenizer.from_pretrained(CONF['BERT_MODEL_PATH'])
    model = AlbertModel.from_pretrained(CONF['BERT_MODEL_PATH'])
    return model, tokenizer
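A minimal usage sketch for the helper above (not part of the original): it assumes CONF['BERT_MODEL_PATH'] points at a local ALBERT checkpoint whose vocabulary is compatible with BertTokenizer, as the pairing above implies, and a transformers 4.x return type.

model, tokenizer = getBertModel()
inputs = tokenizer("an example sentence", return_tensors="pt")
outputs = model(**inputs)
print(outputs.last_hidden_state.shape)  # (1, seq_len, hidden_size)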
Example no. 5
    def __init__(self, config):
        super().__init__()
        self._if_infer = False

        self.classes = config.classes
        self.num_classes = config.num_classes
        self.device = config.device
        self.f1_average = config.f1_average
        self.num_labels = config.num_labels
        self.feature_cols = config.feature_cols

        self.encoder = AlbertModel.from_pretrained(config.transfer_path)
        for param in self.encoder.parameters():
            param.requires_grad = True

        self.fc = nn.Sequential(
            nn.BatchNorm1d(config.transfer_hidden),
            nn.Linear(config.transfer_hidden, config.linear_size),
            nn.ReLU(inplace=True), nn.BatchNorm1d(config.linear_size),
            nn.Dropout(config.dropout),
            nn.Linear(config.linear_size, self.num_classes))

        self.criterion = nn.CrossEntropyLoss()
Example no. 6
    def __init__(self, my_config, args):
        super(NqModel, self).__init__()
        #albert_base_configuration = AlbertConfig(vocab_size=30000,hidden_size=768,num_attention_heads=12,intermediate_size=3072,
        #                                        attention_probs_dropout_prob=0)
        self.my_mask = None
        self.args = args
        #mfeb/albert-xxlarge-v2-squad2
        self.bert_config = AlbertConfig.from_pretrained("albert-xxlarge-v2")
        # self.bert_config.gradient_checkpointing = True
        # self.bert_config.Extgradient_checkpointing = True
        self.bert = AlbertModel.from_pretrained("albert-xxlarge-v2",
                                                config=self.bert_config)
        #        self.bert = AlbertModel.from_pretrained("albert-base-v2")
        my_config.hidden_size = self.bert.config.hidden_size

        self.right = 0
        self.all = 0
        #self.bert =  AlbertModel(albert_base_configuration)

        #self.bert2 = BertModel(bert_config)

        #self.bert = BertModel(BertConfig())

        #self.bert =  RobertaModel(RobertaConfig(max_position_embeddings=514,vocab_size=50265))

        #print(my_config,bert_config)
        #        self.tok_dense = nn.Linear(my_config.hidden_size, my_config.hidden_size)
        self.tok_dense = nn.Linear(my_config.hidden_size * 2,
                                   my_config.hidden_size * 2)

        #        self.tok_dense2 = nn.Linear(my_config.hidden_size, my_config.hidden_size)
        #        self.para_dense = nn.Linear(self.config.hidden_size, self.config.hidden_size)
        #        self.doc_dense = nn.Linear(self.config.hidden_size, self.config.hidden_size)

        self.dropout = nn.Dropout(my_config.hidden_dropout_prob)

        self.tok_outputs = nn.Linear(my_config.hidden_size * 2,
                                     1)  # tuned to avoid falling into bad local optima

        #        self.tok_outputs2 = nn.Linear(my_config.hidden_size, 1)
        #        config.max_token_len, config.max_token_relative
        #        self.para_outputs = nn.Linear(self.config.hidden_size, 1)
        #        self.answer_type_outputs = nn.Linear(self.config.hidden_size, 2)

        #        self.tok_to_label = nn.Linear(my_config.max_token_len,2)
        #        self.par_to_label = nn.Linear(my_config.max_paragraph_len,2)

        #self.encoder = Encoder(my_config)
        self.encoder = Encoder(my_config)
        #        self.encoder2 = Encoder(my_config)

        self.my_config = my_config

        self.model_choice = None
        self.ground_answer = None

        self.ACC = 0
        self.ALL = 0

        self.ErrId = []
Example no. 7
    def __init__(self):
        super(AlbertClassifier, self).__init__()
        D_in, H, D_out = 768, 50, 2
        self.albert = AlbertModel.from_pretrained('albert-base-v1')

        self.classifier = nn.Sequential(nn.Linear(D_in, H), nn.ReLU(),
                                        nn.Dropout(0.4), nn.Linear(H, D_out))
Example no. 8
    def __init__(self, config):
        super(AlBert, self).__init__()
        model_config = AlbertConfig.from_pretrained(
            config.config_file,
            num_labels=config.num_labels,
            finetuning_task=config.task,
        )
        self.albert = AlbertModel.from_pretrained(
            config.model_name_or_path,
            config=model_config,
        )
        if config.requires_grad:
            for param in self.albert.parameters():
                param.requires_grad = True
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)
        #add the weighted layer
        self.hidden_weight = config.weighted_layer_tag         #must modify the config.json
        self.pooling_tag = config.pooling_tag

        if self.hidden_weight:
            self.weight_layer = config.weighted_layer_num
            #self.weight = torch.zeros(self.weight_layer).to(config.device)
            self.weight = torch.nn.Parameter(torch.FloatTensor(self.weight_layer), requires_grad=True)
            self.softmax = nn.Softmax(dim=-1)
            self.pooler = nn.Sequential(nn.Linear(768, 768), nn.Tanh())

        elif self.pooling_tag:
            self.maxPooling = nn.MaxPool1d(64)
            self.avgPooling = nn.AvgPool1d(64)
            self.pooler = nn.Sequential(nn.Linear(768*3, 768), nn.Tanh())
Example no. 9
 def _get_fallback_model(self) -> AlbertModel:
     """ Returns the CPU model """
     if not self._model_fallback:
         self._model_fallback = AlbertModel.from_pretrained(
             self._model_directory
         ).eval()
     return self._model_fallback
Example no. 10
def build_ban(dataset, num_hid, op='', gamma=4, task='vqa', use_counter=True):
    #w_emb = WordEmbedding(dataset.dictionary.ntoken, 300, .0, op)
    #q_emb = QuestionEmbedding(300 if 'c' not in op else 600, num_hid, 1, False, .0)
    w_emb = AlbertTokenizer.from_pretrained('albert-large-v2')
    q_emb = AlbertModel.from_pretrained('albert-large-v2')
    params_set = set()
    for param in q_emb.parameters():
        params_set.add(param)
        param.requires_grad = False
    v_att = BiAttention(dataset.v_dim, num_hid, num_hid, gamma)
    if task == 'vqa':
        b_net = []
        q_prj = []
        c_prj = []
        objects = 10  # minimum number of boxes
        for i in range(gamma):
            b_net.append(BCNet(dataset.v_dim, num_hid, num_hid, None, k=1))
            q_prj.append(FCNet([num_hid, num_hid], '', .2))
            c_prj.append(FCNet([objects + 1, num_hid], 'ReLU', .0))
        classifier = SimpleClassifier(num_hid, num_hid * 2,
                                      dataset.num_ans_candidates, .5)
        counter = Counter(objects) if use_counter else None
        return BanModel(dataset, params_set, w_emb, q_emb, v_att, b_net, q_prj,
                        c_prj, classifier, counter, op, gamma)
    elif task == 'flickr':
        return BanModel_flickr(w_emb, q_emb, v_att, op, gamma)
Example no. 11
    def __init__(
            self,
            lang: str = 'en',
            ):
        try:
            from transformers import BertJapaneseTokenizer, AlbertTokenizer, CamembertTokenizer, AutoTokenizer
            from transformers import AlbertModel, BertModel, CamembertModel, AutoModel
        except ImportError:
            msg = "importing bert dep failed."
            msg += "\n try to install sister by `pip install sister[bert]`."
            raise ImportError(msg)

        if lang == "en":
            tokenizer = AlbertTokenizer.from_pretrained("albert-base-v2")
            model = AlbertModel.from_pretrained("albert-base-v2")
        elif lang == "fr":
            tokenizer = CamembertTokenizer.from_pretrained("camembert-base")
            model = CamembertModel.from_pretrained("camembert-base")
        elif lang == "es":
            tokenizer = AutoTokenizer.from_pretrained("dccuchile/bert-base-spanish-wwm-uncased")
            model = AutoModel.from_pretrained("dccuchile/bert-base-spanish-wwm-uncased")
        elif lang == "ja":
            tokenizer = BertJapaneseTokenizer.from_pretrained("cl-tohoku/bert-base-japanese-whole-word-masking")
            model = BertModel.from_pretrained("cl-tohoku/bert-base-japanese-whole-word-masking")
        else:
            raise ValueError(f"unsupported lang: {lang}")

        self.tokenizer = tokenizer
        self.model = model
Example no. 12
 def __init__(self):
     super().__init__()
     self.bert = AlbertModel.from_pretrained('albert-base-v2')
     self.tokenizer = AlbertTokenizer.from_pretrained('albert-base-v2')
     self.score_fc = nn.Linear(768, 11)
     self.regression_fc = nn.Linear(768, 1)
     self.sigmoid = nn.Sigmoid()
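A minimal forward-pass sketch for a head like the one above; the class name ScoreRegressor, the [CLS] pooling, and the transformers 4.x output type are assumptions, not part of the original snippet.

import torch

model = ScoreRegressor()  # hypothetical name for the module defined above
model.eval()
inputs = model.tokenizer("an example sentence", return_tensors="pt")
with torch.no_grad():
    hidden = model.bert(**inputs).last_hidden_state  # (1, seq_len, 768)
cls_vec = hidden[:, 0]                               # [CLS] token vector
class_logits = model.score_fc(cls_vec)               # (1, 11)
regression = model.sigmoid(model.regression_fc(cls_vec))  # (1, 1), in [0, 1]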
Example no. 13
    def __init__(
        self,
        dropout: float,
        num_class: int,
        ptrain_ver: str,
    ):
        super().__init__()

        # Check if `ptrain_ver` is supported.
        if ptrain_ver not in TeacherAlbert.allow_ptrain_ver:
            raise ValueError(
                f'`ptrain_ver` {ptrain_ver} is not supported.\n' +
                'Supported options:' + ''.join(
                    list(
                        map(lambda option: f'\n\t--ptrain_ver {option}',
                            TeacherAlbert.allow_ptrain_ver.keys()))))

        # Load pre-train ALBERT model.
        self.encoder = AlbertModel.from_pretrained(ptrain_ver)

        # Dropout layer between encoder and linear layer.
        self.dropout = nn.Dropout(dropout)

        # Linear layer project from `d_model` into `num_class`.
        self.linear_layer = nn.Linear(
            in_features=TeacherAlbert.allow_ptrain_ver[ptrain_ver],
            out_features=num_class)

        # Linear layer initialization.
        with torch.no_grad():
            nn.init.normal_(self.linear_layer.weight, mean=0.0, std=0.02)
            nn.init.zeros_(self.linear_layer.bias)
Example no. 14
    def __init__(self,
                 max_len,
                 hidden_layer=320,
                 dropout=0.1,
                 pretrained_model='bert',
                 out_features=2):
        """
        Initialise the model
        :param max_len: Maximum length of tokens in a sentence. If the sentence
                is too short, it should be zero padded
        :param hidden_layer: number of neurons to use in hidden dense layer
        :param dropout: dropout probability to use
        :param pretrained_model: type of the pre-trained model to use. Accepted
                strings are 'bert' and 'albert'
        :param out_features: number of out features e.g. 2 if you want only
                positive and negative
        """
        super().__init__()

        if pretrained_model == 'bert':
            self.pretrained_model = BertModel.from_pretrained(
                'bert-base-uncased')
        elif pretrained_model == 'albert':
            self.pretrained_model = AlbertModel.from_pretrained(
                'albert-base-v2')
        else:
            raise ValueError(
                "pretrained_model must be 'bert' or 'albert'")

        self.dropout = nn.Dropout(dropout)
        self.fc1 = nn.Linear(max_len, hidden_layer)
        self.fc2 = nn.Linear(hidden_layer, out_features)

        nn.init.kaiming_normal_(self.fc1.weight)
        nn.init.kaiming_normal_(self.fc2.weight)
Example no. 15
 def test_model_from_pretrained(self):
     cache_dir = "/tmp/transformers_test/"
     for model_name in list(ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
         model = AlbertModel.from_pretrained(model_name,
                                             cache_dir=cache_dir)
         shutil.rmtree(cache_dir)
         self.assertIsNotNone(model)
Example no. 16
    def __init__(self,
                 model_name_or_path: str,
                 max_seq_length: int = 128,
                 do_lower_case: Optional[bool] = None,
                 model_args: Dict = {},
                 tokenizer_args: Dict = {}):
        super(ALBERT, self).__init__()
        self.config_keys = ['max_seq_length', 'do_lower_case']
        self.do_lower_case = do_lower_case

        if max_seq_length > 510:
            logging.warning(
                "BERT only allows a max_seq_length of 510 (512 with special tokens). Value will be set to 510"
            )
            max_seq_length = 510
        self.max_seq_length = max_seq_length

        if self.do_lower_case is not None:
            tokenizer_args['do_lower_case'] = do_lower_case

        self.albert = AlbertModel.from_pretrained(model_name_or_path,
                                                  **model_args)
        # self.tokenizer = AlbertTokenizer.from_pretrained(model_name_or_path, **tokenizer_args)
        self.tokenizer = BertTokenizer.from_pretrained(model_name_or_path,
                                                       **tokenizer_args)
Example no. 17
 def __init__(self, albert_path, dropout, n_class):
     super(AlbertClassifier, self).__init__()
     self.albert_path = albert_path
     self.n_class = n_class
     self.albert = AlbertModel.from_pretrained(self.albert_path)
     self.dropout = nn.Dropout(dropout)
     self.fc = nn.Linear(self.albert.config.hidden_size, n_class)
Example no. 18
 def __init__(self, pretrain_path, max_length):
     nn.Module.__init__(self)
     self.bert = AlbertModel.from_pretrained(pretrain_path)
     for param in self.bert.parameters():
         param.requires_grad = False
     self.bert.eval()
     self.max_length = max_length
Example no. 19
    def __init__(self, batch_size, lstm_hid_dim, d_a, n_classes, is_train,
                 label_input_ids, label_attention_masks):
        super(StructuredSelfAttentionBert, self).__init__()
        self.n_classes = n_classes

        if config.bert_model == 'bert':
            self.bert = BertModel.from_pretrained('bert-base-uncased')
        elif config.bert_model == 'albert':
            self.bert = AlbertModel.from_pretrained('albert-base-v1')
        else:
            raise ValueError(f'unknown bert_model: {config.bert_model}')

        for param in self.bert.parameters():
            param.requires_grad = is_train
        self.linear_label = torch.nn.Linear(config.bert_embedding_size,
                                            lstm_hid_dim)
        self.lstm = torch.nn.LSTM(input_size=config.bert_embedding_size,
                                  hidden_size=lstm_hid_dim,
                                  num_layers=1,
                                  batch_first=True,
                                  bidirectional=True)
        self.linear_first = torch.nn.Linear(lstm_hid_dim * 2, d_a)
        self.linear_second = torch.nn.Linear(d_a, n_classes)

        self.weight1 = torch.nn.Linear(lstm_hid_dim * 2, 1)
        self.weight2 = torch.nn.Linear(lstm_hid_dim * 2, 1)

        self.output_layer = torch.nn.Linear(config.bert_embedding_size,
                                            n_classes)
        self.embedding_dropout = torch.nn.Dropout(p=0.1)
        self.batch_size = batch_size
        self.lstm_hid_dim = lstm_hid_dim

        self.label_input_ids = label_input_ids
        self.label_attention_masks = label_attention_masks
Example no. 20
    def __init__(
        self,
        lang: str = "en",
    ):
        try:
            from transformers import (AlbertModel, AlbertTokenizer, BertConfig,
                                      BertJapaneseTokenizer, BertModel,
                                      CamembertModel, CamembertTokenizer)
        except ImportError:
            msg = "importing bert dep failed."
            msg += "\n try to install sister by `pip install sister[bert]`."
            raise ImportError(msg)

        if lang == "en":
            model_name = "albert-base-v2"
            tokenizer = AlbertTokenizer.from_pretrained(model_name)
            config = BertConfig.from_pretrained(model_name,
                                                output_hidden_states=True)
            model = AlbertModel.from_pretrained(model_name, config=config)
        elif lang == "fr":
            model_name = "camembert-base"
            tokenizer = CamembertTokenizer.from_pretrained(model_name)
            config = BertConfig.from_pretrained(model_name,
                                                output_hidden_states=True)
            model = CamembertModel.from_pretrained(model_name, config=config)
        elif lang == "ja":
            model_name = "cl-tohoku/bert-base-japanese-whole-word-masking"
            tokenizer = BertJapaneseTokenizer.from_pretrained(model_name)
            config = BertConfig.from_pretrained(model_name,
                                                output_hidden_states=True)
            model = BertModel.from_pretrained(model_name, config=config)
        else:
            raise ValueError(f"unsupported lang: {lang}")

        self.tokenizer = tokenizer
        self.model = model
Example no. 21
    def __init__(
        self,
        pretrained_model_name=None,
        config_filename=None,
        vocab_size=None,
        hidden_size=768,
        num_hidden_layers=12,
        num_attention_heads=12,
        intermediate_size=3072,
        hidden_act="gelu",
        max_position_embeddings=512,
    ):
        super().__init__()

        # Check that only one of pretrained_model_name, config_filename, and
        # vocab_size was passed in
        total = 0
        if pretrained_model_name is not None:
            total += 1
        if config_filename is not None:
            total += 1
        if vocab_size is not None:
            total += 1

        if total != 1:
            raise ValueError(
                "Only one of pretrained_model_name, vocab_size, "
                + "or config_filename should be passed into the "
                + "ALBERT constructor."
            )

        # NOTE: the branches below re-check which of the three arguments was provided.
        if vocab_size is not None:
            config = AlbertConfig(
                vocab_size_or_config_json_file=vocab_size,
                vocab_size=vocab_size,
                hidden_size=hidden_size,
                num_hidden_layers=num_hidden_layers,
                num_attention_heads=num_attention_heads,
                intermediate_size=intermediate_size,
                hidden_act=hidden_act,
                max_position_embeddings=max_position_embeddings,
            )
            model = AlbertModel(config)
        elif pretrained_model_name is not None:
            model = AlbertModel.from_pretrained(pretrained_model_name)
        elif config_filename is not None:
            config = AlbertConfig.from_json_file(config_filename)
            model = AlbertModel(config)
        else:
            raise ValueError(
                "Either pretrained_model_name or vocab_size must" + " be passed into the ALBERT constructor"
            )

        model.to(self._device)

        self.add_module("albert", model)
        self.config = model.config
        self._hidden_size = model.config.hidden_size
Example no. 22
    def __init__(self, num_classes, model_path='./albert_base'):
        super(AlbertCrf, self).__init__()

        self.albert = AlbertModel.from_pretrained(model_path)
        self.dropout = nn.Dropout(0.1)
        self.fc1 = nn.Linear(self.albert.config.hidden_size, 256)
        self.fc2 = nn.Linear(256, num_classes)
        self.crf = CRF(num_classes, batch_first=True)
Example no. 23
 def __init__(self, ):
     super().__init__()
     self.tokenizer = BertTokenizer.from_pretrained('./alberttiny')
     self.model = AlbertModel.from_pretrained('./alberttiny').to(
         Config.device)
     self.is_src = None
     #self.w2v= gensim.models.KeyedVectors.load_word2vec_format('../news_comment/baike_26g_news_13g_novel_229g.bin', binary=True)
Example no. 24
 def __init__(self, drop_prob=0):
     super(BertQA, self).__init__()
     self.bert = AlbertModel.from_pretrained("albert-large-v2")
     #for QA
     self.ans_se = nn.Linear(1024, 2)
     #for Beer, ten level sentiment
     self.sentiment = nn.Linear(1024, 1)
     self.sentiment_movie = nn.Linear(1024, 1)
     self.sigmoid = nn.Sigmoid()
Example no. 25
 def __init__(self, hidden_dim=768, num_tags=20):
     super(TagValueModel, self).__init__()
     self.albert = AlbertModel.from_pretrained("ALINEAR/albert-japanese-v2")
     self.dropout = nn.Dropout(0.1)
     self.tags = nn.Linear(hidden_dim, num_tags)
     #self.tags_insentence = nn.Linear(hidden_dim, 20)
     self.starts = nn.Linear(hidden_dim, num_tags)
     self.ends = nn.Linear(hidden_dim, num_tags)
     self.num_tags = num_tags
Example no. 26
    def __init__(self, config):
        super(Model, self).__init__()
        self.config = AlbertConfig.from_pretrained(config.albert_config_path)
        self.albert = AlbertModel.from_pretrained(config.albert_model_path,
                                                  config=self.config)
        for param in self.albert.parameters():
            param.requires_grad = True

        self.fc = nn.Linear(config.hidden_size, config.num_classes)
Example no. 27
    def __init__(self,
                 args,
                 device,
                 d_model=256,
                 nhead=4,
                 d_ff=1024,
                 nlayers=2,
                 dropout=0.5):
        super(Autoencoder, self).__init__()
        self.model_type = 'Transformer'
        self.d_model = d_model

        self.src_mask = None
        self.pos_encoder = PositionalEncoding(d_model,
                                              dropout)  # encoder's position
        self.pos_decoder = PositionalEncoding(d_model,
                                              dropout)  # decoder's position

        decoder_layers = TransformerDecoderLayer(d_model, nhead, d_ff, dropout)
        decoder_norm = nn.LayerNorm(d_model)
        self.transformer_decoder = TransformerDecoder(decoder_layers, nlayers,
                                                      decoder_norm)

        # self.bert_encoder = BertModel.from_pretrained(args.PRETRAINED_MODEL_NAME, output_hidden_states=args.distill_2)
        if args.use_albert:
            self.bert_encoder = AlbertModel.from_pretrained(
                "clue/albert_chinese_tiny")
            self.bert_embed = self.bert_encoder.embeddings
            # self.tgt_embed = self.bert_embed
            d_vocab = self.bert_encoder.config.vocab_size + 1
            self.tgt_embed = nn.Sequential(
                Embeddings(d_model, d_vocab),
                PositionalEncoding(d_model, dropout))
        elif args.use_tiny_bert:
            self.bert_encoder = AutoModel.from_pretrained(
                "google/bert_uncased_L-2_H-256_A-4")
            self.bert_embed = self.bert_encoder.embeddings
            self.tgt_embed = self.bert_embed
        elif args.use_distil_bert:
            configuration = DistilBertConfig()
            self.bert_encoder = DistilBertModel(configuration)
            self.bert_embed = self.bert_encoder.embeddings
            self.tgt_embed = self.bert_embed
        # self.tgt_embed = self.bert.embeddings
        else:
            self.bert_encoder = BertModel.from_pretrained(
                args.PRETRAINED_MODEL_NAME,
                output_hidden_states=args.distill_2)
            self.bert_embed = self.bert_encoder.embeddings
            self.tgt_embed = self.bert_embed

        self.distill_2 = args.distill_2
        self.gru = nn.GRU(d_model, d_model, 1)
        self.lr = nn.Linear(d_model, self.bert_encoder.config.vocab_size + 1)
        self.sigmoid = nn.Sigmoid()
        self.device = device
        self.init_weights()
Example no. 28
def load_pretrained_encoder(mpath,
                            config="albert_config.json",
                            model="albert_model.bin"):

    b_config = BC.from_pretrained(opt.join(mpath, config))
    encoder = AlbertModel.from_pretrained(opt.join(mpath, model),
                                          config=b_config)

    return encoder
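A hedged usage sketch, not part of the original: it assumes opt above aliases os.path, BC aliases AlbertConfig, and that the installed transformers version accepts a direct path to a weights file when a config object is supplied, as the snippet relies on.

# hypothetical layout: /path/to/albert_dir/{albert_config.json, albert_model.bin}
encoder = load_pretrained_encoder("/path/to/albert_dir")
print(encoder.config.hidden_size)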
Example no. 29
    def __init__(self, config, num_label):
        super(AlbertQA, self).__init__()
        self.albert = AlbertModel.from_pretrained('albert-xxlarge-v1')
        self.fc = nn.Linear(config.hidden_size, num_label)
        self.drop = nn.Dropout(config.hidden_dropout_prob)
        self.loss = nn.CrossEntropyLoss(reduction='sum')

        torch.nn.init.xavier_uniform_(self.fc.weight)
        torch.nn.init.constant_(self.fc.bias, 0.)
Example no. 30
 def __init__(self, bert_name, num_class, bert_type='bert', drop_out=0.1):
     super(Bert, self).__init__()
     if bert_type == 'bert':
         self.bert = BertModel.from_pretrained(bert_name)
     elif bert_type == 'albert':
         self.bert = AlbertModel.from_pretrained(bert_name)
     else:
         raise ValueError(f'unsupported bert_type: {bert_type}')
     self.drop_out = nn.Dropout(p=drop_out)
     self.classifier = nn.Linear(self.bert.config.hidden_size, num_class)
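A short instantiation sketch for the wrapper above (not from the original); the checkpoint id is illustrative, and bert_name may be any BERT/ALBERT model id or local path.

model = Bert('albert-base-v2', num_class=2, bert_type='albert')
print(model.bert.config.hidden_size, model.classifier.out_features)  # 768 2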