def __init__(self,
                 heads: List[TransformerHead],
                 transformer_weights: str,
                 model_storage_directory=None,
                 evaluation_interval=1,
                 checkpoint_interval=1,
                 device='cuda',
                 learning_rate=5e-5,
                 transformer_layers=12,
                 use_pretrained_heads=True):
        """
        
        :param model_directory: a directory path to the multi-tasking model. This contains bert weights and head weights.
        """
        self.transformer_weights = transformer_weights
        self.heads = heads
        self.model_storage_directory = model_storage_directory
        self.transformer_layers = transformer_layers

        self.device = device

        self.bert_tokenizer = BertTokenizer.from_pretrained(self.transformer_weights)
        if os.path.exists(self.transformer_weights):
            if os.path.exists(os.path.join(self.transformer_weights, CONFIG_NAME)):
                config = BertConfig.from_json_file(os.path.join(self.transformer_weights, CONFIG_NAME))
            elif os.path.exists(os.path.join(self.transformer_weights, 'bert_config.json')):
                config = BertConfig.from_json_file(os.path.join(self.transformer_weights, 'bert_config.json'))
            else:
                raise ValueError("Cannot find a configuration for the BERT based model you are attempting to load.")
        else:
            config = BertConfig.from_pretrained(self.transformer_weights)

        config.output_hidden_states = True

        use_tf_model = 'biobert_v1' in self.transformer_weights
        self.bert = BertModel.from_pretrained(self.transformer_weights, config=config, from_tf=use_tf_model)

        for head in heads:
            if use_pretrained_heads:
                if head.from_pretrained(self.transformer_weights):
                    log.info(f"Loading pretrained head: {head}")
                else:
                    log.info(f"Training new head: {head}")
                if getattr(head, '_init_mlm_head', None):  # LM heads require the BERT model configuration.
                    head._init_mlm_head(config)
            else:
                log.info(f"Training new head: {head}")

        if not hasattr(self, 'epoch'):
            self.epoch = 0

        self.optimizer = torch.optim.Adam(
            self.bert.parameters(),
            weight_decay=0,
            lr=learning_rate
        )
    def get_model(targets=ALL_TARGETS):
        config = BertConfig.from_json_file(args.model_dir /
                                           "stackx-base-cased-config.json")
        config.__dict__["num_labels"] = len(targets)

        model = BertForQuestRegression(config)
        return model

Example #3

def main():
    pretrained_path = './nuilm_small/'
    vocab_path = os.path.join(pretrained_path, 'vocab.txt')
    # new_token_dict, keep_tokens = load_vocab(vocab_path, simplified=True, startswith=['[PAD]', '[UNK]', '[CLS]', '[SEP]'])
    # tokenizer = BertTokenizer(new_token_dict)
    tokenizer = BertTokenizer.from_pretrained(pretrained_path)
    vocab_size = tokenizer.vocab_size
    print(vocab_size)
    config_path = os.path.join(pretrained_path, 'config.json')
    config = BertConfig.from_json_file(config_path)
    MAX_LEN = 3072
    batch_size = 8
    data = load_data('../pre_train_summary/nuion_data_pre.json')
    print(len(data))
    print(data[0][0])
    print(data[0][1])
    valid_data = data[:1]
    train_data = data[1:]
    train_generator = data_generator(train_data, batch_size, MAX_LEN, 0, tokenizer)

    K.clear_session()
    strategy = tf.distribute.MirroredStrategy()
    print('Number of devices: {}'.format(strategy.num_replicas_in_sync))
    with strategy.scope():
        model = build_model(pretrained_path, config, MAX_LEN, vocab_size)  # , keep_tokens)

    epochs = 5
    autotitle = AutoTitle(start_id=None, end_id=tokenizer.vocab['[SEP]'], maxlen=600, model=model)
    evaluator = Evaluator(tokenizer, MAX_LEN, autotitle, valid_data)
    model.fit_generator(train_generator.forfit(),
                        steps_per_epoch=len(train_generator) - 1,
                        epochs=epochs,
                        callbacks=[evaluator])

Example #4

 def __init__(self,
              image_root: str,
              scibert_path: str,
              lazy: bool = False,
              limit: int = None,
              max_sequence_length: int = 512,
              different_type_for_refs: bool = True,
              use_refs: bool = True):
     super().__init__(lazy)
     self.image_root = image_root
     config = BertConfig.from_json_file(
         os.path.join(scibert_path, 'config.json'))
     self.tokenizer = BertTokenizer(config=config,
                                    vocab_file=os.path.join(
                                        scibert_path, 'vocab.txt'))
     self.token_indexer = {
         'tokens':
         BertFromConfigIndexer(config=config,
                               vocab_path=os.path.join(
                                   scibert_path, 'vocab.txt'),
                               namespace='bert_tokens')
     }
     expected_img_size = 224
     self.image_transform = transforms.Compose([
         transforms.Resize(expected_img_size),
         transforms.CenterCrop(expected_img_size),
         transforms.ToTensor(),
         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
     ])
     self.use_refs = use_refs
     self.different_type_for_refs = different_type_for_refs
     self.limit = limit
     self.max_sequence_length = max_sequence_length
     self.word_tokenizer = WordTokenizer()
     self.caption_field = "caption"

Example #5

 def __init__(self):
     super(DialogEncoder, self).__init__()
     config = BertConfig.from_json_file('config/bert_base_baseline.json')
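     # note: the config loaded above is not passed to from_pretrained; the model below uses the default bert-base-uncased config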
     self.bert_pretrained = BertForPretrainingDialog.from_pretrained('bert-base-uncased', output_hidden_states=True)
     self.bert_pretrained.train()
     # add additional layers for the inconsistency loss
     assert self.bert_pretrained.config.output_hidden_states == True

Example #6

def main():
    pretrained_path = './torch_unilm_model'
    vocab_path = os.path.join(pretrained_path, 'vocab.txt')
    new_token_dict, keep_tokens = load_vocab(
        vocab_path,
        simplified=True,
        startswith=['[PAD]', '[UNK]', '[CLS]', '[SEP]'])
    tokenizer = BertTokenizer(new_token_dict)
    vocab_size = tokenizer.vocab_size
    print(vocab_size)

    config_path = os.path.join(pretrained_path, 'config.json')
    config = BertConfig.from_json_file(config_path)
    MAX_LEN = 256
    txts = glob.glob('./THUCNews/*/*.txt')
    batch_size = 8
    train_generator = data_generator(txts, batch_size, MAX_LEN, tokenizer)
    model = build_model(pretrained_path, config, MAX_LEN, vocab_size,
                        keep_tokens)
    steps_per_epoch = 1000
    epochs = 10000
    autotitle = AutoTitle(start_id=None,
                          end_id=new_token_dict['[SEP]'],
                          maxlen=32,
                          model=model)
    evaluator = Evaluator(tokenizer, MAX_LEN, autotitle)
    model.fit_generator(train_generator.forfit(),
                        steps_per_epoch=steps_per_epoch,
                        epochs=epochs,
                        callbacks=[evaluator])
def chat(folder_bert, voc, testing=False):
    tf.random.set_seed(1)
    tokenizer = BertTokenizer(vocab_file=folder_bert + voc)
    if testing:
        tokens = tokenizer.tokenize("jeg tror det skal regne")
        print(tokens)
        ids = tokenizer.convert_tokens_to_ids(tokens)
        print(ids)
        print("Vocab size:", len(tokenizer.vocab))

    config = BertConfig.from_json_file(folder_bert + "/config.json")
    model = BertLMHeadModel.from_pretrained(folder_bert, config=config)
    while True:
        text = input(">>User: ")
        # Encode the user turn and sample a reply from the LM head
        # (the exact generation call is a reconstruction; sampling parameters are illustrative).
        input_ids = tokenizer.encode(text, return_tensors='pt')
        sample_output = model.generate(input_ids,
                                       do_sample=True,
                                       max_length=input_ids.shape[-1] + 40,
                                       top_k=50)
        print("Bot: {}".format(tokenizer.decode(sample_output[0])))
        print("Bot: {}".format(
            tokenizer.decode(sample_output[:, input_ids.shape[-1]:][0],
                             skip_special_tokens=True)))

Example #8

 def __init__(self,config, vocab):
     super(BERT_PRETRAINED_MODEL_JAPANESE, self).__init__()
     self.config = config
     self.vocab = vocab
     self.BERT_config = BertConfig.from_json_file('../published_model/bert_spm/bert_config.json')
     self.tokenizer = BertTokenizer.from_pretrained('./spm_model/wiki-ja.vocab.txt')
     self.pretrained_BERT_model = BertModel.from_pretrained('../published_model/bert_spm/pytorch_model.bin',config=self.BERT_config)
def load(args, checkpoint_dir):
    state_dict = torch.load(os.path.join(checkpoint_dir, 'checkpoint.pth'))
    from collections import OrderedDict
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        if 'module' in k:
            namekey = k[7:]  # remove `module.`
        else:
            namekey = k
        new_state_dict[namekey] = v

    if args.model_type == 'bert':
        config = BertConfig.from_json_file(os.path.join(checkpoint_dir, 'config.bin'))
        model = BertForSequenceClassification(config)
        model.load_state_dict(new_state_dict)
    elif args.model_type == 'cnn':
        model = CNNModel(n_vocab=args.vocab_size, embed_size=args.embed_size, num_classes=args.num_labels,
                         num_filters=args.num_filters, filter_sizes=args.filter_sizes, device=args.device)
        model.load_state_dict(new_state_dict)
    elif args.model_type == 'lstm':
        model = LSTMModel(n_vocab=args.vocab_size, embed_size=args.embed_size, num_classes=args.num_labels,
                          hidden_size=args.hidden_size, device=args.device)
        model.load_state_dict(new_state_dict)
    elif args.model_type == 'char-cnn':
        model = CharCNN(num_features=args.num_features, num_classes=args.num_labels)
        model.load_state_dict(new_state_dict)
    else:
        raise ValueError('model type is not found!')

    return model.to(args.device)
def main(args):
    # Init
    set_seed(args.seed)
    processor = glue_processor[args.task_name.lower()]
    tokenizer = BertTokenizer.from_pretrained(args.model_path,
                                              do_lower_case=True)
    tokenizer.add_special_tokens(
        {"additional_special_tokens": ADDITIONAL_SPECIAL_TOKENS})

    # Data
    dev_examples = processor.get_dev_examples(args.data_dir)
    test_examples = processor.get_test_examples(args.data_dir)
    labels = processor.get_labels(args.data_dir)
    dev_data_raw = prepare_data(dev_examples, args.max_seq_len, tokenizer,
                                labels)
    test_data_raw = prepare_data(test_examples, args.max_seq_len, tokenizer,
                                 labels)

    # Model
    model_config = BertConfig.from_json_file(args.bert_config_path)
    model_config.dropout = args.dropout
    model_config.num_labels = len(labels)
    model = Model(model_config)
    ckpt = torch.load(args.model_ckpt_path, map_location='cpu')
    model.load_state_dict(ckpt, strict=False)
    model.to(device)
    evaluate(model, dev_data_raw, 'dev')
    evaluate(model, test_data_raw, 'test')

Example #11

def load_bert_from_tf(BERT_PT_PATH):
    bert_config_file = os.path.join(BERT_PT_PATH, 'config.json')

    bert_tokenizer = bt.from_pretrained(BERT_PT_PATH)
    bert_model = bm.from_pretrained(BERT_PT_PATH)
    bert_config = bc.from_json_file(bert_config_file)
    return bert_model, bert_tokenizer, bert_config
 def __init__(self,
              base_path,
              oov,
              num_labels,
              lstm_hidden_size=128,
              dropout=0.3,
              lm_flag=False):
     super(Bert_CRF, self).__init__()
     bert_config = BertConfig.from_json_file(
         os.path.join(base_path, 'config.json'))
     bert_config.num_labels = num_labels
     #hidden_states (tuple(torch.FloatTensor), optional, returned when config.output_hidden_states=True):
     bert_config.output_hidden_states = True
     bert_config.output_attentions = True
     self.bert = BertModel.from_pretrained(os.path.join(
         base_path, 'pytorch_model.bin'),
                                           config=bert_config)
     self.tokenizer = tokenizer
     self.oov = oov
     self._oov_embed()
     self.dropout = nn.Dropout(dropout)
     # LSTM input_size = bert_config.hidden_size; hidden_size (the second argument)
     # must match the first argument of the Linear classifier below.
     # try a bidirectional LSTM
     self.lm_flag = lm_flag
     self.lstm = nn.LSTM(bert_config.hidden_size,
                         lstm_hidden_size,
                         num_layers=1,
                         bidirectional=True,
                         dropout=0.3,
                         batch_first=True)
     self.clf = nn.Linear(256, bert_config.num_labels + 2)
     self.layer_norm = nn.LayerNorm(lstm_hidden_size * 2)
     self.crf = CRF(target_size=bert_config.num_labels,
                    average_batch=True,
                    use_cuda=True)

Example #13

    def __init__(self, gpu):
        if torch.cuda.is_available() and gpu is not None:
            os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu)
            print("Using GPU device: {}.".format(str(gpu)))
        from transformers import BertConfig, BertModel, BertTokenizer
        print("Initializing pretrained SciBERT model.")
        # Load pre-trained model tokenizer (vocabulary)
        self.tokenizer = BertTokenizer.from_pretrained(self.path_vocabulary)

        # Load pre-trained model (weights)
        configuration = BertConfig.from_json_file(self.path_configuration)
        configuration.output_hidden_states = True
        self.model = BertModel.from_pretrained(self.path_model,
                                               config=configuration)

        # Put the model in "evaluation" mode, meaning feed-forward operation.
        self.model.eval()
        print("SciBERT model initialized.")

        self.embedding_types = {
            "AVG_L": "_average_tokens_last_layer",
            "AVG_2L": "_average_tokens_second_to_last_layer",
            "AVG_SUM_L4": "_average_tokens_sum_last_four_layers",
            "AVG_SUM_ALL": "_average_tokens_sum_all_layers",
            "MAX_2L": "_max_tokens_second_to_last_layer",
            "CONC_AVG_MAX_2L": "_concat_avg_max_tokens_second_to_last_layer",
            "CONC_AVG_MAX_SUM_L4": "_concat_avg_max_sum_last_four_layers",
            "SUM_L": "_sum_last_layer",
            "SUM_2L": "_sum_second_to_last"
        }

Example #14

    def __init__(self, num_choices, bert_config_file, init_embeddings):
        self.num_choices = num_choices
        self.bert_config = BertConfig.from_json_file(bert_config_file)
        BertPreTrainedModel.__init__(self, self.bert_config)

        self.bert = BertModel(self.bert_config)
        self.init_weights()    # initialize the weight parameters
        self.dropout = nn.Dropout(self.bert_config.hidden_dropout_prob)

        # word-embedding matrix used for the knowledge representations
        self.vocab_size, self.embed_size = np.shape(init_embeddings)
        self.embed = nn.Embedding.from_pretrained(torch.FloatTensor(init_embeddings), freeze=False)

        #self.classifier = nn.Linear(self.bert_config.hidden_size + self.embed_size, 1)
        self.classifier = nn.Linear(self.embed_size + self.bert_config.hidden_size, 1)
        self.A = nn.Parameter(torch.Tensor(self.bert_config.hidden_size, self.embed_size))
        self.bias = nn.Parameter(torch.Tensor(1))

        # In BERT, [CLS] first passes through the Transformer layer's MLP and finally layer norm,
        # and is then fed through the BertPooler layer, which applies an nn.Tanh activation.
        self.layer_norm = nn.LayerNorm(self.embed_size, eps=self.bert_config.layer_norm_eps)
        # self.know_activation = ACT2FN["gelu"]
        self.know_activation = nn.Tanh()

        self.activation = nn.Sigmoid()

        nn.init.xavier_normal_(self.A)
        self.bias.data.fill_(0)

Example #15

def main():
    pretrained_path = '/root/zhengyanzhao/comment/emotion_extract/summariztion/torch_unilm_model'
    vocab_path = os.path.join(pretrained_path, 'vocab.txt')
    new_token_dict, keep_tokens = load_vocab(
        vocab_path,
        simplified=True,
        startswith=['[PAD]', '[UNK]', '[CLS]', '[SEP]'])
    tokenizer = BertTokenizer(new_token_dict)
    vocab_size = tokenizer.vocab_size
    config_path = os.path.join(pretrained_path, 'config.json')
    config = BertConfig.from_json_file(config_path)
    config.model_type = 'NEZHA'
    MAX_LEN = 820
    batch_size = 1
    data = load_data('sfzy_seq2seq.json')
    fold = 0
    num_folds = 100
    train_data = data_split(data, fold, num_folds, 'train')
    valid_data = data_split(data, fold, num_folds, 'valid')
    train_generator = data_generator(train_data, batch_size, MAX_LEN,
                                     tokenizer)
    model, model_pred = build_model(pretrained_path, config, MAX_LEN,
                                    vocab_size, keep_tokens)
    autotitle = AutoTitle(start_id=None,
                          end_id=new_token_dict['[SEP]'],
                          maxlen=512,
                          model=model_pred)
    evaluator = Evaluator(valid_data, autotitle, tokenizer, MAX_LEN)
    epochs = 50
    model.fit_generator(train_generator.forfit(),
                        steps_per_epoch=len(train_generator),
                        epochs=epochs,
                        callbacks=[evaluator])

Example #16

    def load_bert(self, path, max_length, use_cuda):
        self.tokenizer = BertTokenizer(path=path, max_length=max_length)
        config = BertConfig.from_json_file(os.path.join(path, "config.json"))
        self.encoder = BertModel(path=path, config=config,
                                 use_cuda=use_cuda)  # dense encoder

        return self.encoder, self.tokenizer

Example #17

 def __init__(self, device, bert_config_path=None):
     super(Summarizer, self).__init__()
     self.device = device
     self.bert_config = BertConfig.from_json_file(bert_config_path)
     self.bert = Bert(self.bert_config)
     self.encoder = Classifier(self.bert.model.config.hidden_size)
     self.to(device)

Example #18

    def __init__(self,
                 bert_model: str,
                 float_type,
                 num_labels: int,
                 max_seq_length: int,
                 final_layer_initializer=None):
        super().__init__()

        # 1. define the inputs of the model
        input_word_ids = tf.keras.Input(shape=(max_seq_length, ),
                                        dtype=tf.int32,
                                        name='input_word_ids')
        input_mask = tf.keras.Input(shape=(max_seq_length, ),
                                    dtype=tf.int32,
                                    name='input_mask')
        input_type_ids = tf.keras.Input(shape=(max_seq_length, ),
                                        dtype=tf.int32,
                                        name='input_type_ids')

        # 2. load the bert configuration
        if isinstance(bert_model, str):
            config_file = os.path.join(bert_model, 'bert_config.json')
            bert_config = BertConfig.from_json_file(config_file)
        elif isinstance(bert_model, dict):
            bert_config = BertConfig.from_dict(bert_model)

        # 3. build bert layer to get sequence output
        bert_layer = TFBertModel(config=bert_config, float_type=float_type)
        _, sequence_output = bert_layer(input_word_ids, input_mask,
                                        input_type_ids)

        # 4. restore the bert model checkpoint from the disk
        self.bert = tf.keras.Model(
            inputs=[input_word_ids, input_mask, input_type_ids],
            outputs=[sequence_output])
        if isinstance(bert_model, str):
            init_checkpoint = os.path.join(bert_model, 'bert_model.ckpt')
            checkpoint = tf.train.Checkpoint(model=self.bert)
            checkpoint.restore(
                init_checkpoint).assert_existing_objects_matched()

        # 5. set up the initializer for the final layer
        if final_layer_initializer:
            initializer = final_layer_initializer
        else:
            initializer = tf.keras.initializers.TruncatedNormal(
                stddev=bert_config.initializer_range)

        # 6. define the dropout layer
        self.dropout = tf.keras.layers.Dropout(
            rate=bert_config.hidden_dropout_prob)

        # 7. define the final classifier layer to get logits
        self.classifier = tf.keras.layers.Dense(
            units=num_labels,
            kernel_initializer=initializer,
            activation='softmax',
            name='output_layer',
        )
def launch_bert(training_flag, test_flag):
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    if training_flag is not None:
        model = BertForTokenClassification.from_pretrained(
            'bert-base-uncased', num_labels=len(tags_vals))
        ## --------- 12. Optimizer -> weight regularization is a way to reduce overfitting in a deep learning model
        """
        Recent Keras practice (2020): rates around 0.01 seem to be the best hyperparameter for weight
        regularization of the weight layers, e.g.:
            from keras.layers import LSTM
            from keras.regularizers import l2
            model.add(LSTM(32, kernel_regularizer=l2(0.01), recurrent_regularizer=l2(0.01), bias_regularizer=l2(0.01)))
        Note: for BERT, the bias, gamma and beta parameters are excluded from weight decay.
        """
        FULL_FINETUNING = True
        if FULL_FINETUNING:
            param_optimizer = list(model.named_parameters())
            no_decay = ['bias', 'gamma', 'beta']
            optimizer_grouped_parameters = [{
                'params': [
                    p for n, p in param_optimizer
                    if not any(nd in n for nd in no_decay)
                ],
                'weight_decay_rate':
                0.01
            }, {
                'params': [
                    p for n, p in param_optimizer
                    if any(nd in n for nd in no_decay)
                ],
                'weight_decay_rate':
                0.0
            }]
        else:
            param_optimizer = list(model.classifier.named_parameters())
            optimizer_grouped_parameters = [{
                "params": [p for n, p in param_optimizer]
            }]
        optimizer = AdamW(optimizer_grouped_parameters, lr=args.lr)

        launch_training(training_path=args.training_data,
                        training_epochs=4,
                        valid_path=args.validate_data,
                        training_batch_size=1,
                        model=model,
                        model_path=args.save + '/config.json',
                        tokenizer=tokenizer,
                        optimizer=optimizer)
    if test_flag is not None:
        if (args.save is not None):
            config = BertConfig.from_json_file(args.save + '/config.json')
            model = BertForTokenClassification.from_pretrained(
                pretrained_model_name_or_path=args.save + '/pytorch_model.bin',
                config=config)
        else:
            model = BertForTokenClassification.from_pretrained(
                'bert-base-uncased', num_labels=len(tags_vals))
        launch_test_directory(test_path=test_flag,
                              model=model,
                              tokenizer=tokenizer)
 def __init__(self, pre_train_dir: str):
     super().__init__()
     self.roberta_encoder = BertModel(
         config=BertConfig.from_json_file(pre_train_dir + "config.json"))
     self.decoder_layer = XLDecoder(
         dim=args["dimension"],
         embedding_matrix=self.roberta_encoder.get_input_embeddings(),
         seq_len=args["max_dec_len"])

Example #21

 def __init__(self, n_classes, dropout, tokens_length, 
               PRE_TRAINED_MODEL_NAME, PRE_TRAINED_MODEL_CONFIG):
   super(SentimentClassifier, self).__init__()
   config = BertConfig.from_json_file(PRE_TRAINED_MODEL_CONFIG)
   self.bert = BertModel.from_pretrained(PRE_TRAINED_MODEL_NAME, config=config)
   self.bert.resize_token_embeddings(tokens_length)
   self.drop = nn.Dropout(p=dropout)
   self.out = nn.Linear(self.bert.config.hidden_size, n_classes)

Example #22

class Config(object):
    max_seq_length = 16
    vocab_file = MODEL_PATH + "vocab.txt"
    bert_config_file = MODEL_PATH + "bert_config.json"
    # init_checkpoint = MODEL_PATH+"bert_model.bin"
    bert_config = BertConfig.from_json_file(bert_config_file)
    topn = 5
    bigrams = None  # pickle.load(open('bigram_dict_simplified.sav', 'rb'))

Example #23

    def __init__(
        self,
        pretrained_model_name=None,
        config_filename=None,
        vocab_size=None,
        hidden_size=768,
        num_hidden_layers=12,
        num_attention_heads=12,
        intermediate_size=3072,
        hidden_act="gelu",
        max_position_embeddings=512,
    ):
        super().__init__()

        # Check that only one of pretrained_model_name, config_filename, and
        # vocab_size was passed in
        total = 0
        if pretrained_model_name is not None:
            total += 1
        if config_filename is not None:
            total += 1
        if vocab_size is not None:
            total += 1

        if total != 1:
            raise ValueError(
                "Only one of pretrained_model_name, vocab_size, " +
                "or config_filename should be passed into the " +
                "BERT constructor.")

        # TK: The following code checks the same once again.
        if vocab_size is not None:
            config = BertConfig(
                vocab_size_or_config_json_file=vocab_size,
                vocab_size=vocab_size,
                hidden_size=hidden_size,
                num_hidden_layers=num_hidden_layers,
                num_attention_heads=num_attention_heads,
                intermediate_size=intermediate_size,
                hidden_act=hidden_act,
                max_position_embeddings=max_position_embeddings,
            )
            model = BertModel(config)
        elif pretrained_model_name is not None:
            model = BertModel.from_pretrained(pretrained_model_name)
        elif config_filename is not None:
            config = BertConfig.from_json_file(config_filename)
            model = BertModel(config)
        else:
            raise ValueError(
                "Either pretrained_model_name or vocab_size must" +
                " be passed into the BERT constructor")

        model.to(self._device)

        self.add_module("bert", model)
        self.config = model.config
        self._hidden_size = model.config.hidden_size

Example #24

    def __init__(self,
                 *,
                 pretrained_model_name=None,
                 config_filename=None,
                 vocab_size=None,
                 hidden_size=768,
                 num_hidden_layers=12,
                 num_attention_heads=12,
                 intermediate_size=3072,
                 hidden_act="gelu",
                 max_position_embeddings=512,
                 **kwargs):
        TrainableNM.__init__(self, **kwargs)

        # Check that only one of pretrained_model_name, config_filename, and
        # vocab_size was passed in
        total = 0
        if pretrained_model_name is not None:
            total += 1
        if config_filename is not None:
            total += 1
        if vocab_size is not None:
            total += 1

        if total != 1:
            raise ValueError(
                "Only one of pretrained_model_name, vocab_size, " +
                "or config_filename should be passed into the " +
                "BERT constructor.")

        if vocab_size is not None:
            config = BertConfig(
                vocab_size_or_config_json_file=vocab_size,
                vocab_size=vocab_size,
                hidden_size=hidden_size,
                num_hidden_layers=num_hidden_layers,
                num_attention_heads=num_attention_heads,
                intermediate_size=intermediate_size,
                hidden_act=hidden_act,
                max_position_embeddings=max_position_embeddings,
            )
            model = BertModel(config)
        elif pretrained_model_name is not None:
            model = BertModel.from_pretrained(pretrained_model_name)
        elif config_filename is not None:
            config = BertConfig.from_json_file(config_filename)
            model = BertModel(config)
        else:
            raise ValueError(
                "Either pretrained_model_name or vocab_size must" +
                " be passed into the BERT constructor")

        model.to(self._device)

        self.add_module("bert", model)
        self.config = model.config
        for key, value in self.config.to_dict().items():
            self._local_parameters[key] = value

Example #25

 def load_model(self, bert_config_file_name, pretrained_file_name,
                vocab_size, tagset_size, hidden_dim):
     config = BertConfig.from_json_file(bert_config_file_name)
     self.model = BertModel.from_pretrained(pretrained_file_name,
                                            config=config)
     self.birnncrf = BiRnnCrf(vocab_size=vocab_size,
                              tagset_size=tagset_size,
                              embedding_dim=config.hidden_size,
                              hidden_dim=hidden_dim)

Example #26

 def __init__(self):
     config = BertConfig.from_json_file(
         'resources/sentence_ru_cased_L-12_H-768_A-12_pt/bert_config.json')
     self._bert_tokenizer = BertTokenizer.from_pretrained(
         'resources/sentence_ru_cased_L-12_H-768_A-12_pt',
         from_pt=True,
         config=config,
         do_lower_case=True,
     )

Example #27

 def load_bert(self, path, max_length, use_cuda):
     self.tokenizer = BertTokenizer.from_pretrained(path,
                                                    max_length=max_length)
     config = BertConfig.from_json_file(os.path.join(path, "config.json"))
     self.encoder = BertModel.from_pretrained(path, config=config)
     if use_cuda:
         self.cuda = use_cuda
         self.encoder = self.encoder.cuda()
     return self.encoder, self.tokenizer

Example #28

 def __init__(self, num_choices, bert_config_file):
     self.num_choices = num_choices
     bert_config = BertConfig.from_json_file(bert_config_file)
     BertPreTrainedModel.__init__(self, bert_config)
     self.bert = BertModel(bert_config)
     self.dropout = nn.Dropout(bert_config.hidden_dropout_prob)
     self.classifier = nn.Linear(bert_config.hidden_size, 1)
     self.activation = nn.Sigmoid()
     self.init_weights()

Example #29

    def load(self, fname=None):
        if fname is not None:
            self.load_path = fname

        if self.pretrained_bert and not Path(self.pretrained_bert).is_file():
            self.model = BertForSequenceClassification.from_pretrained(
                self.pretrained_bert,
                num_labels=self.n_classes,
                output_attentions=False,
                output_hidden_states=False)
        elif self.bert_config_file and Path(self.bert_config_file).is_file():
            self.bert_config = BertConfig.from_json_file(
                str(expand_path(self.bert_config_file)))

            if self.attention_probs_keep_prob is not None:
                self.bert_config.attention_probs_dropout_prob = 1.0 - self.attention_probs_keep_prob
            if self.hidden_keep_prob is not None:
                self.bert_config.hidden_dropout_prob = 1.0 - self.hidden_keep_prob
            self.model = BertForSequenceClassification(config=self.bert_config)
        else:
            raise ConfigError("No pre-trained BERT model is given.")

        self.model.to(self.device)

        self.optimizer = getattr(torch.optim, self.optimizer_name)(
            self.model.parameters(), **self.optimizer_parameters)
        if self.lr_scheduler_name is not None:
            self.lr_scheduler = getattr(torch.optim.lr_scheduler,
                                        self.lr_scheduler_name)(
                                            self.optimizer,
                                            **self.lr_scheduler_parameters)

        if self.load_path:
            log.info(f"Load path {self.load_path} is given.")
            if isinstance(self.load_path,
                          Path) and not self.load_path.parent.is_dir():
                raise ConfigError("Provided load path is incorrect!")

            weights_path = Path(self.load_path.resolve())
            weights_path = weights_path.with_suffix(".pth.tar")
            if weights_path.exists():
                log.info(f"Load path {weights_path} exists.")
                log.info(
                    f"Initializing `{self.__class__.__name__}` from saved.")

                # now load the weights, optimizer from saved
                log.info(f"Loading weights from {weights_path}.")
                checkpoint = torch.load(weights_path, map_location=self.device)
                self.model.load_state_dict(checkpoint["model_state_dict"])
                self.optimizer.load_state_dict(
                    checkpoint["optimizer_state_dict"])
                self.epochs_done = checkpoint.get("epochs_done", 0)
            else:
                log.info(
                    f"Init from scratch. Load path {weights_path} does not exist."
                )

Example #30

 def __init__(self, BERT_PATH):
     self.config = BertConfig.from_json_file(BERT_PATH +
                                             "/bert_config.json")
     self.model = BertForPreTraining.from_pretrained(BERT_PATH +
                                                     "/bert_model.ckpt",
                                                     from_tf=True,
                                                     config=self.config)
     self.tokenizer = BertTokenizer(BERT_PATH + "/vocab.txt")
     self.model.eval()
     self.model.cuda(args.gpu_id)
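
All of the examples above follow the same basic pattern: read a local config.json (or bert_config.json) with BertConfig.from_json_file, optionally adjust fields such as num_labels or output_hidden_states, then build a tokenizer and model from that configuration. A minimal, self-contained sketch of that pattern, assuming a local directory containing config.json, vocab.txt and the PyTorch weights (the directory path and label count are placeholders, not taken from any example above):

import os
from transformers import BertConfig, BertModel, BertTokenizer

def load_local_bert(model_dir, num_labels=2):
    # Read the architecture hyperparameters from the local JSON config.
    config = BertConfig.from_json_file(os.path.join(model_dir, 'config.json'))
    # Optional tweaks, as seen in several of the examples above.
    config.num_labels = num_labels
    config.output_hidden_states = True
    # Tokenizer from the local vocabulary, weights from the local checkpoint.
    tokenizer = BertTokenizer(vocab_file=os.path.join(model_dir, 'vocab.txt'))
    model = BertModel.from_pretrained(model_dir, config=config)
    return model, tokenizer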