def __init__(self, heads: List[TransformerHead], transformer_weights: str, model_storage_directory=None,
             evaluation_interval=1, checkpoint_interval=1, device='cuda', learning_rate=5e-5,
             transformer_layers=12, use_pretrained_heads=True):
    """
    :param transformer_weights: path or model id of the BERT weights backing the multi-tasking model.
    :param model_storage_directory: directory where the multi-tasking model is stored.
        This contains BERT weights and head weights.
    """
    self.transformer_weights = transformer_weights
    self.heads = heads
    self.model_storage_directory = model_storage_directory
    self.transformer_layers = transformer_layers
    self.device = device
    self.bert_tokenizer = BertTokenizer.from_pretrained(self.transformer_weights)
    if os.path.exists(self.transformer_weights):
        if os.path.exists(os.path.join(self.transformer_weights, CONFIG_NAME)):
            config = BertConfig.from_json_file(os.path.join(self.transformer_weights, CONFIG_NAME))
        elif os.path.exists(os.path.join(self.transformer_weights, 'bert_config.json')):
            config = BertConfig.from_json_file(os.path.join(self.transformer_weights, 'bert_config.json'))
        else:
            raise ValueError("Cannot find a configuration for the BERT-based model you are attempting to load.")
    else:
        config = BertConfig.from_pretrained(self.transformer_weights)
    config.output_hidden_states = True
    use_tf_model = 'biobert_v1' in self.transformer_weights
    self.bert = BertModel.from_pretrained(self.transformer_weights, config=config, from_tf=use_tf_model)
    for head in heads:
        if use_pretrained_heads:
            if head.from_pretrained(self.transformer_weights):
                log.info(f"Loading pretrained head: {head}")
            else:
                log.info(f"Training new head: {head}")
                if getattr(head, '_init_mlm_head', None):
                    # LM heads require the BERT model configuration.
                    head._init_mlm_head(config)
        else:
            log.info(f"Training new head: {head}")
    if not hasattr(self, 'epoch'):
        self.epoch = 0
    self.optimizer = torch.optim.Adam(
        self.bert.parameters(),
        weight_decay=0,
        lr=learning_rate
    )
def get_model(targets=ALL_TARGETS):
    config = BertConfig.from_json_file(args.model_dir / "stackx-base-cased-config.json")
    config.num_labels = len(targets)
    model = BertForQuestRegression(config)
    return model
def main():
    pretrained_path = './nuilm_small/'
    vocab_path = os.path.join(pretrained_path, 'vocab.txt')
    # new_token_dict, keep_tokens = load_vocab(vocab_path, simplified=True,
    #                                          startswith=['[PAD]', '[UNK]', '[CLS]', '[SEP]'])
    # tokenizer = BertTokenizer(new_token_dict)
    tokenizer = BertTokenizer.from_pretrained(pretrained_path)
    vocab_size = tokenizer.vocab_size
    print(vocab_size)
    config_path = os.path.join(pretrained_path, 'config.json')
    config = BertConfig.from_json_file(config_path)
    MAX_LEN = 3072
    batch_size = 8
    data = load_data('../pre_train_summary/nuion_data_pre.json')
    print(len(data))
    print(data[0][0])
    print(data[0][1])
    valid_data = data[:1]
    train_data = data[1:]
    train_generator = data_generator(train_data, batch_size, MAX_LEN, 0, tokenizer)
    K.clear_session()
    strategy = tf.distribute.MirroredStrategy()
    print('Number of devices: {}'.format(strategy.num_replicas_in_sync))
    with strategy.scope():
        model = build_model(pretrained_path, config, MAX_LEN, vocab_size)  # , keep_tokens)
    epochs = 5
    autotitle = AutoTitle(start_id=None, end_id=tokenizer.vocab['[SEP]'],
                          maxlen=600, model=model)
    evaluator = Evaluator(tokenizer, MAX_LEN, autotitle, valid_data)
    model.fit_generator(train_generator.forfit(), steps_per_epoch=len(train_generator) - 1,
                        epochs=epochs, callbacks=[evaluator])
def __init__(self,
             image_root: str,
             scibert_path: str,
             lazy: bool = False,
             limit: int = None,
             max_sequence_length: int = 512,
             different_type_for_refs: bool = True,
             use_refs: bool = True):
    super().__init__(lazy)
    self.image_root = image_root
    config = BertConfig.from_json_file(os.path.join(scibert_path, 'config.json'))
    self.tokenizer = BertTokenizer(config=config,
                                   vocab_file=os.path.join(scibert_path, 'vocab.txt'))
    self.token_indexer = {
        'tokens': BertFromConfigIndexer(config=config,
                                        vocab_path=os.path.join(scibert_path, 'vocab.txt'),
                                        namespace='bert_tokens')
    }
    expected_img_size = 224
    self.image_transform = transforms.Compose([
        transforms.Resize(expected_img_size),
        transforms.CenterCrop(expected_img_size),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    self.use_refs = use_refs
    self.different_type_for_refs = different_type_for_refs
    self.limit = limit
    self.max_sequence_length = max_sequence_length
    self.word_tokenizer = WordTokenizer()
    self.caption_field = "caption"
def __init__(self):
    super(DialogEncoder, self).__init__()
    config = BertConfig.from_json_file('config/bert_base_baseline.json')
    self.bert_pretrained = BertForPretrainingDialog.from_pretrained('bert-base-uncased',
                                                                    output_hidden_states=True)
    self.bert_pretrained.train()
    # add additional layers for the inconsistency loss
    assert self.bert_pretrained.config.output_hidden_states
def main():
    pretrained_path = './torch_unilm_model'
    vocab_path = os.path.join(pretrained_path, 'vocab.txt')
    new_token_dict, keep_tokens = load_vocab(
        vocab_path, simplified=True,
        startswith=['[PAD]', '[UNK]', '[CLS]', '[SEP]'])
    tokenizer = BertTokenizer(new_token_dict)
    vocab_size = tokenizer.vocab_size
    print(vocab_size)
    config_path = os.path.join(pretrained_path, 'config.json')
    config = BertConfig.from_json_file(config_path)
    MAX_LEN = 256
    txts = glob.glob('./THUCNews/*/*.txt')
    batch_size = 8
    train_generator = data_generator(txts, batch_size, MAX_LEN, tokenizer)
    model = build_model(pretrained_path, config, MAX_LEN, vocab_size, keep_tokens)
    steps_per_epoch = 1000
    epochs = 10000
    autotitle = AutoTitle(start_id=None, end_id=new_token_dict['[SEP]'],
                          maxlen=32, model=model)
    evaluator = Evaluator(tokenizer, MAX_LEN, autotitle)
    model.fit_generator(train_generator.forfit(), steps_per_epoch=steps_per_epoch,
                        epochs=epochs, callbacks=[evaluator])
def chat(folder_bert, voc, testing=False):
    tf.random.set_seed(1)
    tokenizer = BertTokenizer(vocab_file=folder_bert + voc)
    if testing:
        tokens = tokenizer.tokenize("jeg tror det skal regne")
        print(tokens)
        ids = tokenizer.convert_tokens_to_ids(tokens)
        print(ids)
        print("Vocab size:", len(tokenizer.vocab))
    config = BertConfig.from_json_file(folder_bert + "/config.json")
    model = BertLMHeadModel.from_pretrained(folder_bert, config=config)
    while True:
        text = input(">>User: ")
        # NOTE: the encoding/generation step was elided in the source; a plain
        # generate() call is assumed here to produce `sample_output`.
        input_ids = tokenizer.encode(text, return_tensors='pt')
        sample_output = model.generate(input_ids, max_length=50)
        print("Bot: {}".format(tokenizer.decode(sample_output[0])))
        print("Bot: {}".format(
            tokenizer.decode(sample_output[:, input_ids.shape[-1]:][0],
                             skip_special_tokens=True)))
def __init__(self, config, vocab):
    super(BERT_PRETRAINED_MODEL_JAPANESE, self).__init__()
    self.config = config
    self.vocab = vocab
    self.BERT_config = BertConfig.from_json_file('../published_model/bert_spm/bert_config.json')
    self.tokenizer = BertTokenizer.from_pretrained('./spm_model/wiki-ja.vocab.txt')
    self.pretrained_BERT_model = BertModel.from_pretrained('../published_model/bert_spm/pytorch_model.bin',
                                                           config=self.BERT_config)
def load(args, checkpoint_dir):
    state_dict = torch.load(os.path.join(checkpoint_dir, 'checkpoint.pth'))
    from collections import OrderedDict
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        if 'module' in k:
            namekey = k[7:]  # remove the `module.` prefix added by DataParallel
        else:
            namekey = k
        new_state_dict[namekey] = v
    if args.model_type == 'bert':
        config = BertConfig.from_json_file(os.path.join(checkpoint_dir, 'config.bin'))
        model = BertForSequenceClassification(config)
        model.load_state_dict(new_state_dict)
    elif args.model_type == 'cnn':
        model = CNNModel(n_vocab=args.vocab_size, embed_size=args.embed_size,
                         num_classes=args.num_labels, num_filters=args.num_filters,
                         filter_sizes=args.filter_sizes, device=args.device)
        model.load_state_dict(new_state_dict)
    elif args.model_type == 'lstm':
        model = LSTMModel(n_vocab=args.vocab_size, embed_size=args.embed_size,
                          num_classes=args.num_labels, hidden_size=args.hidden_size,
                          device=args.device)
        model.load_state_dict(new_state_dict)
    elif args.model_type == 'char-cnn':
        model = CharCNN(num_features=args.num_features, num_classes=args.num_labels)
        model.load_state_dict(new_state_dict)
    else:
        raise ValueError('model type not found!')
    return model.to(args.device)
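# A minimal call sketch for load() above, assuming a checkpoint directory that
# contains checkpoint.pth and config.bin; SimpleNamespace stands in for the
# real argparse namespace, and the path is illustrative only.
from types import SimpleNamespace

args = SimpleNamespace(model_type='bert', device='cpu')
model = load(args, checkpoint_dir='./checkpoints/run1')
model.eval()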
def main(args):
    # Init
    set_seed(args.seed)
    processor = glue_processor[args.task_name.lower()]
    tokenizer = BertTokenizer.from_pretrained(args.model_path, do_lower_case=True)
    tokenizer.add_special_tokens(
        {"additional_special_tokens": ADDITIONAL_SPECIAL_TOKENS})

    # Data
    dev_examples = processor.get_dev_examples(args.data_dir)
    test_examples = processor.get_test_examples(args.data_dir)
    labels = processor.get_labels(args.data_dir)
    dev_data_raw = prepare_data(dev_examples, args.max_seq_len, tokenizer, labels)
    test_data_raw = prepare_data(test_examples, args.max_seq_len, tokenizer, labels)

    # Model
    model_config = BertConfig.from_json_file(args.bert_config_path)
    model_config.dropout = args.dropout
    model_config.num_labels = len(labels)
    model = Model(model_config)
    ckpt = torch.load(args.model_ckpt_path, map_location='cpu')
    model.load_state_dict(ckpt, strict=False)
    model.to(device)

    evaluate(model, dev_data_raw, 'dev')
    evaluate(model, test_data_raw, 'test')
def load_bert_from_tf(BERT_PT_PATH):
    bert_config_file = os.path.join(BERT_PT_PATH, 'config.json')
    bert_tokenizer = bt.from_pretrained(BERT_PT_PATH)
    bert_model = bm.from_pretrained(BERT_PT_PATH)
    bert_config = bc.from_json_file(bert_config_file)
    return bert_model, bert_tokenizer, bert_config
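# The aliases bt/bm/bc used above are presumably bound at import time; a
# plausible binding and call site (an assumption, not shown in the source):
from transformers import BertTokenizer as bt, BertModel as bm, BertConfig as bc

bert_model, bert_tokenizer, bert_config = load_bert_from_tf('./bert_pt/')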
def __init__(self, base_path, oov, num_labels, lstm_hidden_size=128,
             dropout=0.3, lm_flag=False):
    super(Bert_CRF, self).__init__()
    bert_config = BertConfig.from_json_file(os.path.join(base_path, 'config.json'))
    bert_config.num_labels = num_labels
    # hidden_states (tuple(torch.FloatTensor), optional) is returned when
    # config.output_hidden_states=True
    bert_config.output_hidden_states = True
    bert_config.output_attentions = True
    self.bert = BertModel.from_pretrained(os.path.join(base_path, 'pytorch_model.bin'),
                                          config=bert_config)
    self.tokenizer = tokenizer
    self.oov = oov
    self._oov_embed()
    self.dropout = nn.Dropout(dropout)
    # LSTM: input_size is bert_config.hidden_size; hidden_size (the second
    # argument) must line up with the first argument of the Linear layer below.
    # A bidirectional LSTM is used, so the classifier input is 2 * lstm_hidden_size.
    self.lm_flag = lm_flag
    self.lstm = nn.LSTM(bert_config.hidden_size, lstm_hidden_size, num_layers=1,
                        bidirectional=True, dropout=0.3, batch_first=True)
    self.clf = nn.Linear(256, bert_config.num_labels + 2)
    self.layer_norm = nn.LayerNorm(lstm_hidden_size * 2)
    self.crf = CRF(target_size=bert_config.num_labels, average_batch=True, use_cuda=True)
def __init__(self, gpu):
    if torch.cuda.is_available() and gpu is not None:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu)
        print("Using GPU device: {}.".format(str(gpu)))
    from transformers import BertConfig, BertModel, BertTokenizer
    print("Initializing pretrained SciBERT model.")
    # Load pre-trained model tokenizer (vocabulary)
    self.tokenizer = BertTokenizer.from_pretrained(self.path_vocabulary)
    # Load pre-trained model (weights)
    configuration = BertConfig.from_json_file(self.path_configuration)
    configuration.output_hidden_states = True
    self.model = BertModel.from_pretrained(self.path_model, config=configuration)
    # Put the model in "evaluation" mode, meaning feed-forward operation.
    self.model.eval()
    print("SciBERT model initialized.")
    self.embedding_types = {
        "AVG_L": "_average_tokens_last_layer",
        "AVG_2L": "_average_tokens_second_to_last_layer",
        "AVG_SUM_L4": "_average_tokens_sum_last_four_layers",
        "AVG_SUM_ALL": "_average_tokens_sum_all_layers",
        "MAX_2L": "_max_tokens_second_to_last_layer",
        "CONC_AVG_MAX_2L": "_concat_avg_max_tokens_second_to_last_layer",
        "CONC_AVG_MAX_SUM_L4": "_concat_avg_max_sum_last_four_layers",
        "SUM_L": "_sum_last_layer",
        "SUM_2L": "_sum_second_to_last"
    }
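# A rough sketch of how the "AVG_L" strategy registered above could consume
# the exposed hidden states on a recent transformers version; the function
# name and call pattern here are assumptions, not the class's actual method.
import torch

def _average_tokens_last_layer_sketch(model, tokenizer, sentence):
    inputs = tokenizer(sentence, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
    # hidden_states is a tuple of (num_layers + 1) tensors because
    # output_hidden_states was enabled on the configuration.
    last_layer = outputs.hidden_states[-1]    # (1, seq_len, hidden_size)
    return last_layer.mean(dim=1).squeeze(0)  # (hidden_size,)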
def __init__(self, num_choices, bert_config_file, init_embeddings):
    self.num_choices = num_choices
    self.bert_config = BertConfig.from_json_file(bert_config_file)
    BertPreTrainedModel.__init__(self, self.bert_config)
    self.bert = BertModel(self.bert_config)
    self.init_weights()  # initialize weight parameters
    self.dropout = nn.Dropout(self.bert_config.hidden_dropout_prob)
    # Word-embedding matrix used for the knowledge representations.
    self.vocab_size, self.embed_size = np.shape(init_embeddings)
    self.embed = nn.Embedding.from_pretrained(torch.FloatTensor(init_embeddings), freeze=False)
    # self.classifier = nn.Linear(self.bert_config.hidden_size + self.embed_size, 1)
    self.classifier = nn.Linear(self.embed_size + self.bert_config.hidden_size, 1)
    self.A = nn.Parameter(torch.Tensor(self.bert_config.hidden_size, self.embed_size))
    self.bias = nn.Parameter(torch.Tensor(1))
    # In BERT, [CLS] first passes through the Transformer MLP followed by a
    # layer norm, then through the BertPooler layer with an nn.Tanh activation.
    self.layer_norm = nn.LayerNorm(self.embed_size, eps=self.bert_config.layer_norm_eps)
    # self.know_activation = ACT2FN["gelu"]
    self.know_activation = nn.Tanh()
    self.activation = nn.Sigmoid()
    nn.init.xavier_normal_(self.A)
    self.bias.data.fill_(0)
def main():
    pretrained_path = '/root/zhengyanzhao/comment/emotion_extract/summariztion/torch_unilm_model'
    vocab_path = os.path.join(pretrained_path, 'vocab.txt')
    new_token_dict, keep_tokens = load_vocab(
        vocab_path, simplified=True,
        startswith=['[PAD]', '[UNK]', '[CLS]', '[SEP]'])
    tokenizer = BertTokenizer(new_token_dict)
    vocab_size = tokenizer.vocab_size
    config_path = os.path.join(pretrained_path, 'config.json')
    config = BertConfig.from_json_file(config_path)
    config.model_type = 'NEZHA'
    MAX_LEN = 820
    batch_size = 1
    data = load_data('sfzy_seq2seq.json')
    fold = 0
    num_folds = 100
    train_data = data_split(data, fold, num_folds, 'train')
    valid_data = data_split(data, fold, num_folds, 'valid')
    train_generator = data_generator(train_data, batch_size, MAX_LEN, tokenizer)
    model, model_pred = build_model(pretrained_path, config, MAX_LEN,
                                    vocab_size, keep_tokens)
    autotitle = AutoTitle(start_id=None, end_id=new_token_dict['[SEP]'],
                          maxlen=512, model=model_pred)
    evaluator = Evaluator(valid_data, autotitle, tokenizer, MAX_LEN)
    epochs = 50
    model.fit_generator(train_generator.forfit(), steps_per_epoch=len(train_generator),
                        epochs=epochs, callbacks=[evaluator])
def load_bert(self, path, max_length, use_cuda):
    self.tokenizer = BertTokenizer(path=path, max_length=max_length)
    config = BertConfig.from_json_file(os.path.join(path, "config.json"))
    self.encoder = BertModel(path=path, config=config, use_cuda=use_cuda)  # dense encoder
    return self.encoder, self.tokenizer
def __init__(self, device, bert_config_path=None):
    super(Summarizer, self).__init__()
    self.device = device
    self.bert_config = BertConfig.from_json_file(bert_config_path)
    self.bert = Bert(self.bert_config)
    self.encoder = Classifier(self.bert.model.config.hidden_size)
    self.to(device)
def __init__(self, bert_model: str, float_type, num_labels: int,
             max_seq_length: int, final_layer_initializer=None):
    super().__init__()
    # 1. define the inputs of the model
    input_word_ids = tf.keras.Input(shape=(max_seq_length,), dtype=tf.int32,
                                    name='input_word_ids')
    input_mask = tf.keras.Input(shape=(max_seq_length,), dtype=tf.int32,
                                name='input_mask')
    input_type_ids = tf.keras.Input(shape=(max_seq_length,), dtype=tf.int32,
                                    name='input_type_ids')

    # 2. load the bert configuration
    if isinstance(bert_model, str):
        config_file = os.path.join(bert_model, 'bert_config.json')
        bert_config = BertConfig.from_json_file(config_file)
    elif isinstance(bert_model, dict):
        bert_config = BertConfig.from_dict(bert_model)

    # 3. build the bert layer to get the sequence output
    bert_layer = TFBertModel(config=bert_config, float_type=float_type)
    _, sequence_output = bert_layer(input_word_ids, input_mask, input_type_ids)

    # 4. restore the bert model checkpoint from disk
    self.bert = tf.keras.Model(
        inputs=[input_word_ids, input_mask, input_type_ids],
        outputs=[sequence_output])
    if isinstance(bert_model, str):
        init_checkpoint = os.path.join(bert_model, 'bert_model.ckpt')
        checkpoint = tf.train.Checkpoint(model=self.bert)
        checkpoint.restore(init_checkpoint).assert_existing_objects_matched()

    # 5. set up the initializer for the final layer
    if final_layer_initializer:
        initializer = final_layer_initializer
    else:
        initializer = tf.keras.initializers.TruncatedNormal(
            stddev=bert_config.initializer_range)

    # 6. define the dropout layer
    self.dropout = tf.keras.layers.Dropout(rate=bert_config.hidden_dropout_prob)

    # 7. define the final classifier layer that produces the logits
    self.classifier = tf.keras.layers.Dense(
        units=num_labels,
        kernel_initializer=initializer,
        activation='softmax',
        name='output_layer',
    )
def launch_bert(training_flag, test_flag):
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    if training_flag is not None:
        model = BertForTokenClassification.from_pretrained(
            'bert-base-uncased', num_labels=len(tags_vals))
        # --------- 12. Optimizer -> weight regularization is one way to reduce
        # overfitting in a deep learning model.
        """
        Recent Keras practice (rates around 0.01 seem to be the best
        hyperparameter) for weight regularization of weight layers:
            from keras.layers import LSTM
            from keras.regularizers import l2
            model.add(LSTM(32, kernel_regularizer=l2(0.01),
                           recurrent_regularizer=l2(0.01),
                           bias_regularizer=l2(0.01)))
        Note: BERT's beta and gamma (LayerNorm) parameters are excluded from
        weight decay during optimization.
        """
        FULL_FINETUNING = True
        if FULL_FINETUNING:
            param_optimizer = list(model.named_parameters())
            no_decay = ['bias', 'gamma', 'beta']
            optimizer_grouped_parameters = [
                {'params': [p for n, p in param_optimizer
                            if not any(nd in n for nd in no_decay)],
                 'weight_decay_rate': 0.01},
                {'params': [p for n, p in param_optimizer
                            if any(nd in n for nd in no_decay)],
                 'weight_decay_rate': 0.0}
            ]
        else:
            param_optimizer = list(model.classifier.named_parameters())
            optimizer_grouped_parameters = [{"params": [p for n, p in param_optimizer]}]
        optimizer = AdamW(optimizer_grouped_parameters, lr=args.lr)
        launch_training(training_path=args.training_data, training_epochs=4,
                        valid_path=args.validate_data, training_batch_size=1,
                        model=model, model_path=args.save + '/config.json',
                        tokenizer=tokenizer, optimizer=optimizer)
    if test_flag is not None:
        if args.save is not None:
            config = BertConfig.from_json_file(args.save + '/config.json')
            model = BertForTokenClassification.from_pretrained(
                pretrained_model_name_or_path=args.save + '/pytorch_model.bin',
                config=config)
        else:
            model = BertForTokenClassification.from_pretrained(
                'bert-base-uncased', num_labels=len(tags_vals))
        launch_test_directory(test_path=test_flag, model=model, tokenizer=tokenizer)
def __init__(self, pre_train_dir: str):
    super().__init__()
    self.roberta_encoder = BertModel(
        config=BertConfig.from_json_file(pre_train_dir + "config.json"))
    self.decoder_layer = XLDecoder(
        dim=args["dimension"],
        embedding_matrix=self.roberta_encoder.get_input_embeddings(),
        seq_len=args["max_dec_len"])
def __init__(self, n_classes, dropout, tokens_length,
             PRE_TRAINED_MODEL_NAME, PRE_TRAINED_MODEL_CONFIG):
    super(SentimentClassifier, self).__init__()
    config = BertConfig.from_json_file(PRE_TRAINED_MODEL_CONFIG)
    self.bert = BertModel.from_pretrained(PRE_TRAINED_MODEL_NAME, config=config)
    self.bert.resize_token_embeddings(tokens_length)
    self.drop = nn.Dropout(p=dropout)
    self.out = nn.Linear(self.bert.config.hidden_size, n_classes)
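# A hedged instantiation sketch for SentimentClassifier above; the paths,
# label count, and extended-vocabulary size are assumptions for illustration.
model = SentimentClassifier(n_classes=3,
                            dropout=0.3,
                            tokens_length=30524,
                            PRE_TRAINED_MODEL_NAME='bert-base-cased',
                            PRE_TRAINED_MODEL_CONFIG='./bert/config.json')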
class Config(object):
    max_seq_length = 16
    vocab_file = MODEL_PATH + "vocab.txt"
    bert_config_file = MODEL_PATH + "bert_config.json"
    # init_checkpoint = MODEL_PATH + "bert_model.bin"
    bert_config = BertConfig.from_json_file(bert_config_file)
    topn = 5
    bigrams = None  # pickle.load(open('bigram_dict_simplified.sav', 'rb'))
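# A quick sanity check (a sketch, assuming MODEL_PATH points at a standard
# BERT release) that the parsed config exposes the fields later code reads:
cfg = Config.bert_config
print(cfg.vocab_size, cfg.hidden_size, cfg.num_hidden_layers)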
def __init__(
    self,
    pretrained_model_name=None,
    config_filename=None,
    vocab_size=None,
    hidden_size=768,
    num_hidden_layers=12,
    num_attention_heads=12,
    intermediate_size=3072,
    hidden_act="gelu",
    max_position_embeddings=512,
):
    super().__init__()

    # Check that exactly one of pretrained_model_name, config_filename,
    # and vocab_size was passed in.
    total = 0
    if pretrained_model_name is not None:
        total += 1
    if config_filename is not None:
        total += 1
    if vocab_size is not None:
        total += 1

    if total != 1:
        raise ValueError(
            "Only one of pretrained_model_name, vocab_size, "
            "or config_filename should be passed into the "
            "BERT constructor.")

    # Note: the branches below re-validate the same mutually exclusive
    # arguments that were just counted above.
    if vocab_size is not None:
        config = BertConfig(
            vocab_size_or_config_json_file=vocab_size,
            vocab_size=vocab_size,
            hidden_size=hidden_size,
            num_hidden_layers=num_hidden_layers,
            num_attention_heads=num_attention_heads,
            intermediate_size=intermediate_size,
            hidden_act=hidden_act,
            max_position_embeddings=max_position_embeddings,
        )
        model = BertModel(config)
    elif pretrained_model_name is not None:
        model = BertModel.from_pretrained(pretrained_model_name)
    elif config_filename is not None:
        config = BertConfig.from_json_file(config_filename)
        model = BertModel(config)
    else:
        raise ValueError(
            "Either pretrained_model_name or vocab_size must "
            "be passed into the BERT constructor")

    model.to(self._device)
    self.add_module("bert", model)
    self.config = model.config
    self._hidden_size = model.config.hidden_size
def __init__(self, *,
             pretrained_model_name=None,
             config_filename=None,
             vocab_size=None,
             hidden_size=768,
             num_hidden_layers=12,
             num_attention_heads=12,
             intermediate_size=3072,
             hidden_act="gelu",
             max_position_embeddings=512,
             **kwargs):
    TrainableNM.__init__(self, **kwargs)

    # Check that exactly one of pretrained_model_name, config_filename,
    # and vocab_size was passed in.
    total = 0
    if pretrained_model_name is not None:
        total += 1
    if config_filename is not None:
        total += 1
    if vocab_size is not None:
        total += 1

    if total != 1:
        raise ValueError(
            "Only one of pretrained_model_name, vocab_size, "
            "or config_filename should be passed into the "
            "BERT constructor.")

    if vocab_size is not None:
        config = BertConfig(
            vocab_size_or_config_json_file=vocab_size,
            vocab_size=vocab_size,
            hidden_size=hidden_size,
            num_hidden_layers=num_hidden_layers,
            num_attention_heads=num_attention_heads,
            intermediate_size=intermediate_size,
            hidden_act=hidden_act,
            max_position_embeddings=max_position_embeddings,
        )
        model = BertModel(config)
    elif pretrained_model_name is not None:
        model = BertModel.from_pretrained(pretrained_model_name)
    elif config_filename is not None:
        config = BertConfig.from_json_file(config_filename)
        model = BertModel(config)
    else:
        raise ValueError(
            "Either pretrained_model_name or vocab_size must "
            "be passed into the BERT constructor")

    model.to(self._device)
    self.add_module("bert", model)
    self.config = model.config
    for key, value in self.config.to_dict().items():
        self._local_parameters[key] = value
def load_model(self, bert_config_file_name, pretrained_file_name,
               vocab_size, tagset_size, hidden_dim):
    config = BertConfig.from_json_file(bert_config_file_name)
    self.model = BertModel.from_pretrained(pretrained_file_name, config=config)
    self.birnncrf = BiRnnCrf(vocab_size=vocab_size,
                             tagset_size=tagset_size,
                             embedding_dim=config.hidden_size,
                             hidden_dim=hidden_dim)
def __init__(self):
    config = BertConfig.from_json_file(
        'resources/sentence_ru_cased_L-12_H-768_A-12_pt/bert_config.json')
    self._bert_tokenizer = BertTokenizer.from_pretrained(
        'resources/sentence_ru_cased_L-12_H-768_A-12_pt',
        from_pt=True,
        config=config,
        do_lower_case=True,
    )
def load_bert(self, path, max_length, use_cuda):
    self.tokenizer = BertTokenizer.from_pretrained(path, max_length=max_length)
    config = BertConfig.from_json_file(os.path.join(path, "config.json"))
    self.encoder = BertModel.from_pretrained(path, config=config)
    if use_cuda:
        self.cuda = use_cuda
        self.encoder = self.encoder.cuda()
    return self.encoder, self.tokenizer
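# A minimal usage sketch for the HF-native load_bert variant above;
# `retriever` is a hypothetical instance exposing load_bert(), and the
# directory is assumed to hold an HF-format config.json plus weights.
encoder, tokenizer = retriever.load_bert('./bert-base-uncased/',
                                         max_length=128, use_cuda=False)
inputs = tokenizer("dense retrieval query", return_tensors="pt",
                   truncation=True, max_length=128)
with torch.no_grad():
    pooled = encoder(**inputs).pooler_output  # (1, hidden_size) sentence vector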
def __init__(self, num_choices, bert_config_file):
    self.num_choices = num_choices
    bert_config = BertConfig.from_json_file(bert_config_file)
    BertPreTrainedModel.__init__(self, bert_config)
    self.bert = BertModel(bert_config)
    self.dropout = nn.Dropout(bert_config.hidden_dropout_prob)
    self.classifier = nn.Linear(bert_config.hidden_size, 1)
    self.activation = nn.Sigmoid()
    self.init_weights()
def load(self, fname=None):
    if fname is not None:
        self.load_path = fname

    if self.pretrained_bert and not Path(self.pretrained_bert).is_file():
        self.model = BertForSequenceClassification.from_pretrained(
            self.pretrained_bert, num_labels=self.n_classes,
            output_attentions=False, output_hidden_states=False)
    elif self.bert_config_file and Path(self.bert_config_file).is_file():
        self.bert_config = BertConfig.from_json_file(
            str(expand_path(self.bert_config_file)))
        if self.attention_probs_keep_prob is not None:
            self.bert_config.attention_probs_dropout_prob = 1.0 - self.attention_probs_keep_prob
        if self.hidden_keep_prob is not None:
            self.bert_config.hidden_dropout_prob = 1.0 - self.hidden_keep_prob
        self.model = BertForSequenceClassification(config=self.bert_config)
    else:
        raise ConfigError("No pre-trained BERT model is given.")

    self.model.to(self.device)

    self.optimizer = getattr(torch.optim, self.optimizer_name)(
        self.model.parameters(), **self.optimizer_parameters)
    if self.lr_scheduler_name is not None:
        self.lr_scheduler = getattr(torch.optim.lr_scheduler, self.lr_scheduler_name)(
            self.optimizer, **self.lr_scheduler_parameters)

    if self.load_path:
        log.info(f"Load path {self.load_path} is given.")
        if isinstance(self.load_path, Path) and not self.load_path.parent.is_dir():
            raise ConfigError("Provided load path is incorrect!")

        weights_path = Path(self.load_path.resolve())
        weights_path = weights_path.with_suffix(".pth.tar")
        if weights_path.exists():
            log.info(f"Load path {weights_path} exists.")
            log.info(f"Initializing `{self.__class__.__name__}` from saved.")

            # Now load the model weights and optimizer state from the checkpoint.
            log.info(f"Loading weights from {weights_path}.")
            checkpoint = torch.load(weights_path, map_location=self.device)
            self.model.load_state_dict(checkpoint["model_state_dict"])
            self.optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
            self.epochs_done = checkpoint.get("epochs_done", 0)
        else:
            log.info(f"Init from scratch. Load path {weights_path} does not exist.")
def __init__(self, BERT_PATH):
    self.config = BertConfig.from_json_file(BERT_PATH + "/bert_config.json")
    self.model = BertForPreTraining.from_pretrained(BERT_PATH + "/bert_model.ckpt",
                                                    from_tf=True,
                                                    config=self.config)
    self.tokenizer = BertTokenizer(BERT_PATH + "/vocab.txt")
    self.model.eval()
    self.model.cuda(args.gpu_id)