Example #1
import torch

from pytorch_pretrained_bert.modeling import BertForMaskedLM, BertConfig

# `args` is assumed to be an argparse namespace defined elsewhere in the script.
def model_transfer():
    # Build an untrained BertForMaskedLM from the config, then keep only the
    # bare BertModel so the MLM head is dropped and the state-dict keys lose
    # their leading "bert." prefix.
    model = BertForMaskedLM(
        config=BertConfig.from_json_file(args.bert_config_json))
    model = model.bert
    model_dict = model.state_dict()

    # Pretrained MLM checkpoint; its keys carry the "bert." prefix
    # (e.g. "bert.embeddings.word_embeddings.weight") plus "cls.*" head keys.
    lm_dict = torch.load('./lm_smallBert/outputs/1.41_150000_step')
    # for k, v in lm_dict.items():  # debug: inspect checkpoint tensors
    #     print(k, v)

    # Strip the 5-character "bert." prefix and keep only keys that exist in
    # the bare BertModel; the "cls.*" head keys no longer match and drop out.
    pretrained_dict = {
        k[5:]: v
        for k, v in lm_dict.items() if k[5:] in model_dict
    }
    # Update the model's own dict and load that, so a strict load cannot
    # fail on any BertModel key the checkpoint does not carry.
    model_dict.update(pretrained_dict)
    model.load_state_dict(model_dict)
    torch.save(model.state_dict(), '1.41_bert_weight.bin')
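
To verify the transfer (a minimal sketch, assuming the same `args` namespace and checkpoint paths as above), the saved weights can be loaded back into a bare BertModel and one tensor compared against the source checkpoint:

import torch

from pytorch_pretrained_bert.modeling import BertModel, BertConfig

bert = BertModel(BertConfig.from_json_file(args.bert_config_json))
bert.load_state_dict(torch.load('1.41_bert_weight.bin'))

# Spot-check a single tensor against the original MLM checkpoint.
lm_dict = torch.load('./lm_smallBert/outputs/1.41_150000_step')
assert torch.equal(bert.state_dict()['embeddings.word_embeddings.weight'],
                   lm_dict['bert.embeddings.word_embeddings.weight'])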
Example #2
import torch
from torch.utils.data import TensorDataset

from scipy.stats import pearsonr, spearmanr
from sklearn.metrics import matthews_corrcoef, f1_score

from pytorch_pretrained_bert.file_utils import WEIGHTS_NAME, CONFIG_NAME
from pytorch_pretrained_bert.modeling import BertForMaskedLM, BertConfig
from pytorch_pretrained_bert.tokenization import BertTokenizer
from pytorch_pretrained_bert.optimization import BertAdam

print(args.bert_config_json)
# Read the vocab file into a list (one token per line); the tokenizer is
# built from the same file.
vocab_list = []
with open(args.vocab_file, 'r') as fr:
    for line in fr:
        vocab_list.append(line.strip("\n"))
tokenizer = BertTokenizer(vocab_file=args.vocab_file)
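# Hypothetical sanity check (not in the original snippet): the tokenizer
# should split raw text into word pieces drawn from vocab_list, e.g.
# print(tokenizer.tokenize('hello world')).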

model = BertForMaskedLM(config=BertConfig.from_json_file(args.bert_config_json))
model.load_state_dict(torch.load('/home/hing/bert/Pretraining_Bert_From_Scratch/lm_smallBert/outputs/60000_pytorch_model.bin'))
for k, v in model.named_parameters():  # debug: inspect the loaded parameters
    print(k, v)
# BertForMaskedLM expects a BertConfig instance, not the path to the json file.
pretrain_ = BertForMaskedLM(config=BertConfig.from_json_file(args.bert_config_json))
eval_examples = create_examples(data_path=args.pretrain_dev_path,
                                max_seq_length=args.max_seq_length,
                                masked_lm_prob=args.masked_lm_prob,
                                max_predictions_per_seq=args.max_predictions_per_seq,
                                vocab_list=vocab_list)
eval_features = convert_examples_to_features(
    eval_examples, args.max_seq_length, tokenizer)
all_input_ids = torch.tensor([f.input_ids for f in eval_features], dtype=torch.long)
all_input_mask = torch.tensor([f.input_mask for f in eval_features], dtype=torch.long)
all_segment_ids = torch.tensor([f.segment_ids for f in eval_features], dtype=torch.long)
all_label_ids = torch.tensor([f.label_id for f in eval_features], dtype=torch.long)
eval_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_label_ids)
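
From here, a typical next step (a sketch, not part of the original snippet; args.eval_batch_size is an assumed argument) is to wrap eval_data in a DataLoader and run the masked-LM model over it:

from torch.utils.data import DataLoader, SequentialSampler

eval_sampler = SequentialSampler(eval_data)
eval_dataloader = DataLoader(eval_data, sampler=eval_sampler,
                             batch_size=args.eval_batch_size)

model.eval()
eval_loss, nb_eval_steps = 0.0, 0
with torch.no_grad():
    for input_ids, input_mask, segment_ids, label_ids in eval_dataloader:
        # In pytorch_pretrained_bert, BertForMaskedLM returns the masked-LM
        # loss when labels are passed as the fourth argument.
        loss = model(input_ids, segment_ids, input_mask, label_ids)
        eval_loss += loss.item()
        nb_eval_steps += 1
print('eval loss:', eval_loss / nb_eval_steps)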