def __init__(self):
    # Load pre-trained model tokenizer (vocabulary)
    self.tokenizer = BertTokenizer.from_pretrained(self.bert_model)
    # Load pre-trained model (weights)
    self.model = BertForPreTraining.from_pretrained('bert-base-uncased')
    self.model.eval()
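# Hypothetical usage sketch (not part of the original class): with the
# pytorch_pretrained_bert API, calling BertForPreTraining without labels
# returns the masked-LM logits and the next-sentence-prediction logits.
import torch
from pytorch_pretrained_bert import BertTokenizer, BertForPreTraining

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForPreTraining.from_pretrained('bert-base-uncased')
model.eval()

tokens = ['[CLS]'] + tokenizer.tokenize("the cat sat on the mat") + ['[SEP]']
input_ids = torch.tensor([tokenizer.convert_tokens_to_ids(tokens)])
with torch.no_grad():
    # prediction_scores: [batch, seq_len, vocab_size]; seq_relationship_score: [batch, 2]
    prediction_scores, seq_relationship_score = model(input_ids)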
def __init__(self, vis_feat_dim=2208, spatial_size=7, hidden_dim=768,
             cmb_feat_dim=16000, kernel_size=3):
    """Initialize MCBertForPretrainingModel."""
    super(MCBertForPretrainingModel, self).__init__()
    self.vis_feat_dim = vis_feat_dim
    self.spatial_size = spatial_size
    self.hidden_dim = hidden_dim
    self.cmb_feat_dim = cmb_feat_dim
    self.kernel_size = kernel_size

    self.mcbert_model = MCBertModel(vis_feat_dim=vis_feat_dim,
                                    spatial_size=spatial_size,
                                    hidden_dim=hidden_dim,
                                    cmb_feat_dim=cmb_feat_dim,
                                    kernel_size=kernel_size)

    # Reuse the pre-training heads and vocabulary size from a stock BERT model.
    version = "bert-base-cased"
    bert_model = BertForPreTraining.from_pretrained(version)
    self.cls = bert_model.cls
    self.vocab_size = bert_model.config.vocab_size
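# Hypothetical construction sketch: MCBertForPretrainingModel and MCBertModel
# are project-specific classes defined elsewhere in the source repository, so
# this only illustrates the constructor signature shown above.
# model = MCBertForPretrainingModel(vis_feat_dim=2208, spatial_size=7,
#                                   hidden_dim=768, cmb_feat_dim=16000,
#                                   kernel_size=3)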
def get_lm_ranker(bert_model, max_seq_length=100):
    tokenizer = BertTokenizer.from_pretrained(
        os.path.join(bert_model, "vocab.txt"), do_lower_case=True)
    transform = BertLmRankingTransform(tokenizer=tokenizer,
                                       max_len=max_seq_length)

    # Prefer a saved training state if one exists, otherwise fall back to the
    # raw pretrained weights shipped as pytorch_model.bin.
    state_save_path = os.path.join(bert_model, 'model.state')
    if os.path.exists(state_save_path):
        state = torch.load(state_save_path, map_location="cpu")
        model = BertForPreTraining.from_pretrained(
            bert_model, state_dict=state['model_state'])
    else:
        previous_model_file = os.path.join(bert_model, "pytorch_model.bin")
        model_state_dict = torch.load(previous_model_file, map_location="cpu")
        model = BertForPreTraining.from_pretrained(
            bert_model, state_dict=model_state_dict)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()

    lm_ranker = LmBasedRanker(model, tokenizer, transform, device)
    return lm_ranker
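# Hypothetical usage sketch: "path/to/finetuned_bert" is a placeholder for a
# directory containing vocab.txt plus either model.state or pytorch_model.bin;
# LmBasedRanker and BertLmRankingTransform are project-specific classes defined
# elsewhere in the source repository.
# lm_ranker = get_lm_ranker("path/to/finetuned_bert", max_seq_length=100)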
def build_model(cls, args, task):
    """Build a new model instance."""
    # make sure that all args are properly defaulted
    # (in case there are any new ones)
    base_architecture(args)

    if args.encoder_layers != args.decoder_layers:
        raise ValueError('--encoder-layers must match --decoder-layers')

    def load_pretrained_embedding_from_file(embed_path, dictionary, embed_dim):
        num_embeddings = len(dictionary)
        padding_idx = dictionary.pad()
        embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
        embed_dict = utils.parse_embedding(embed_path)
        utils.print_embed_overlap(embed_dict, dictionary)
        return utils.load_embedding(embed_dict, dictionary, embed_tokens)

    if args.encoder_embed_path:
        pretrained_encoder_embed = load_pretrained_embedding_from_file(
            args.encoder_embed_path, task.source_dictionary,
            args.encoder_embed_dim)
    else:
        num_embeddings = len(task.source_dictionary)
        pretrained_encoder_embed = Embedding(
            num_embeddings, args.encoder_embed_dim,
            task.source_dictionary.pad())

    if args.share_all_embeddings:
        # double check all parameters combinations are valid
        if task.source_dictionary != task.target_dictionary:
            raise ValueError(
                '--share-all-embeddings requires a joint dictionary')
        if args.decoder_embed_path and (
                args.decoder_embed_path != args.encoder_embed_path):
            raise ValueError(
                '--share-all-embed not compatible with --decoder-embed-path')
        if args.encoder_embed_dim != args.decoder_embed_dim:
            raise ValueError(
                '--share-all-embeddings requires --encoder-embed-dim to '
                'match --decoder-embed-dim')
        pretrained_decoder_embed = pretrained_encoder_embed
        args.share_decoder_input_output_embed = True
    else:
        # separate decoder input embeddings
        pretrained_decoder_embed = None
        if args.decoder_embed_path:
            pretrained_decoder_embed = load_pretrained_embedding_from_file(
                args.decoder_embed_path, task.target_dictionary,
                args.decoder_embed_dim)

    # one last double check of parameter combinations
    if args.share_decoder_input_output_embed and (
            args.decoder_embed_dim != args.decoder_out_embed_dim):
        raise ValueError(
            '--share-decoder-input-output-embeddings requires '
            '--decoder-embed-dim to match --decoder-out-embed-dim')

    if args.encoder_freeze_embed:
        pretrained_encoder_embed.weight.requires_grad = False
    if args.decoder_freeze_embed:
        pretrained_decoder_embed.weight.requires_grad = False

    bert_model = BertForPreTraining.from_pretrained(args.bert_base)
    bert_model.load_state_dict(torch.load(args.bert_finetune))
    bert = bert_model.bert

    encoder = LSTMEncoder(
        dictionary=task.source_dictionary,
        embed_dim=args.encoder_embed_dim,
        hidden_size=args.encoder_hidden_size,
        num_layers=args.encoder_layers,
        dropout_in=args.encoder_dropout_in,
        dropout_out=args.encoder_dropout_out,
        bidirectional=args.encoder_bidirectional,
        pretrained_embed=pretrained_encoder_embed,
        layer=args.layer,
        bert=bert)
    decoder = LSTMDecoder(
        dictionary=task.target_dictionary,
        embed_dim=args.decoder_embed_dim,
        hidden_size=args.decoder_hidden_size,
        out_embed_dim=args.decoder_out_embed_dim,
        num_layers=args.decoder_layers,
        dropout_in=args.decoder_dropout_in,
        dropout_out=args.decoder_dropout_out,
        attention=options.eval_bool(args.decoder_attention),
        encoder_output_units=encoder.output_units,
        pretrained_embed=pretrained_decoder_embed,
        share_input_output_embed=args.share_decoder_input_output_embed,
        adaptive_softmax_cutoff=(
            options.eval_str_list(args.adaptive_softmax_cutoff, type=int)
            if args.criterion == 'adaptive_loss' else None),
    )
    return cls(encoder, decoder)
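# Hypothetical invocation sketch: in fairseq, build_model is a classmethod that
# the framework calls while constructing the model for a task; the class name
# below is a placeholder, and args/task come from fairseq's own setup.
# model = LSTMBertEncoderDecoderModel.build_model(args, task)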
import os  # used below for os.path.join (not in the original import list)

import matplotlib
import matplotlib.pyplot as plt
from pylab import rcParams

import torch
import torch.nn.functional as F
from pytorch_pretrained_bert import tokenization, BertTokenizer, BertModel, BertForMaskedLM, BertForPreTraining, BertConfig
from examples.extract_features import *

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

CONFIG_NAME = 'bert_config.json'
BERT_DIR = '/nas/pretrain-bert/pretrain-tensorflow/uncased_L-12_H-768_A-12/'
config_file = os.path.join(BERT_DIR, CONFIG_NAME)
config = BertConfig.from_json_file(config_file)

model = BertForPreTraining.from_pretrained(BERT_DIR)
model.eval()


class Args:
    def __init__(self):
        pass


args = Args()
args.no_cuda = False
device = torch.device(
    "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
model.to(device)
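# Hypothetical follow-up sketch (not from the original script): run a sentence
# pair through the pre-training heads loaded above, reusing the tokenizer,
# model, and device defined in the snippet.
text_a = "who was jim henson ?"
text_b = "jim henson was a puppeteer"
tokens_a = tokenizer.tokenize(text_a)
tokens_b = tokenizer.tokenize(text_b)
tokens = ['[CLS]'] + tokens_a + ['[SEP]'] + tokens_b + ['[SEP]']
segment_ids = [0] * (len(tokens_a) + 2) + [1] * (len(tokens_b) + 1)
input_ids = torch.tensor([tokenizer.convert_tokens_to_ids(tokens)]).to(device)
token_type_ids = torch.tensor([segment_ids]).to(device)
with torch.no_grad():
    prediction_scores, seq_relationship_score = model(input_ids, token_type_ids)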
# Imports required by this script (not shown in the original snippet).
import argparse

import torch
from pytorch_pretrained_bert import (BertModel, BertForMaskedLM,
                                     BertForNextSentencePrediction,
                                     BertForPreTraining,
                                     BertForSequenceClassification,
                                     BertForTokenClassification,
                                     BertForMultipleChoice)


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--batch_size', default=1, type=int,
                        help='Batch size for inference')
    parser.add_argument(
        '--bert_model', default='bert-base-cased', type=str,
        help='Bert pre-trained model selected, e.g. bert-base-uncased, '
             'bert-large-uncased, bert-base-multilingual-cased, bert-base-chinese')
    parser.add_argument(
        '--max_seq_length', default=128, type=int,
        help='Maximum total input sequence length after tokenization')
    args = parser.parse_args()

    input_ids = torch.zeros([args.batch_size, args.max_seq_length],
                            dtype=torch.long)
    token_type_ids = torch.zeros([args.batch_size, args.max_seq_length],
                                 dtype=torch.long)

    # Export various BERT models
    # Note: for the argument definitions used here, see modeling.py from the
    # pytorch-pretrained-bert repository
    #
    # Fully trained models
    model = BertModel.from_pretrained(args.bert_model)
    torch.onnx.export(
        model, (input_ids, token_type_ids),
        'bert_' + 'batch' + str(args.batch_size) + '_' + args.bert_model + '.onnx')

    model = BertForMaskedLM.from_pretrained(args.bert_model)
    torch.onnx.export(
        model, (input_ids, token_type_ids),
        'bert_maskedlm_' + 'batch' + str(args.batch_size) + '_' + args.bert_model + '.onnx')

    model = BertForNextSentencePrediction.from_pretrained(args.bert_model)
    torch.onnx.export(
        model, (input_ids, token_type_ids),
        'bert_nextsentence_' + 'batch' + str(args.batch_size) + '_' + args.bert_model + '.onnx')

    model = BertForPreTraining.from_pretrained(args.bert_model)
    torch.onnx.export(
        model, (input_ids, token_type_ids),
        'bert_pretraining_' + 'batch' + str(args.batch_size) + '_' + args.bert_model + '.onnx')

    # Partially trained models
    model = BertForSequenceClassification.from_pretrained(args.bert_model, 2)
    torch.onnx.export(
        model, (input_ids, token_type_ids),
        'bert_classify_' + 'batch' + str(args.batch_size) + '_' + args.bert_model + '.untrained.onnx')

    model = BertForTokenClassification.from_pretrained(args.bert_model, 2)
    torch.onnx.export(
        model, (input_ids, token_type_ids),
        'bert_tokenclassify_' + 'batch' + str(args.batch_size) + '_' + args.bert_model + '.untrained.onnx')

    # BertForQuestionAnswering is skipped: its ONNX export warns that
    # "squeeze with negative axis -1" might make the exported model incorrect.
    # model = BertForQuestionAnswering.from_pretrained(args.bert_model)
    # torch.onnx.export(
    #     model, (input_ids, token_type_ids),
    #     'bert_question_' + 'batch' + str(args.batch_size) + '_' + args.bert_model + '.untrained.onnx')

    choices = 2
    input_ids = torch.zeros([args.batch_size, choices, args.max_seq_length],
                            dtype=torch.long)
    token_type_ids = torch.zeros([args.batch_size, choices, args.max_seq_length],
                                 dtype=torch.long)
    model = BertForMultipleChoice.from_pretrained(args.bert_model, choices)
    torch.onnx.export(
        model, (input_ids, token_type_ids),
        'bert_multiplechoice_' + 'batch' + str(args.batch_size) + '_' + args.bert_model + '.untrained.onnx')
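# Hypothetical verification sketch (not in the original exporter): load one of
# the exported graphs with onnxruntime, an extra dependency assumed here, and
# feed zero tensors of the traced shape to confirm it runs. The filename below
# matches the default --batch_size and --bert_model values.
import numpy as np
import onnxruntime as ort

session = ort.InferenceSession('bert_batch1_bert-base-cased.onnx')
feed = {inp.name: np.zeros([1, 128], dtype=np.int64) for inp in session.get_inputs()}
outputs = session.run(None, feed)
print([out.shape for out in outputs])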