Example #1
# Imports assumed by this snippet (package names follow the later examples):
import re

import numpy as np
import tensorflow as tf
import torch
from pytorch_pretrained_bert import BertConfig, BertForPreTraining

def convert_tf_checkpoint_to_pytorch():
    # gave error originally. Solution found at: https://github.com/tensorflow/models/issues/2676
    tf_path = 'weights/biobert_v1.1_pubmed/model.ckpt-1000000'
    init_vars = tf.train.list_variables(tf_path)
    excluded = ['BERTAdam', '_power', 'global_step']
    init_vars = list(filter(lambda x: all(e not in x[0] for e in excluded), init_vars))
    print(init_vars)

    names = []
    arrays = []

    for name, shape in init_vars:
        print("Loading TF weights {} with shape {}".format(name,shape))
        array = tf.train.load_variable(tf_path,name)
        names.append(name)
        arrays.append(array)

    config = BertConfig.from_json_file('weights/biobert_v1.1_pubmed/bert_config.json')
    print('Building Pytorch model from configuration {}'.format(str(config)))
    model = BertForPreTraining(config)

    for name, array in zip(names,arrays):
        name = name.split('/')
        # adam_v and adam_m are variables used in AdamWeightDecayOptimizer to calculate m and v,
        # which are not required for using the pretrained model
        if any(n in ["adam_v", "adam_m", "global_step"] for n in name):
            print("Skipping {}".format("/".join(name)))
            continue
        pointer = model
        for m_name in name:
            if re.fullmatch(r'[A-Za-z]+_\d+', m_name):
                l = re.split(r'_(\d+)', m_name)
            else:
                l = [m_name]
            if l[0] == 'kernel' or l[0] == 'gamma':
                pointer = getattr(pointer, 'weight')
            elif l[0] == 'output_bias' or l[0] == 'beta':
                pointer = getattr(pointer, 'bias')
            elif l[0] == 'output_weights':
                pointer = getattr(pointer, 'weight')
            else:
                pointer = getattr(pointer, l[0])
            if len(l) >= 2:
                num = int(l[1])
                pointer = pointer[num]
        if m_name[-11:] == '_embeddings':
            pointer = getattr(pointer, 'weight')
        elif m_name == 'kernel':
            array = np.transpose(array)
        try:
            assert pointer.shape == array.shape
        except AssertionError as e:
            e.args += (pointer.shape, array.shape)
            raise
        print("Initialize PyTorch weight {}".format(name))
        pointer.data = torch.from_numpy(array)

    # Save pytorch-model
    print("Save PyTorch model to {}".format('weights/'))
    torch.save(model.state_dict(), 'weights/pytorch_weight')
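
To sanity-check the conversion, the saved state dict can be loaded back into a fresh model built from the same config (a minimal sketch reusing the paths above):

# Round-trip check for the conversion above (paths as in the example).
config = BertConfig.from_json_file('weights/biobert_v1.1_pubmed/bert_config.json')
model = BertForPreTraining(config)
model.load_state_dict(torch.load('weights/pytorch_weight', map_location='cpu'))
model.eval()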
Example #2
def load_BFTC_from_TF_ckpt(bert_config, ckpt_path, num_labels):
    """
    Helper function for loading model - workaround to prevent error
    """
    config = BertConfig.from_json_file(bert_config)
    model = BertForPreTraining(config)
    load_tf_weights_in_bert(model, ckpt_path)
    state_dict = model.state_dict()
    model = BertForTokenClassification(config, num_labels=num_labels)

    # Load from a PyTorch state_dict
    old_keys = []
    new_keys = []
    for key in state_dict.keys():
        new_key = None
        if 'gamma' in key:
            new_key = key.replace('gamma', 'weight')
        if 'beta' in key:
            new_key = key.replace('beta', 'bias')
        if new_key:
            old_keys.append(key)
            new_keys.append(new_key)
    for old_key, new_key in zip(old_keys, new_keys):
        state_dict[new_key] = state_dict.pop(old_key)

    missing_keys = []
    unexpected_keys = []
    error_msgs = []
    # copy state_dict so _load_from_state_dict can modify it
    metadata = getattr(state_dict, '_metadata', None)
    state_dict = state_dict.copy()
    if metadata is not None:
        state_dict._metadata = metadata

    def load(module, prefix=''):
        local_metadata = {} if metadata is None else metadata.get(prefix[:-1], {})
        module._load_from_state_dict(
            state_dict, prefix, local_metadata, True, missing_keys, unexpected_keys, error_msgs)
        for name, child in module._modules.items():
            if child is not None:
                load(child, prefix + name + '.')
    start_prefix = ''
    if not hasattr(model, 'bert') and any(s.startswith('bert.') for s in state_dict.keys()):
        start_prefix = 'bert.'
    load(model, prefix=start_prefix)
    if len(missing_keys) > 0:
        print("Weights of {} not initialized from pretrained model: {}".format(
            model.__class__.__name__, missing_keys))
    if len(unexpected_keys) > 0:
        print("Weights from pretrained model not used in {}: {}".format(
            model.__class__.__name__, unexpected_keys))
    if len(error_msgs) > 0:
        raise RuntimeError('Error(s) in loading state_dict for {}:\n\t{}'.format(
                           model.__class__.__name__, "\n\t".join(error_msgs)))
    return model
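
A hypothetical call to the helper above (paths and label count are placeholders):

# Hypothetical usage; paths and num_labels are placeholders, not from the original.
model = load_BFTC_from_TF_ckpt(
    bert_config='weights/biobert_v1.1_pubmed/bert_config.json',
    ckpt_path='weights/biobert_v1.1_pubmed/model.ckpt-1000000',
    num_labels=8)
model.eval()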
Example #3
def _convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, bert_config_file, pytorch_dump_path):
    # adapted from https://github.com/huggingface/pytorch-pretrained-BERT/blob/master/pytorch_pretrained_bert/convert_tf_checkpoint_to_pytorch.py#L30
    # Initialise PyTorch model
    config = BertConfig.from_json_file(bert_config_file)
    model = BertForPreTraining(config)

    # Load weights from tf checkpoint
    _load_tf_weights_in_bert(model, tf_checkpoint_path)

    # Save pytorch-model
    torch.save(model.state_dict(), pytorch_dump_path)
Example #4
    def create_bert_for_pretraining(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels):
        model = BertForPreTraining(config=config)
        model.eval()
        loss = model(input_ids, token_type_ids, input_mask, token_labels, sequence_labels)
        prediction_scores, seq_relationship_score = model(input_ids, token_type_ids, input_mask)
        outputs = {
            "loss": loss,
            "prediction_scores": prediction_scores,
            "seq_relationship_score": seq_relationship_score,
        }
        return outputs
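
The method above exercises both call modes of BertForPreTraining: with labels the forward pass returns a single loss, without labels it returns the masked-LM scores and the next-sentence logits. A self-contained sketch with a tiny made-up configuration:

import torch
from pytorch_pretrained_bert import BertConfig, BertForPreTraining

# Tiny made-up config so the sketch runs quickly; real checkpoints use bert_config.json.
config = BertConfig(vocab_size_or_config_json_file=128, hidden_size=32,
                    num_hidden_layers=2, num_attention_heads=2, intermediate_size=64)
model = BertForPreTraining(config)
model.eval()

input_ids = torch.randint(0, 128, (2, 16), dtype=torch.long)
token_type_ids = torch.zeros(2, 16, dtype=torch.long)
input_mask = torch.ones(2, 16, dtype=torch.long)
masked_lm_labels = torch.randint(0, 128, (2, 16), dtype=torch.long)
next_sentence_label = torch.randint(0, 2, (2,), dtype=torch.long)

# With labels: one scalar loss (masked LM + next-sentence prediction).
loss = model(input_ids, token_type_ids, input_mask, masked_lm_labels, next_sentence_label)
# Without labels: per-token vocabulary scores and next-sentence logits.
prediction_scores, seq_relationship_score = model(input_ids, token_type_ids, input_mask)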
Example #5
def load_biobert_model(biobert_pth, device):
    """Read saved state dict for biobert model on disk

    Args:
        biobert_pth: str, folder path where model state dictionary and config file are saved
    """
    bert_config_file = os.path.join(biobert_pth, 'bert_config.json')
    config = BertConfig.from_json_file(bert_config_file)
    print("Building PyTorch model from configuration: {}".format(str(config)))
    model = BertForPreTraining(config)
    model.load_state_dict(torch.load(os.path.join(biobert_pth, 'biobert_statedict.pkl'), map_location=device))
    return model
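
Hypothetical usage of the loader above (the folder path is a placeholder):

# The folder path is a placeholder; it must contain bert_config.json and biobert_statedict.pkl.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
biobert = load_biobert_model('weights/biobert_v1.1_pubmed', device)
biobert.to(device)
biobert.eval()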
Example #6
def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, bert_config_file,
                                     pytorch_dump_path):
    # Initialise PyTorch model
    config = BertConfig.from_json_file(bert_config_file)
    print("Building PyTorch model from configuration: {}".format(str(config)))
    model = BertForPreTraining(config)

    # Load weights from tf checkpoint
    load_tf_weights_in_bert(model, tf_checkpoint_path)

    # Save pytorch-model
    print("Save PyTorch model to {}".format(pytorch_dump_path))
    torch.save(model.state_dict(), pytorch_dump_path)
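
A small argparse wrapper is the usual way to drive this converter from the command line; an illustrative sketch (argument names are not from the original):

# Illustrative command-line wrapper around the converter above.
if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--tf_checkpoint_path", type=str, required=True)
    parser.add_argument("--bert_config_file", type=str, required=True)
    parser.add_argument("--pytorch_dump_path", type=str, required=True)
    args = parser.parse_args()
    convert_tf_checkpoint_to_pytorch(args.tf_checkpoint_path,
                                     args.bert_config_file,
                                     args.pytorch_dump_path)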
Example #7
    def __init__(self):
        # Load pre-trained model tokenizer (vocabulary); self.bert_model is
        # expected to be set on the class (e.g. a model name like 'bert-base-uncased')
        self.tokenizer = BertTokenizer.from_pretrained(self.bert_model)

        # Load pre-trained model (weights)
        self.model = BertForPreTraining.from_pretrained('bert-base-uncased')
        self.model.eval()
Example #8
    def __init__(self,
                 vis_feat_dim=2208,
                 spatial_size=7,
                 hidden_dim=768,
                 cmb_feat_dim=16000,
                 kernel_size=3):
        """Initialize SkipGramDistNet."""
        super(MCBertForPretrainingModel, self).__init__()
        self.vis_feat_dim = vis_feat_dim
        self.spatial_size = spatial_size
        self.hidden_dim = hidden_dim
        self.cmb_feat_dim = cmb_feat_dim
        self.kernel_size = kernel_size

        self.mcbert_model = MCBertModel(vis_feat_dim=vis_feat_dim,
                                        spatial_size=spatial_size,
                                        hidden_dim=hidden_dim,
                                        cmb_feat_dim=cmb_feat_dim,
                                        kernel_size=kernel_size)

        version = "bert-base-cased"
        bert_model = BertForPreTraining.from_pretrained(version)
        self.cls = bert_model.cls

        self.vocab_size = bert_model.config.vocab_size
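
The borrowed cls head is pytorch_pretrained_bert's BertPreTrainingHeads, which maps (sequence_output, pooled_output) to masked-LM scores and next-sentence logits. A rough sketch of how a forward pass might apply it (assuming MCBertModel returns BertModel-style outputs; this is not the actual MCBertForPretrainingModel.forward):

    # Sketch only: assumes self.mcbert_model returns (sequence_output, pooled_output).
    def forward(self, vis_feats, input_ids, token_type_ids=None, attention_mask=None):
        sequence_output, pooled_output = self.mcbert_model(
            vis_feats, input_ids, token_type_ids, attention_mask)
        # prediction_scores: (batch, seq_len, vocab_size); seq_relationship_score: (batch, 2)
        prediction_scores, seq_relationship_score = self.cls(sequence_output, pooled_output)
        return prediction_scores, seq_relationship_score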
Example #9
def get_lm_ranker(bert_model, max_seq_length=100):
    tokenizer = BertTokenizer.from_pretrained(
        os.path.join(bert_model, "vocab.txt"), do_lower_case=True)
    transform = BertLmRankingTransform(tokenizer=tokenizer,
                                       max_len=max_seq_length)

    state_save_path = os.path.join(bert_model, 'model.state')
    if os.path.exists(state_save_path):
        state = torch.load(state_save_path, map_location="cpu")
        model = BertForPreTraining.from_pretrained(
            bert_model, state_dict=state['model_state'])
    else:
        previous_model_file = os.path.join(bert_model, "pytorch_model.bin")
        model_state_dict = torch.load(previous_model_file, map_location="cpu")
        model = BertForPreTraining.from_pretrained(bert_model,
                                                   state_dict=model_state_dict)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()

    lm_ranker = LmBasedRanker(model, tokenizer, transform, device)
    return lm_ranker
Example #10
def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, bert_config_file,
                                     pytorch_dump_path):
    config_path = os.path.abspath(bert_config_file)
    tf_path = os.path.abspath(tf_checkpoint_path)
    print("Converting TensorFlow checkpoint from {} with config at {}".format(
        tf_path, config_path))
    # Load weights from TF model
    init_vars = tf.train.list_variables(tf_path)
    excluded = ['BERTAdam', '_power', 'global_step']
    init_vars = list(
        filter(lambda x: all(e not in x[0] for e in excluded), init_vars))
    names = []
    arrays = []
    for name, shape in init_vars:
        print("Loading TF weight {} with shape {}".format(name, shape))
        array = tf.train.load_variable(tf_path, name)
        names.append(name)
        arrays.append(array)

    # Initialise PyTorch model
    config = BertConfig.from_json_file(bert_config_file)
    print("Building PyTorch model from configuration: {}".format(str(config)))
    model = BertForPreTraining(config)

    for name, array in zip(names, arrays):
        name = name.split('/')
        # adam_v and adam_m are variables used in AdamWeightDecayOptimizer to calculate m and v,
        # which are not required for using the pretrained model
        if any(n in ["adam_v", "adam_m", "global_step"] for n in name):
            print("Skipping {}".format("/".join(name)))
            continue
        pointer = model
        for m_name in name:
            if re.fullmatch(r'[A-Za-z]+_\d+', m_name):
                l = re.split(r'_(\d+)', m_name)
            else:
                l = [m_name]
            if l[0] == 'kernel' or l[0] == 'gamma':
                pointer = getattr(pointer, 'weight')
            elif l[0] == 'output_bias' or l[0] == 'beta':
                pointer = getattr(pointer, 'bias')
            elif l[0] == 'output_weights':
                pointer = getattr(pointer, 'weight')
            else:
                pointer = getattr(pointer, l[0])
            if len(l) >= 2:
                num = int(l[1])
                pointer = pointer[num]
        if m_name[-11:] == '_embeddings':
            pointer = getattr(pointer, 'weight')
        elif m_name == 'kernel':
            array = np.transpose(array)
        try:
            assert pointer.shape == array.shape
        except AssertionError as e:
            e.args += (pointer.shape, array.shape)
            raise
        print("Initialize PyTorch weight {}".format(name))
        pointer.data = torch.from_numpy(array)

    # Save pytorch-model
    print("Save PyTorch model to {}".format(pytorch_dump_path))
    torch.save(model.state_dict(), pytorch_dump_path)
Example #11
    def build_model(cls, args, task):
        """Build a new model instance."""
        # make sure that all args are properly defaulted (in case there are any new ones)
        base_architecture(args)

        if args.encoder_layers != args.decoder_layers:
            raise ValueError('--encoder-layers must match --decoder-layers')

        def load_pretrained_embedding_from_file(embed_path, dictionary,
                                                embed_dim):
            num_embeddings = len(dictionary)
            padding_idx = dictionary.pad()
            embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
            embed_dict = utils.parse_embedding(embed_path)
            utils.print_embed_overlap(embed_dict, dictionary)
            return utils.load_embedding(embed_dict, dictionary, embed_tokens)

        if args.encoder_embed_path:
            pretrained_encoder_embed = load_pretrained_embedding_from_file(
                args.encoder_embed_path, task.source_dictionary,
                args.encoder_embed_dim)
        else:
            num_embeddings = len(task.source_dictionary)
            pretrained_encoder_embed = Embedding(num_embeddings,
                                                 args.encoder_embed_dim,
                                                 task.source_dictionary.pad())

        if args.share_all_embeddings:
            # double check all parameters combinations are valid
            if task.source_dictionary != task.target_dictionary:
                raise ValueError(
                    '--share-all-embeddings requires a joint dictionary')
            if args.decoder_embed_path and (args.decoder_embed_path !=
                                            args.encoder_embed_path):
                raise ValueError(
                    '--share-all-embeddings not compatible with --decoder-embed-path'
                )
            if args.encoder_embed_dim != args.decoder_embed_dim:
                raise ValueError(
                    '--share-all-embeddings requires --encoder-embed-dim to '
                    'match --decoder-embed-dim')
            pretrained_decoder_embed = pretrained_encoder_embed
            args.share_decoder_input_output_embed = True
        else:
            # separate decoder input embeddings
            pretrained_decoder_embed = None
            if args.decoder_embed_path:
                pretrained_decoder_embed = load_pretrained_embedding_from_file(
                    args.decoder_embed_path, task.target_dictionary,
                    args.decoder_embed_dim)
        # one last double check of parameter combinations
        if args.share_decoder_input_output_embed and (
                args.decoder_embed_dim != args.decoder_out_embed_dim):
            raise ValueError(
                '--share-decoder-input-output-embeddings requires '
                '--decoder-embed-dim to match --decoder-out-embed-dim')

        if args.encoder_freeze_embed:
            pretrained_encoder_embed.weight.requires_grad = False
        if args.decoder_freeze_embed:
            pretrained_decoder_embed.weight.requires_grad = False

        bert_model = BertForPreTraining.from_pretrained(args.bert_base)
        bert_model.load_state_dict(torch.load(args.bert_finetune))
        bert = bert_model.bert

        encoder = LSTMEncoder(dictionary=task.source_dictionary,
                              embed_dim=args.encoder_embed_dim,
                              hidden_size=args.encoder_hidden_size,
                              num_layers=args.encoder_layers,
                              dropout_in=args.encoder_dropout_in,
                              dropout_out=args.encoder_dropout_out,
                              bidirectional=args.encoder_bidirectional,
                              pretrained_embed=pretrained_encoder_embed,
                              layer=args.layer,
                              bert=bert)
        decoder = LSTMDecoder(
            dictionary=task.target_dictionary,
            embed_dim=args.decoder_embed_dim,
            hidden_size=args.decoder_hidden_size,
            out_embed_dim=args.decoder_out_embed_dim,
            num_layers=args.decoder_layers,
            dropout_in=args.decoder_dropout_in,
            dropout_out=args.decoder_dropout_out,
            attention=options.eval_bool(args.decoder_attention),
            encoder_output_units=encoder.output_units,
            pretrained_embed=pretrained_decoder_embed,
            share_input_output_embed=args.share_decoder_input_output_embed,
            adaptive_softmax_cutoff=(
                options.eval_str_list(args.adaptive_softmax_cutoff, type=int)
                if args.criterion == 'adaptive_loss' else None),
        )

        return cls(encoder, decoder)
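
The Embedding helper called in build_model is not shown here; a minimal sketch of what it is assumed to do (mirroring fairseq's LSTM model): create an nn.Embedding, initialize it uniformly, and zero the padding vector.

import torch.nn as nn

def Embedding(num_embeddings, embed_dim, padding_idx):
    # Assumed helper (as in fairseq's lstm.py): plain embedding with a zeroed padding row.
    m = nn.Embedding(num_embeddings, embed_dim, padding_idx=padding_idx)
    nn.init.uniform_(m.weight, -0.1, 0.1)
    nn.init.constant_(m.weight[padding_idx], 0)
    return m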
Example #12
import os

import matplotlib
import matplotlib.pyplot as plt
from pylab import rcParams

import torch
import torch.nn.functional as F
from pytorch_pretrained_bert import tokenization, BertTokenizer, BertModel, BertForMaskedLM, BertForPreTraining, BertConfig
from examples.extract_features import *

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

CONFIG_NAME = 'bert_config.json'
BERT_DIR = '/nas/pretrain-bert/pretrain-tensorflow/uncased_L-12_H-768_A-12/'
config_file = os.path.join(BERT_DIR, CONFIG_NAME)
config = BertConfig.from_json_file(config_file)
model = BertForPreTraining.from_pretrained(BERT_DIR)
model.eval()


class Args:
    def __init__(self):
        pass


args = Args()
args.no_cuda = False

device = torch.device(
    "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
model.to(device)
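
With the model, tokenizer, and device set up above, a masked-token prediction looks roughly like this (the sentence is arbitrary):

# Sketch: predict the token at the [MASK] position with the model loaded above.
tokens = ['[CLS]', 'the', 'man', 'went', 'to', 'the', '[MASK]', '.', '[SEP]']
masked_index = tokens.index('[MASK]')
input_ids = torch.tensor([tokenizer.convert_tokens_to_ids(tokens)]).to(device)

with torch.no_grad():
    prediction_scores, seq_relationship_score = model(input_ids)

predicted_id = prediction_scores[0, masked_index].argmax().item()
print(tokenizer.convert_ids_to_tokens([predicted_id]))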
Example #13
akerke_tagged_complaints_path = 'Russian/Alem_Tagged_Complaints/akerke_tagged/pickle/dataset.pkl'
ru_bert_path = '/home/alem/Alem_Sagandykov_Documents/Alem_Social/HERMES/'

vocab_path = 'Production/vocab.txt'
config_path = 'Production/bert_config.json'
ru_bert_pytorch_weights_path_pth = 'pytorch_dump/deeppavlov_pretrained_rubert.pth'
ru_bert_pytorch_weights_path_bin = 'pytorch_dump/rubert_cased_L-12_H-768_A-12_pt/pytorch_model.bin'

vocab = load_vocab(os.path.join(ru_bert_path, vocab_path))

tags2index = {
    'X': 0,
    'O': 1,
    'B-ORG': 2,
    'I-ORG': 3,
    'B-PER': 4,
    'I-PER': 5,
    'B-LOC': 6,
    'I-LOC': 7
}

index2tags = dict(zip(tags2index.values(), tags2index.keys()))

config = BertConfig(os.path.join(ru_bert_path, config_path))
bert_for_pretraining = BertForPreTraining(config)
ru_bert_weights = torch.load(
    os.path.join(ru_bert_path, ru_bert_pytorch_weights_path_bin))
bert_for_pretraining.load_state_dict(ru_bert_weights)

bert_layer = bert_for_pretraining.bert
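
bert_layer is a plain BertModel; a sketch of wiring it to a hypothetical token-classification head and decoding predictions with index2tags (the Linear head below is illustrative, not part of the example above):

# Illustrative only: hypothetical tag classifier on top of bert_layer.
dummy_input_ids = torch.zeros(1, 16, dtype=torch.long)
sequence_output, _ = bert_layer(dummy_input_ids, output_all_encoded_layers=False)
tag_classifier = torch.nn.Linear(config.hidden_size, len(tags2index))
predicted_tag_ids = tag_classifier(sequence_output).argmax(dim=-1)
print([index2tags[i] for i in predicted_tag_ids[0].tolist()])
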
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--batch_size',
                        default=1,
                        type=int,
                        help='Batch size for inference')

    parser.add_argument(
        '--bert_model',
        default='bert-base-cased',
        type=str,
        help='BERT pre-trained model to use, e.g. bert-base-uncased, '
             'bert-large-uncased, bert-base-multilingual-cased, bert-base-chinese')
    parser.add_argument(
        '--max_seq_length',
        default=128,
        type=int,
        help='Maximum total input sequence length after tokenization')

    args = parser.parse_args()

    input_ids = torch.zeros([args.batch_size, args.max_seq_length],
                            dtype=torch.long)
    token_type_ids = torch.zeros([args.batch_size, args.max_seq_length],
                                 dtype=torch.long)

    # Export various BERT models
    # Note: For argument definitions used here see modeling.py from pytorch-pretrained-bert
    #       repository
    #
    # Fully trained models
    model = BertModel.from_pretrained(args.bert_model)
    torch.onnx.export(
        model, (input_ids, token_type_ids), 'bert_' + 'batch' +
        str(args.batch_size) + '_' + args.bert_model + '.onnx')

    model = BertForMaskedLM.from_pretrained(args.bert_model)
    torch.onnx.export(
        model, (input_ids, token_type_ids), 'bert_maskedlm_' + 'batch' +
        str(args.batch_size) + '_' + args.bert_model + '.onnx')

    model = BertForNextSentencePrediction.from_pretrained(args.bert_model)
    torch.onnx.export(
        model, (input_ids, token_type_ids), 'bert_nextsentence_' + 'batch' +
        str(args.batch_size) + '_' + args.bert_model + '.onnx')

    model = BertForPreTraining.from_pretrained(args.bert_model)
    torch.onnx.export(
        model, (input_ids, token_type_ids), 'bert_pretraining_' + 'batch' +
        str(args.batch_size) + '_' + args.bert_model + '.onnx')

    # Partially trained models
    model = BertForSequenceClassification.from_pretrained(args.bert_model, 2)
    torch.onnx.export(
        model, (input_ids, token_type_ids), 'bert_classify_' + 'batch' +
        str(args.batch_size) + '_' + args.bert_model + '.untrained.onnx')

    model = BertForTokenClassification.from_pretrained(args.bert_model, 2)
    torch.onnx.export(
        model, (input_ids, token_type_ids), 'bert_tokenclassify_' + 'batch' +
        str(args.batch_size) + '_' + args.bert_model + '.untrained.onnx')

    # Commented out: ONNX export reports "squeeze with negative axis -1 might
    # cause onnx model to be incorrect", so this model is skipped.
    #
    # model = BertForQuestionAnswering.from_pretrained(args.bert_model)
    # torch.onnx.export(model,(input_ids,token_type_ids),'bert_question_'+'batch'+str(args.batch_size)+'_'+args.bert_model+'.untrained.onnx')

    choices = 2
    input_ids = torch.zeros([args.batch_size, choices, args.max_seq_length],
                            dtype=torch.long)
    token_type_ids = torch.zeros(
        [args.batch_size, choices, args.max_seq_length], dtype=torch.long)
    model = BertForMultipleChoice.from_pretrained(args.bert_model, choices)
    torch.onnx.export(
        model, (input_ids, token_type_ids), 'bert_multiplechoice_' + 'batch' +
        str(args.batch_size) + '_' + args.bert_model + '.untrained.onnx')
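
Any of the exported graphs can be smoke-tested with onnxruntime (not used in the original; input names are read from the session rather than hard-coded):

# Sketch, assuming onnxruntime is installed and batch_size=1, max_seq_length=128.
import numpy as np
import onnxruntime

session = onnxruntime.InferenceSession('bert_batch1_bert-base-cased.onnx')
feed = {inp.name: np.zeros([1, 128], dtype=np.int64) for inp in session.get_inputs()}
outputs = session.run(None, feed)
print([out.shape for out in outputs])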
init_vars = list(filter(lambda x: all(e not in x[0] for e in excluded), init_vars))

names = []
arrays = []
for name, shape in init_vars:
    print("Loading TF weight {} with shape {}".format(name, shape))
    array = tf.train.load_variable(tf_path, name)
    names.append(name)
    arrays.append(array)



# Initialise PyTorch model
config = BertConfig.from_json_file('weights/pubmed_pmc_470k/bert_config.json')
print("Building PyTorch model from configuration: {}".format(str(config)))
model = BertForPreTraining(config)


for name, array in zip(names, arrays):
    name = name.split('/')
    # adam_v and adam_m are variables used in AdamWeightDecayOptimizer to calculate m and v,
    # which are not required for using the pretrained model
    if any(n in ["adam_v", "adam_m", "global_step"] for n in name):
        print("Skipping {}".format("/".join(name)))
        continue
    pointer = model
    for m_name in name:
        if re.fullmatch(r'[A-Za-z]+_\d+', m_name):
            l = re.split(r'_(\d+)', m_name)
        else:
            l = [m_name]