Example No. 1
def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, bert_config_file, pytorch_dump_path):
    # Initialise PyTorch model
    config = BertConfig.from_json_file(bert_config_file)  
    print("Building PyTorch model from configuration: {}".format(str(config)))
    model = BertForPreTraining(config)
    # Load weights from tf checkpoint
    load_tf_weights_in_bert(model, config, tf_checkpoint_path)  
    # Save pytorch-model
    print("Save PyTorch model to {}".format(pytorch_dump_path))
    torch.save(model.state_dict(), pytorch_dump_path)  
def convert_tf2_checkpoint_to_pytorch(tf_checkpoint_path, config_path, pytorch_dump_path):
    # Initialize PyTorch model
    logger.info(f"Loading model based on config from {config_path}...")
    config = BertConfig.from_json_file(config_path)
    model = BertForPreTraining(config)

    # Load weights from tf checkpoint
    logger.info(f"Loading weights from checkpoint {tf_checkpoint_path}...")
    load_tf2_weights_in_bert(model, tf_checkpoint_path, config)

    # Save pytorch-model
    logger.info(f"Saving PyTorch model to {pytorch_dump_path}...")
    torch.save(model.state_dict(), pytorch_dump_path)
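
Both helpers above are usually driven from the command line; a minimal sketch of such a wrapper, assuming the functions and the transformers imports are already in scope (the flag names here are illustrative, not the original script's exact options):

import argparse

if __name__ == "__main__":
    # Hypothetical CLI wrapper around convert_tf_checkpoint_to_pytorch;
    # flag names are illustrative rather than the original script's options.
    parser = argparse.ArgumentParser()
    parser.add_argument("--tf_checkpoint_path", required=True,
                        help="Path to the TensorFlow checkpoint.")
    parser.add_argument("--bert_config_file", required=True,
                        help="JSON config file of the pre-trained BERT model.")
    parser.add_argument("--pytorch_dump_path", required=True,
                        help="Where to write the converted PyTorch weights.")
    args = parser.parse_args()
    convert_tf_checkpoint_to_pytorch(args.tf_checkpoint_path,
                                     args.bert_config_file,
                                     args.pytorch_dump_path)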
Example No. 3
def convert_tf_checkpoint_to_pytorch(config):
    tf_checkpoint_path = config["tf_checkpoint_path"]
    bert_config_file = config["bert_config_file"]
    pytorch_dump_path = Path(config["pytorch_dump_path"])
    # Initialize the PyTorch model
    config = BertConfig.from_json_file(bert_config_file)
    print("Building PyTorch model from configuration: {}".format(str(config)))
    model = BertForPreTraining(config)

    # Load the TF weights
    load_tf_weights_in_bert(model, config, tf_checkpoint_path)

    # Save the PyTorch model
    print("Save PyTorch model to {}".format(pytorch_dump_path))
    torch.save(model.state_dict(), pytorch_dump_path)
Example No. 4
    def download(self):
        # Iterate over urls: download, unzip, verify sha256sum
        found_mismatch_sha = False
        for model in self.model_urls:
          url = self.model_urls[model][0]
          file = self.save_path + '/' + self.model_urls[model][1]

          print('Downloading', url)
          response = urllib.request.urlopen(url)
          with open(file, 'wb') as handle:
            handle.write(response.read())

          print('Unzipping', file)
          zip_file = zipfile.ZipFile(file, 'r')
          extract_to_path = pathlib.Path(self.save_path) / (pathlib.Path(file).stem if model == "bert_tiny_uncased" else "")
          zip_file.extractall(path=extract_to_path)
          zip_file.close()

          sha_dict = self.model_sha[model]
          for extracted_file in sha_dict:
            sha = sha_dict[extracted_file]
            if sha != self.sha256sum(file[:-4] + '/' + extracted_file):
              found_mismatch_sha = True
              print('SHA256sum does not match on file:', extracted_file, 'from download url:', url)
            else:
              print(file[:-4] + '/' + extracted_file, '\t', 'verified')

          config = BertConfig.from_json_file(extract_to_path / "bert_config.json")
          print("Building PyTorch model from configuration: {}".format(str(config)))
          model = BertForPreTraining(config)

          # Load weights from tf checkpoint
          load_tf_weights_in_bert(model, config, extract_to_path / "bert_model.ckpt")

          # Save pytorch-model
          print("Save PyTorch model to {}".format(extract_to_path))
          torch.save({'model': model.state_dict(),
                      'optimizer': None,
                      'master params': None,
                      'files': None,
                      'epoch': None,
                      'data_loader': None}, extract_to_path / "ckpt_pretrained.pt")

        if not found_mismatch_sha:
          print("All downloads pass sha256sum verification.")
Example No. 5
def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, bert_config_file,
                                     pytorch_dump_path):
    """
    :param tf_checkpoint_path: Path to the TensorFlow checkpoint.
    :param bert_config_file: The config json file corresponding to the pre-trained BERT model.
    :param pytorch_dump_path: Path to the output PyTorch model.
    :return:
    """
    # Initialise PyTorch model
    config = BertConfig.from_json_file(bert_config_file)
    print("Building PyTorch model from configuration: {}".format(str(config)))
    model = BertForPreTraining(config)

    # Load weights from tf checkpoint
    load_tf_weights_in_bert(model, config, tf_checkpoint_path)

    # Save pytorch-model
    print("Save PyTorch model to {}".format(pytorch_dump_path))
    torch.save(model.state_dict(), pytorch_dump_path)
def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, bert_config_file,
                                     pytorch_dump_path):
    '''
        tf_checkpoint_path: path to the TF ckpt file
        bert_config_file: path to the BERT config json file
        pytorch_dump_path: where to save the PyTorch model
    '''

    # Initialize the PyTorch model
    config = BertConfig.from_json_file(bert_config_file)
    print("Building PyTorch model from configuration: {}".format(str(config)))
    model = BertForPreTraining(config)

    # Load weights from the checkpoint
    load_tf_weights_in_bert(model, config, tf_checkpoint_path)

    # Save the PyTorch model
    print("Save PyTorch model to {}".format(pytorch_dump_path))
    torch.save(model.state_dict(), pytorch_dump_path)
def convert_tf2_checkpoint_to_pytorch(tf_checkpoint_path, config_path,
                                      output_folder):
    # Instantiate model
    logger.info(f'Loading model based on config from {config_path}...')
    config = BertConfig.from_json_file(config_path)
    model = BertForPreTraining(config)

    # Load weights from checkpoint
    logger.info(f'Loading weights from checkpoint {tf_checkpoint_path}...')
    load_tf2_weights_in_bert(model, tf_checkpoint_path, config)

    # Create dirs
    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)

    # Save pytorch-model
    f_out_model = os.path.join(output_folder, 'pytorch_model.bin')
    logger.info(f'Saving PyTorch model to {f_out_model}...')
    torch.save(model.state_dict(), f_out_model)

    # Save config to output
    f_out_config = os.path.join(output_folder, 'config.json')
    logger.info(f'Saving config to {f_out_config}...')
    config.to_json_file(f_out_config)
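
A quick way to sanity-check any of these conversions is to rebuild the model from the same config and reload the dumped weights; a minimal sketch assuming the standard transformers API (the file paths below are illustrative):

import torch
from transformers import BertConfig, BertForPreTraining

# Rebuild the model from the converted config and load the dumped state dict.
config = BertConfig.from_json_file("output_folder/config.json")          # illustrative path
model = BertForPreTraining(config)
state_dict = torch.load("output_folder/pytorch_model.bin", map_location="cpu")
missing, unexpected = model.load_state_dict(state_dict, strict=False)
print("missing keys:", missing)
print("unexpected keys:", unexpected)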
Example No. 8
compression = hvd.Compression.fp16 if args.fp16_allreduce else hvd.Compression.none
use_bytescheduler = True
import bytescheduler.pytorch.horovod as bsc
bsc.init()

# Horovod: wrap optimizer with DistributedOptimizer.
optimizer = hvd.DistributedOptimizer(optimizer,
                                     named_parameters=model.named_parameters(),
                                     compression=compression,
                                     op=hvd.Average)
optimizer = bsc.ScheduledOptimizer(
    model, optimizer,
    args.num_warmup_batches + args.num_iters * args.num_batches_per_iter)

# Horovod: broadcast parameters & optimizer state.
hvd.broadcast_parameters(model.state_dict(), root_rank=0)
hvd.broadcast_optimizer_state(optimizer, root_rank=0)

max_len = args.sentence_len
batch_size = args.batch_size
#input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
input_ids = (torch.rand(batch_size, max_len) * 2000).long()
attention_masks = torch.rand(batch_size, max_len).long()
token_type_ids = torch.rand(batch_size, max_len).long()
position_ids = (torch.rand(batch_size, max_len) * 10).long()
next_sentence_label = torch.rand(batch_size, 1).long()
masked_lm_labels = torch.rand(batch_size, max_len).long()
batch = (input_ids, attention_masks, token_type_ids, position_ids,
         next_sentence_label, masked_lm_labels)
if args.cuda:
    batch = tuple(item.cuda() for item in batch)
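
The synthetic batch can then drive a pre-training step; a minimal sketch assuming a recent transformers BertForPreTraining forward signature (older releases used masked_lm_labels instead of labels and returned plain tuples instead of an output object):

input_ids, attention_masks, token_type_ids, position_ids, \
    next_sentence_label, masked_lm_labels = batch

# One forward/backward/step iteration with the wrapped optimizer.
outputs = model(input_ids=input_ids,
                attention_mask=attention_masks,
                token_type_ids=token_type_ids,
                position_ids=position_ids,
                labels=masked_lm_labels,
                next_sentence_label=next_sentence_label.view(-1))
loss = outputs.loss
optimizer.zero_grad()
loss.backward()
optimizer.step()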
Example No. 9
def convert_pytorch_checkpoint_to_tf(model: BertForPreTraining, ckpt_dir: str,
                                     model_name: str):
    """
    Args:
        model: BertModel PyTorch model instance to be converted
        ckpt_dir: Tensorflow model directory
        model_name: model name
    Currently supported HF models:
        - Y BertModel
        - N BertForMaskedLM
        - N BertForPreTraining
        - N BertForMultipleChoice
        - N BertForNextSentencePrediction
        - N BertForSequenceClassification
        - N BertForQuestionAnswering
    """

    tensors_to_transpose = ("dense.weight", "attention.self.query",
                            "attention.self.key", "attention.self.value")

    var_map = (
        ("layer.", "layer_"),
        ("word_embeddings.weight", "word_embeddings"),
        ("position_embeddings.weight", "position_embeddings"),
        ("token_type_embeddings.weight", "token_type_embeddings"),
        ("cls.predictions.bias", "cls.predictions.output_bias"),
        (".", "/"),
        ("LayerNorm/weight", "LayerNorm/gamma"),
        ("LayerNorm/bias", "LayerNorm/beta"),
        ("weight", "kernel"),
        ("cls/seq_relationship/bias", "cls/seq_relationship/output_bias"),
        ("cls/seq_relationship/kernel", "cls/seq_relationship/output_weights"),
    )

    if not os.path.isdir(ckpt_dir):
        os.makedirs(ckpt_dir)

    state_dict = model.state_dict()

    def to_tf_var_name(name: str):
        for patt, repl in iter(var_map):
            name = name.replace(patt, repl)
        return "bert/{}".format(name) if not name.startswith("cls") else name

    def create_tf_var(tensor: np.ndarray, name: str, session: tf.Session):
        tf_dtype = tf.dtypes.as_dtype(tensor.dtype)
        tf_var = tf.get_variable(dtype=tf_dtype,
                                 shape=tensor.shape,
                                 name=name,
                                 initializer=tf.zeros_initializer())
        session.run(tf.variables_initializer([tf_var]))
        session.run(tf_var)
        return tf_var

    tf.reset_default_graph()
    with tf.Session() as session:
        for var_name in state_dict:
            tf_name = to_tf_var_name(var_name)
            torch_tensor = state_dict[var_name].numpy()
            if any([x in var_name for x in tensors_to_transpose]):
                torch_tensor = torch_tensor.T
            tf_var = create_tf_var(tensor=torch_tensor,
                                   name=tf_name,
                                   session=session)
            tf.keras.backend.set_value(tf_var, torch_tensor)
            tf_weight = session.run(tf_var)
            print("Successfully created {}: {}".format(
                tf_name, np.allclose(tf_weight, torch_tensor)))

        saver = tf.train.Saver(tf.trainable_variables())
        saver.save(
            session,
            os.path.join(ckpt_dir,
                         model_name.replace("-", "_") + ".ckpt"))
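
One hedged way to verify the exported checkpoint is to list its variables back out of the file the saver wrote; a minimal sketch (the path below is illustrative and mirrors the saver.save call above):

import tensorflow as tf

# Read variable names and shapes back from the freshly written TF checkpoint.
ckpt_path = "ckpt_dir/bert_base_uncased.ckpt"  # illustrative path
for name, shape in tf.train.list_variables(ckpt_path):
    print(name, shape)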
Example No. 10
def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, bert_config_file,
                                     pytorch_dump_path):

    print("Converting TensorFlow checkpoint from {} with config at {}".format(
        tf_checkpoint_path, bert_config_file))

    # Load weights from TF model
    init_vars = tf.train.list_variables(tf_checkpoint_path)
    names = []
    arrays = []
    for name, shape in init_vars:
        print("Loading TF weight {} with shape {}".format(name, shape))
        array = tf.train.load_variable(tf_checkpoint_path, name)
        names.append(name)
        arrays.append(array)

    # Initialise PyTorch model
    config = BertConfig.from_json_file(bert_config_file)
    print("Building PyTorch model from configuration: {}".format(str(config)))
    model = BertForPreTraining(config)

    for name, array in zip(names, arrays):
        name = name.split('/')
        # adam_v and adam_m are variables used in AdamWeightDecayOptimizer to calculate m and v,
        # which are not required when using the pretrained model
        if any(n in ["adam_v", "adam_m", "global_step"] for n in name):
            print("Skipping {}".format("/".join(name)))
            continue
        pointer = model
        for m_name in name:
            if re.fullmatch(r'[A-Za-z]+_\d+', m_name):
                l = re.split(r'_(\d+)', m_name)
            else:
                l = [m_name]
            if l[0] == 'kernel' or l[0] == 'gamma':
                pointer = getattr(pointer, 'weight')
            elif l[0] == 'output_bias' or l[0] == 'beta':
                pointer = getattr(pointer, 'bias')
            elif l[0] == 'output_weights':
                pointer = getattr(pointer, 'weight')
            else:
                pointer = getattr(pointer, l[0])
            if len(l) >= 2:
                num = int(l[1])
                pointer = pointer[num]
        if m_name[-11:] == '_embeddings':
            pointer = getattr(pointer, 'weight')
        elif m_name == 'kernel':
            array = np.transpose(array)
        try:
            assert pointer.shape == array.shape
        except AssertionError as e:
            e.args += (pointer.shape, array.shape)
            raise
        print("Initialize PyTorch weight {}".format(name))
        pointer.data = torch.from_numpy(array)

    # Save pytorch-model
    print("Save PyTorch model to {}".format(pytorch_dump_path))
    torch.save(model.state_dict(), pytorch_dump_path)


# if __name__=='__main__':
#     convert_tf_checkpoint_to_pytorch(config.TF_PATH,config.BERT_CONFIG_FILE,config.BERT_WEIGHTS)
def convert_multibert_checkpoint_to_pytorch(tf_checkpoint_path, config_path, save_path):
    tf_path = os.path.abspath(tf_checkpoint_path)
    logger.info(f"Converting TensorFlow checkpoint from {tf_path}")

    # Load weights from TF model
    init_vars = tf.train.list_variables(tf_path)
    names = []
    arrays = []
    config = BertConfig.from_pretrained(config_path)
    model = BertForPreTraining(config)

    layer_nums = []
    for full_name, shape in init_vars:
        array = tf.train.load_variable(tf_path, full_name)
        names.append(full_name)
        split_names = full_name.split("/")
        for name in split_names:
            if name.startswith("layer_"):
                layer_nums.append(int(name.split("_")[-1]))

        arrays.append(array)
    logger.info(f"Read a total of {len(arrays):,} layers")

    name_to_array = dict(zip(names, arrays))

    # Check that number of layers match
    assert config.num_hidden_layers == len(list(set(layer_nums)))

    state_dict = model.state_dict()

    # Need to do this explicitly as it is a buffer
    position_ids = state_dict["bert.embeddings.position_ids"]
    new_state_dict = {"bert.embeddings.position_ids": position_ids}

    # Encoder Layers
    for weight_name in names:
        pt_weight_name = weight_name.replace("kernel", "weight").replace("gamma", "weight").replace("beta", "bias")
        name_split = pt_weight_name.split("/")
        for name_idx, name in enumerate(name_split):
            if name.startswith("layer_"):
                name_split[name_idx] = name.replace("_", ".")

        if name_split[-1].endswith("embeddings"):
            name_split.append("weight")

        if name_split[0] == "cls":
            if name_split[-1] == "output_bias":
                name_split[-1] = "bias"
            if name_split[-1] == "output_weights":
                name_split[-1] = "weight"

        if name_split[-1] == "weight" and name_split[-2] == "dense":
            name_to_array[weight_name] = name_to_array[weight_name].T

        pt_weight_name = ".".join(name_split)

        new_state_dict[pt_weight_name] = torch.from_numpy(name_to_array[weight_name])

    new_state_dict["cls.predictions.decoder.weight"] = new_state_dict["bert.embeddings.word_embeddings.weight"].clone()
    new_state_dict["cls.predictions.decoder.bias"] = new_state_dict["cls.predictions.bias"].clone().T
    # Load State Dict
    model.load_state_dict(new_state_dict)

    # Save PreTrained
    logger.info(f"Saving pretrained model to {save_path}")
    model.save_pretrained(save_path)

    return model
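
Since the conversion ends with model.save_pretrained(save_path), the result can be reloaded through from_pretrained; a minimal sketch assuming a recent transformers release (the path and dummy input are illustrative):

import torch
from transformers import BertForPreTraining

# Reload the converted checkpoint written by convert_multibert_checkpoint_to_pytorch.
model = BertForPreTraining.from_pretrained("path/to/save_path")  # illustrative path
model.eval()
with torch.no_grad():
    dummy_ids = torch.zeros(1, 8, dtype=torch.long)
    outputs = model(input_ids=dummy_ids)
print(outputs.prediction_logits.shape)   # expected: (1, 8, vocab_size)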