Example #1
def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, bert_config_file,
                                     pytorch_dump_path):
    config_path = os.path.abspath(bert_config_file)
    tf_path = os.path.abspath(tf_checkpoint_path)
    print("Converting TensorFlow checkpoint from {} with config at {}".format(
        tf_path, config_path))
    # Load weights from TF model
    init_vars = tf.train.list_variables(tf_path)
    names = []
    arrays = []
    for name, shape in init_vars:
        print("Loading TF weight {} with shape {}".format(name, shape))
        array = tf.train.load_variable(tf_path, name)
        names.append(name)
        arrays.append(array)

    # Initialise PyTorch model
    config = BertConfig.from_json_file(bert_config_file)
    print("Building PyTorch model from configuration: {}".format(str(config)))
    model = BertForPreTraining(config)

    for name, array in zip(names, arrays):
        name = name.split('/')
        # adam_v and adam_m are variables used by AdamWeightDecayOptimizer to calculate m and v,
        # which are not required for using the pretrained model
        if any(n in ["adam_v", "adam_m"] for n in name):
            print("Skipping {}".format("/".join(name)))
            continue
        pointer = model
        for m_name in name:
            if re.fullmatch(r'[A-Za-z]+_\d+', m_name):
                l = re.split(r'_(\d+)', m_name)
            else:
                l = [m_name]
            if l[0] == 'kernel' or l[0] == 'gamma':
                pointer = getattr(pointer, 'weight')
            elif l[0] == 'output_bias' or l[0] == 'beta':
                pointer = getattr(pointer, 'bias')
            elif l[0] == 'output_weights':
                pointer = getattr(pointer, 'weight')
            else:
                pointer = getattr(pointer, l[0])
            if len(l) >= 2:
                num = int(l[1])
                pointer = pointer[num]
        if m_name[-11:] == '_embeddings':
            pointer = getattr(pointer, 'weight')
        elif m_name == 'kernel':
            array = np.transpose(array)
        try:
            assert pointer.shape == array.shape
        except AssertionError as e:
            e.args += (pointer.shape, array.shape)
            raise
        print("Initialize PyTorch weight {}".format(name))
        pointer.data = torch.from_numpy(array)

    # Save pytorch-model
    print("Save PyTorch model to {}".format(pytorch_dump_path))
    torch.save(model.state_dict(), pytorch_dump_path)
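
A minimal call sketch for the converter above; the checkpoint, config, and dump paths are illustrative placeholders for a standard BERT-Base download, not paths taken from the original code:

# Usage sketch -- all paths below are hypothetical placeholders
convert_tf_checkpoint_to_pytorch(
    tf_checkpoint_path="uncased_L-12_H-768_A-12/bert_model.ckpt",
    bert_config_file="uncased_L-12_H-768_A-12/bert_config.json",
    pytorch_dump_path="pytorch_model.bin")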
Example #2
def eval_all():

#    output_model_file = "../../output/best_model"
    output_model_file = MODEL_PATH
    output_config_file = os.path.join('../model_dir/', args.config_name)

    config = BertConfig(output_config_file)
    model = BertForQuestionAnswering(config)
    if not args.no_pai:
        try:
            model.load_state_dict(torch.load(output_model_file))#, map_location='cpu'))
        except:
            model = nn.DataParallel(model)
            model.load_state_dict(torch.load(output_model_file))#, map_location='cpu'))
    else:
        try:
            model.load_state_dict(torch.load(output_model_file, map_location='cpu'))
        except:
            model = nn.DataParallel(model)
            model.load_state_dict(torch.load(output_model_file, map_location='cpu'))

    result_file_path = os.path.join('../metric', args.result_file_name)
    evaluate(model, result_file=result_file_path)
    if not args.no_pai:
        print(os.getcwd())
        pai_file_output = "/Container/thsi_yicui/dureader-bert/Dureader/output"
        client.upload(pai_file_output, result_file_path, overwrite=True)
Example #3
    def __init__(self, output_dir: str, bert_config: str, max_seq_length: int):

        bert_config = BertConfig.from_json_file(bert_config)
        bert_config.experimental_gelu = FLAGS.experimental_gelu
        if FLAGS.precision:
            bert_config.precision = FLAGS.precision

        self.session = tf.compat.v1.Session()

        placeholder = tf.compat.v1.placeholder
        input_shape = [None, max_seq_length]
        self.input_ids = placeholder(tf.int32, input_shape, name='input_ids')
        self.input_mask = placeholder(tf.int32, input_shape, name='input_mask')
        self.segment_ids = placeholder(tf.int32,
                                       input_shape,
                                       name='segment_ids')

        (self.start_logits, self.end_logits) = \
          create_model_top(bert_config, False, # is training
                           self.input_ids, self.input_mask, self.segment_ids,
                           False, # use_one_hot_embeddings
                           None) # frozen graph path

        latest_model = tf.train.latest_checkpoint(FLAGS.output_dir)
        saver = tf.compat.v1.train.Saver()
        saver.restore(self.session, latest_model)

        self.output_dir = output_dir
        self.dest_dir = os.path.join(self.output_dir, "frozen")
        if not os.path.exists(self.dest_dir):
            os.mkdir(self.dest_dir)
Example #4
def eval_all():

    #    output_model_file = "../../output/best_model"
    output_model_file = MODEL_PATH
    output_config_file = CONFIG_PATH

    config = BertConfig(output_config_file)
    model = BertForQuestionAnswering(config)
    if next(model.parameters()).is_cuda:
        try:
            model.load_state_dict(torch.load(output_model_file))
        except:
            model = nn.DataParallel(model)
            model.load_state_dict(torch.load(output_model_file))
    else:
        try:
            model.load_state_dict(
                torch.load(output_model_file, map_location='cpu'))
        except:
            model = nn.DataParallel(model)
            model.load_state_dict(
                torch.load(output_model_file, map_location='cpu'))

    result_file_path = os.path.join('../metric', args.result_file_name)
    evaluate(model, result_file=result_file_path)
Example #5
def prepare_model(args, device):

    # Prepare model
    config = BertConfig.from_json_file(args.bert_config_path)

    # Padding for divisibility by 8
    if config.vocab_size % 8 != 0:
        config.vocab_size += 8 - (config.vocab_size % 8)
        print('padded vocab size to: {}'.format(config.vocab_size))

    # Set some options that the config file is expected to have (but don't need to be set properly
    # at this point)
    config.pad = False
    config.unpad = False
    config.dense_seq_output = False
    config.fused_mha = False
    config.fused_gelu_bias = False
    config.fuse_qkv = False
    config.fuse_scale = False
    config.fuse_mask = False
    config.fuse_dropout = False
    config.apex_softmax = False
    config.enable_stream = False
    if config.fuse_mask:
        config.apex_softmax = True
    if not config.pad:
        config.enable_stream = True
    if config.unpad:
        config.fused_mha = False

    # Load from TF checkpoint
    model = BertForPreTraining.from_pretrained(args.tf_checkpoint,
                                               from_tf=True,
                                               config=config)

    return model
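
The vocab padding above rounds the vocabulary size up to the next multiple of 8 (e.g. 30522 becomes 30528), which keeps the embedding matrix friendly to Tensor Core kernels; the same rounding as a one-line sketch:

# Round vocab_size up to the next multiple of 8 (e.g. 30522 -> 30528)
config.vocab_size = (config.vocab_size + 7) // 8 * 8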
Example #6
    def __init__(self):
        self.bert_config = BertConfig.from_json_file(
            os.path.join(path, 'uncased_L-12_H-768_A-12/bert_config.json'))
        self.max_sequence_length = 128

        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")

        if self.max_sequence_length > self.bert_config.max_position_embeddings:
            raise ValueError(
                "Cannot use sequence length {} because the BERT model was only trained up to sequence length {}"
                .format(self.max_sequence_length,
                        self.bert_config.max_position_embeddings))

        self.processor = LogicProcessor()
        self.label_list = self.processor.get_labels()
        self.tokenizer = tokenization.FullTokenizer(vocab_file=os.path.join(
            path, 'uncased_L-12_H-768_A-12/vocab.txt'),
                                                    do_lower_case=False)

        self.model = BertForSequenceClassification(self.bert_config,
                                                   len(self.label_list))
        init_checkpoint = os.path.join(path, 'model/logic_model_500.bin')
        # Placeholder: future code to load a saved model

        if init_checkpoint is not None:
            self.model.load_state_dict(
                torch.load(init_checkpoint, map_location='cpu'))

        self.model.to(self.device)
Example #7
    def load_stock_model(model_dir, max_seq_len):
        from modeling import BertModel, BertConfig, get_assignment_map_from_checkpoint

        # Reset scope naming for checkpoint loading (if executed more than once)
        tf.compat.v1.reset_default_graph()

        bert_config_file = os.path.join(model_dir, "bert_config.json")
        bert_ckpt_file = os.path.join(model_dir, "bert_model.ckpt")

        pl_input_ids = tf.compat.v1.placeholder(tf.int32,
                                                shape=(1, max_seq_len))
        pl_mask = tf.compat.v1.placeholder(tf.int32, shape=(1, max_seq_len))
        pl_token_type_ids = tf.compat.v1.placeholder(tf.int32,
                                                     shape=(1, max_seq_len))

        bert_config = BertConfig.from_json_file(bert_config_file)

        s_model = BertModel(config=bert_config,
                            is_training=False,
                            input_ids=pl_input_ids,
                            input_mask=pl_mask,
                            token_type_ids=pl_token_type_ids,
                            use_one_hot_embeddings=False)

        tvars = tf.compat.v1.trainable_variables()
        (assignment_map,
         initialized_var_names) = get_assignment_map_from_checkpoint(
             tvars, bert_ckpt_file)
        tf.compat.v1.train.init_from_checkpoint(bert_ckpt_file, assignment_map)

        return s_model, pl_input_ids, pl_token_type_ids, pl_mask
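
A hedged inference sketch for the graph built above, assuming load_stock_model is callable directly and that the stock BertModel exposes get_pooled_output() as in the reference TF implementation; the model directory and all-zero inputs are placeholders:

import numpy as np

model, pl_input_ids, pl_token_type_ids, pl_mask = load_stock_model(
    "uncased_L-12_H-768_A-12", 128)  # hypothetical model dir
with tf.compat.v1.Session() as sess:
    # init_from_checkpoint only registers assignments; the initializer applies them
    sess.run(tf.compat.v1.global_variables_initializer())
    ids = np.zeros((1, 128), dtype=np.int32)  # dummy ids, mask, and segments
    pooled = sess.run(model.get_pooled_output(),
                      feed_dict={pl_input_ids: ids,
                                 pl_mask: ids,
                                 pl_token_type_ids: ids})
    print(pooled.shape)  # (1, hidden_size)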
Example #8
def get_model_from_args(args):
    config = BertConfig.from_json_file(args.config_file)
    if config.vocab_size % 8 != 0:
        config.vocab_size += 8 - (config.vocab_size % 8)

    class BertForQuestionAnswering_int32_inputs(BertForQuestionAnswering):
        def forward(self, input_ids, segment_ids, attention_mask):
            # cast int32 inputs to int64, since embedding lookups require long indices
            input_ids = input_ids.long()
            segment_ids = segment_ids.long()
            attention_mask = attention_mask.long()
            return super().forward(input_ids, segment_ids, attention_mask)

    model = BertForQuestionAnswering_int32_inputs(config)

    model.enable_apex(False)
    if os.path.isfile(args.checkpoint):
        state_dict = torch.load(args.checkpoint, map_location="cpu")
        state_dict = state_dict["model"] if "model" in state_dict.keys(
        ) else state_dict
        model.load_state_dict(state_dict, strict=False)
    if args.precision == "fp16":
        model = model.half()
    device = "cuda:0" if not args.cpu else "cpu"
    model = model.to(device)
    model.eval()
    model.bermuda_batch_axis = 0 if not args.fixed_batch_dim else None
    return model
Example #9
    def load_query_encoder(self, device, args):
        # Configure paths for query encoder serving
        vocab_path = os.path.join(args.metadata_dir, args.vocab_name)
        bert_config_path = os.path.join(
            args.metadata_dir, args.bert_config_name.replace(".json", "") + "_" + args.bert_model_option + ".json"
        )

        # Load pretrained QueryEncoder
        bert_config = BertConfig.from_json_file(bert_config_path)
        model = DenSPI(bert_config)
        if args.parallel:
            model = torch.nn.DataParallel(model)
        state = torch.load(args.query_encoder_path, map_location='cpu')
        try:
            model.load_state_dict(state['model'])
            logger.info('load okay')
        except:
            model.load_state_dict(state, strict=False)
            check_diff(model.state_dict(), state['model'])
        logger.info('Model loaded from %s' % args.query_encoder_path)
        model.to(device)

        tokenizer = tokenization.FullTokenizer(vocab_file=vocab_path, do_lower_case=not args.do_case)
        logger.info('Model loaded from %s' % args.query_encoder_path)
        logger.info('Number of model parameters: {:,}'.format(sum(p.numel() for p in model.parameters())))
        return model, tokenizer
Example #10
def convert():
    # Initialise PyTorch model
    config = BertConfig.from_json_file(args.bert_config_file)
    model = BertModel(config)

    # Load weights from TF model
    path = args.tf_checkpoint_path
    print("Converting TensorFlow checkpoint from {}".format(path))

    init_vars = tf.train.list_variables(path)
    names = []
    arrays = []
    for name, shape in init_vars:
        print("Loading {} with shape {}".format(name, shape))
        array = tf.train.load_variable(path, name)
        print("Numpy array shape {}".format(array.shape))
        names.append(name)
        arrays.append(array)

    for name, array in zip(names, arrays):
        if not name.startswith("bert"):
            print("Skipping {}".format(name))
            continue
        else:
            name = name.replace("bert/", "")  # skip "bert/"
        print("Loading {}".format(name))
        name = name.split('/')
        # adam_v and adam_m are variables used by AdamWeightDecayOptimizer to calculate m and v,
        # which are not required for using the pretrained model
        if (name[0] in ['redictions', 'eq_relationship']
                or name[-1] == "adam_v" or name[-1] == "adam_m"):
            print("Skipping {}".format("/".join(name)))
            continue
        pointer = model
        for m_name in name:
            if re.fullmatch(r'[A-Za-z]+_\d+', m_name):
                l = re.split(r'_(\d+)', m_name)
            else:
                l = [m_name]
            if l[0] == 'kernel':
                pointer = getattr(pointer, 'weight')
            else:
                pointer = getattr(pointer, l[0])
            if len(l) >= 2:
                num = int(l[1])
                pointer = pointer[num]
        if m_name[-11:] == '_embeddings':
            pointer = getattr(pointer, 'weight')
        elif m_name == 'kernel':
            array = np.transpose(array)
        try:
            assert pointer.shape == array.shape
        except AssertionError as e:
            e.args += (pointer.shape, array.shape)
            raise
        pointer.data = torch.from_numpy(array)

    # Save pytorch-model
    torch.save(model.state_dict(), args.pytorch_dump_path)
Example #11
def convert_tmp_to_pytorch(bert_config_file, pytorch_dump_path):
    import re
    import pickle

    import numpy as np
    import torch
    from modeling import BertConfig, BertForPreTraining

    with open("tmp_names", "rb") as fp:  # Unpickling
        # names = pickle.load(fp, encoding='iso-8859-1')
        names = pickle.load(fp)
    with open("tmp_arrays", "rb") as fp:  # Unpickling
        # arrays = pickle.load(fp, encoding='iso-8859-1')
        arrays = pickle.load(fp)

    # Initialise PyTorch model
    config = BertConfig.from_json_file(bert_config_file)
    print("Building PyTorch model from configuration: {}".format(str(config)))
    model = BertForPreTraining(config)

    for name, array in zip(names, arrays):
        name = name.split('/')
        # adam_v and adam_m are variables used by AdamWeightDecayOptimizer to calculate m and v,
        # which are not required for using the pretrained model
        if name[-1] in ["adam_v", "adam_m", 'global_step']:
            print("Skipping {}".format("/".join(name)))
            continue
        pointer = model
        for m_name in name:
            if re.fullmatch(r'[A-Za-z]+_\d+', m_name):
                l = re.split(r'_(\d+)', m_name)
            else:
                l = [m_name]
            if l[0] == 'kernel':
                pointer = getattr(pointer, 'weight')
            elif l[0] == 'output_bias':
                pointer = getattr(pointer, 'bias')
            elif l[0] == 'output_weights':
                pointer = getattr(pointer, 'weight')
            else:
                pointer = getattr(pointer, l[0])
            if len(l) >= 2:
                num = int(l[1])
                pointer = pointer[num]
        if m_name[-11:] == '_embeddings':
            pointer = getattr(pointer, 'weight')
        elif m_name == 'kernel':
            array = np.transpose(array)
        try:
            assert pointer.shape == array.shape
        except AssertionError as e:
            e.args += (pointer.shape, array.shape)
            raise
        print("Initialize PyTorch weight {}".format(name))
        pointer.data = torch.from_numpy(array)

    # Save pytorch-model
    print("Save PyTorch model to {}".format(pytorch_dump_path))
    torch.save(model.state_dict(), pytorch_dump_path)
Example #12
def convert():
    args = parser.parse_args()
    args.tf_checkpoint_path = "chinese_L-12_H-768_A-12\\bert_model.ckpt"
    args.bert_config_file = "chinese_L-12_H-768_A-12\\bert_config.json"
    args.pytorch_dump_path = "chinese_L-12_H-768_A-12\\pytorch_model.bin"
    # Initialise PyTorch model
    config = BertConfig.from_json_file(args.bert_config_file)
    model = BertModel(config)

    # Load weights from TF model
    path = args.tf_checkpoint_path
    print("Converting TensorFlow checkpoint from {}".format(path))

    init_vars = tf.train.list_variables(path)
    names = []
    arrays = []
    for name, shape in init_vars:
        print("Loading {} with shape {}".format(name, shape))
        array = tf.train.load_variable(path, name)
        print("Numpy array shape {}".format(array.shape))
        names.append(name)
        arrays.append(array)

    for name, array in zip(names, arrays):
        name = name[5:]  # strip the 5-char "bert/" prefix ("cls/predictions" becomes "redictions")
        print("Loading {}".format(name))
        name = name.split('/')
        # skip the pre-training heads ("cls/predictions", "cls/seq_relationship")
        if name[0] in ['redictions', 'eq_relationship']:
            print("Skipping {}".format("/".join(name)))
            continue
        pointer = model
        for m_name in name:
            if re.fullmatch(r'[A-Za-z]+_\d+', m_name):
                l = re.split(r'_(\d+)', m_name)
            else:
                l = [m_name]
            if l[0] == 'kernel':
                pointer = getattr(pointer, 'weight')
            else:
                pointer = getattr(pointer, l[0])
            if len(l) >= 2:
                num = int(l[1])
                pointer = pointer[num]
        if m_name[-11:] == '_embeddings':
            pointer = getattr(pointer, 'weight')
        elif m_name == 'kernel':
            array = np.transpose(array)
        try:
            assert pointer.shape == array.shape
        except AssertionError as e:
            e.args += (pointer.shape, array.shape)
            raise
        pointer.data = torch.from_numpy(array)

    # Save pytorch-model
    torch.save(model.state_dict(), args.pytorch_dump_path)
Example #13
    def get_predictor_model(cls):

        config = BertConfig.from_json_file(config_file)
        model = BertForQuestionAnswering(config)
        model.load_state_dict(
            torch.load(MODEL_PATH, map_location='cpu')["model"])
        model.to(device)
        cls.model = model

        return cls.model
Example #14
def eval_all():

    output_model_file = "../model_dir/best_model"
    output_config_file = "../model_dir/bert_config.json"

    config = BertConfig(output_config_file)
    model = BertForQuestionAnswering(config)
    model.load_state_dict(
        torch.load(output_model_file))  #, map_location='cpu'))
    evaluate(model.cpu(), result_file="../metric/predicts.json")
Example #15
    def prepare_model_and_optimizer(self):
        # Prepare model
        self.config = BertConfig.from_json_file(self.args.config_file)

        # Padding for divisibility by 8
        if self.config.vocab_size % 8 != 0:
            self.config.vocab_size += 8 - (self.config.vocab_size % 8)
        self.model = BertForPreTraining(self.config)
        self.another_model = BertForPreTraining(self.config)

        self.model.to(self.device)
        self.another_model.to(self.device)
        param_optimizer = list(self.model.named_parameters())
        no_decay = ['bias', 'gamma', 'beta', 'LayerNorm']

        optimizer_grouped_parameters = []
        names = []

        for n, p in param_optimizer:
            if not any(nd in n for nd in no_decay):
                optimizer_grouped_parameters.append({
                    'params': [p],
                    'weight_decay': 0.01,
                    'name': n
                })
                names.append({'params': [n], 'weight_decay': 0.01})
            if any(nd in n for nd in no_decay):
                optimizer_grouped_parameters.append({
                    'params': [p],
                    'weight_decay': 0.00,
                    'name': n
                })
                names.append({'params': [n], 'weight_decay': 0.00})

        if self.args.phase2:
            max_steps = self.args.max_steps
            tmp = max_steps * 10
            r = self.args.phase1_end_step / tmp
            lr = self.args.learning_rate * (1 - r)
        else:
            max_steps = int(self.args.max_steps / 9 * 10)
            lr = self.args.learning_rate
        if self.args.optimizer == "lamb":
            self.optimizer = BertLAMB(optimizer_grouped_parameters,
                                      lr=lr,
                                      warmup=self.args.warmup_proportion
                                      if not self.args.phase2 else -1,
                                      t_total=max_steps)
        elif self.args.optimizer == "adam":
            self.optimizer = BertAdam(optimizer_grouped_parameters,
                                      lr=lr,
                                      warmup=self.args.warmup_proportion
                                      if not self.args.phase2 else -1,
                                      t_total=max_steps)
Example #16
def initialize_model(args):
    ''' return model, ready to trace '''
    config = BertConfig.from_json_file(args.config_file)
    if config.vocab_size % 8 != 0:
        config.vocab_size += 8 - (config.vocab_size % 8)
    model = BertForQuestionAnswering(config)
    model.enable_apex(False)
    state_dict = torch.load(args.checkpoint, map_location='cpu')["model"]
    model.load_state_dict(state_dict)
    if args.fp16:
        model.half()
    return model
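
Since initialize_model above returns a model "ready to trace", a tracing sketch might look like the following; the sequence length, forward-argument order, and output filename are assumptions, not taken from the original script:

model = initialize_model(args)
model.eval()
dummy = torch.zeros(1, 384, dtype=torch.long)  # assumed (batch, seq_len)
# assumed forward signature: (input_ids, segment_ids, input_mask)
traced = torch.jit.trace(model, (dummy, dummy, dummy))
traced.save("bert_qa_traced.pt")  # hypothetical output path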
Example #17
    def __init__(self,
                 model_name: str,
                 models_dir='_models',
                 device='/device:GPU:0',
                 is_training=False,
                 use_one_hot_embeddings=False,
                 verb=0):

        self.model_name = model_name
        self.models_dir = models_dir
        if verb > 0:
            print('\n*** BertMC *** initializing (folder: %s, model: %s)' %
                  (self.models_dir, self.model_name))

        self.graph = tf.Graph()
        with self.graph.as_default():

            device = device_TF(devices=device, verb=verb)[0]
            if verb > 1: print(' > building graph on cuda: %s' % device)
            with tf.device(device):

                self.features = {
                    'input_ids':
                    tf.placeholder(shape=[None, None], dtype=tf.int32),
                    'input_mask':
                    tf.placeholder(shape=[None, None], dtype=tf.int32),
                    'input_type_ids':
                    tf.placeholder(shape=[None, None], dtype=tf.int32)
                }

                super(BertMC, self).__init__(
                    config=BertConfig.from_json_file(self.models_dir + '/' +
                                                     self.model_name +
                                                     '/bert_config.json'),
                    is_training=is_training,
                    input_ids=self.features['input_ids'],
                    input_mask=self.features['input_mask'],
                    token_type_ids=self.features['input_type_ids'],
                    use_one_hot_embeddings=use_one_hot_embeddings)

            self.tvars = tf.trainable_variables()

            checkpoint = self.models_dir + '/' + self.model_name + '/bert_model.ckpt'
            (assignment_map,
             initialized_variable_names) = get_assignment_map_from_checkpoint(
                 self.tvars, checkpoint)
            tf.train.init_from_checkpoint(checkpoint, assignment_map)
            init = tf.global_variables_initializer()

        self.sess = tf.Session(
            graph=self.graph, config=tf.ConfigProto(allow_soft_placement=True))
        self.sess.run(init)
Example #18
def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, bert_config_file,
                                     pytorch_dump_path):
    # Initialise PyTorch model
    config = BertConfig.from_json_file(bert_config_file)
    print("Building PyTorch model from configuration: {}".format(str(config)))
    model = BertForPreTraining(config)

    # Load weights from tf checkpoint
    load_tf_weights_in_bert(model, tf_checkpoint_path)

    # Save pytorch-model
    print("Save PyTorch model to {}".format(pytorch_dump_path))
    torch.save(model.state_dict(), pytorch_dump_path)
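
A hedged command-line wrapper for the converter above; the flag names mirror the argparse conventions used elsewhere in these examples rather than any particular script:

import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--tf_checkpoint_path", type=str, required=True)
    parser.add_argument("--bert_config_file", type=str, required=True)
    parser.add_argument("--pytorch_dump_path", type=str, required=True)
    args = parser.parse_args()
    convert_tf_checkpoint_to_pytorch(args.tf_checkpoint_path,
                                     args.bert_config_file,
                                     args.pytorch_dump_path)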
Example #19
    def bert_module_fn(is_training):
        """Spec function for a token embedding module."""

        input_ids = tf.placeholder(shape=[None, None],
                                   dtype=tf.int32,
                                   name="input_ids")
        input_mask = tf.placeholder(shape=[None, None],
                                    dtype=tf.int32,
                                    name="input_mask")
        token_type = tf.placeholder(shape=[None, None],
                                    dtype=tf.int32,
                                    name="segment_ids")

        config = BertConfig.from_json_file(config_path)
        model = BertModel(config=config,
                          is_training=is_training,
                          input_ids=input_ids,
                          input_mask=input_mask,
                          token_type_ids=token_type)

        model.input_to_output()
        seq_output = model.get_all_encoder_layers()[-1]

        config_file = tf.constant(value=config_path,
                                  dtype=tf.string,
                                  name="config_file")
        vocab_file = tf.constant(value=vocab_path,
                                 dtype=tf.string,
                                 name="vocab_file")
        lower_case = tf.constant(do_lower_case)

        tf.add_to_collection(tf.GraphKeys.ASSET_FILEPATHS, config_file)
        tf.add_to_collection(tf.GraphKeys.ASSET_FILEPATHS, vocab_file)

        input_map = {
            "input_ids": input_ids,
            "input_mask": input_mask,
            "segment_ids": token_type
        }

        output_map = {"sequence_output": seq_output}

        output_info_map = {
            "vocab_file": vocab_file,
            "do_lower_case": lower_case
        }

        hub.add_signature(name="tokens", inputs=input_map, outputs=output_map)
        hub.add_signature(name="tokenization_info",
                          inputs={},
                          outputs=output_info_map)
Example #20
    def __init__(self, output_dir: str, task_name: str, bert_config: str,
                 max_seq_length: int):

        processors = {
            "cola": ColaProcessor,
            "mnli": MnliProcessor,
            "mrpc": MrpcProcessor,
            "xnli": XnliProcessor
        }

        task_name = task_name.lower()
        if task_name not in processors:
            raise ValueError("Task not found: %s" % (task_name))

        processor = processors[task_name]()
        label_list = processor.get_labels()
        num_labels = len(label_list)

        # create model for CPU/dGPU, not TPU
        use_one_hot_embeddings = False

        bert_config = BertConfig.from_json_file(bert_config)
        bert_config.experimental_gelu = FLAGS.experimental_gelu
        if FLAGS.precision:
            bert_config.precision = FLAGS.precision

        self.session = tf.compat.v1.Session()

        placeholder = tf.compat.v1.placeholder
        input_shape = [None, max_seq_length]
        self.label_ids = placeholder(tf.int32, [None], name='label_ids')
        self.input_ids = placeholder(tf.int32, input_shape, name='input_ids')
        self.input_mask = placeholder(tf.int32, input_shape, name='input_mask')
        self.segment_ids = placeholder(tf.int32,
                                       input_shape,
                                       name='segment_ids')

        self.loss, self.per_example_loss, self.logits, self.probabilities = \
          create_model_top(bert_config, False, # is training
                           self.input_ids, self.input_mask, self.segment_ids,
                           self.label_ids, num_labels, use_one_hot_embeddings,
                           None) # frozen graph path

        latest_model = tf.train.latest_checkpoint(FLAGS.output_dir)
        saver = tf.compat.v1.train.Saver()
        saver.restore(self.session, latest_model)

        self.output_dir = output_dir
        self.dest_dir = os.path.join(self.output_dir, "frozen")
        if not os.path.exists(self.dest_dir):
            os.mkdir(self.dest_dir)
Example #21
def init_models():
    models = []

    for model_path, model_num, config_path in zip(MODEL_PATHS, MODEL_NUMS, CONFIG_PATHS):
        config = BertConfig(config_path)
        model = BertForQuestionAnswerings[model_num](config)
        try:
            model.load_state_dict(torch.load(model_path))
        except:
            model = nn.DataParallel(model)
            model.load_state_dict(torch.load(model_path))

        models.append(model)
    return models
Example #22
    def __init__(self):
        self.input_ids = tf.placeholder(tf.int32, [None, None])
        self.input_mask = tf.placeholder(tf.int32, [None, None])
        self.model = BertModel(config=BertConfig.from_json_file(bert_config),
                               is_training=True,
                               input_ids=self.input_ids,
                               input_mask=self.input_mask)
        self.is_training = tf.placeholder(tf.bool, [])
        self.predictions = self.construct_model(self.model)
        self.id_predictions = tf.argmax(self.predictions, axis=2)
        self.Y = tf.placeholder(tf.float32, [None, None, len(entity_types)])
        self.tokenizer = tokenization.FullTokenizer(vocab_file,
                                                    do_lower_case=False)
        self.model_path = "/scratch/sanjay/bert/bert_ner_model.ckpt"
        self.saver = tf.train.Saver()
Example #23
    def init_model(self, model):
        print('starting to init model')
        vocab_path = os.path.join(self.model_path, 'vocab.txt')
        bert_config_file = os.path.join(self.model_path, 'bert_config.json')
        self.bert_config = BertConfig.from_json_file(bert_config_file)
        self.model = model(self.bert_config, 2)
        weight_path = os.path.join(self.model_path, 'pytorch_model.bin')
        new_state_dict = torch.load(weight_path)
        # strip the 'module.' prefix added by nn.DataParallel, if present
        new_state_dict = dict([(k[7:], v) if k.startswith('module') else (k, v)
                               for k, v in new_state_dict.items()])
        self.model.load_state_dict(new_state_dict)
        self.tokenizer = tokenization.FullTokenizer(vocab_file=vocab_path)
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.model.to(self.device)
        self.model.eval()
        print(f'init {model} model finished')
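
The dict comprehension above strips the 'module.' prefix that nn.DataParallel prepends to parameter names when a wrapped model is saved; the same idea as a small reusable helper (a sketch, not part of the original code):

def strip_data_parallel_prefix(state_dict):
    # nn.DataParallel saves keys as 'module.<name>'; remove that prefix if present
    return {k[len('module.'):] if k.startswith('module.') else k: v
            for k, v in state_dict.items()}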
Example #24
def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, bert_config_file,
                                     pytorch_dump_path):

    # Load the model configuration
    config = BertConfig.from_json_file(bert_config_file)
    print("Building PyTorch model from configuration: {}".format(str(config)))

    # Build the model
    model = BertForPreTraining(config)

    # Load the checkpoint weights into the model. The function also returns the
    # model, but capturing the return value is optional since the model is
    # modified in place.
    load_tf_weights_in_bert(model, tf_checkpoint_path)
    print("Save PyTorch model to {}".format(pytorch_dump_path))

    # Save the PyTorch checkpoint
    torch.save(model.state_dict(), pytorch_dump_path)
Example #25
    def test_encode_context(self):

        x_context_value = [
            [ 151, 12553, 8997, 8792,  10086, 8168, 10481, 9356,  8174, 10404, 9066, 10003, 10610, 10879]
            + [0 for i in range(312 - 14)],
            [8670, 11136, 8997, 10564, 8303,  8228, 8373,  10003, 8307, 119,   151,  12553, 8233,  8815]
            + [0 for i in range(312 - 14)]
        ]

        bert_scope = tf.VariableScope(name="bert", reuse=tf.AUTO_REUSE)
        bert_config = BertConfig.from_json_file(self._poly_encoder_config.bert_config)
        x_context = tf.convert_to_tensor(value=x_context_value, dtype=tf.int32)
        context_vecs, poly_code_mask = self._encoder_inst.encode_context(x_context=x_context,
                                                                                   bert_config=bert_config,
                                                                                   bert_scope=bert_scope)
        print(context_vecs)
        print(poly_code_mask)
Example #26
    def test_encode_candidate(self):

        x_response_value = [
            [10378, 119, 119, 151, 8815, 8281, 8211, 10425, 8154, 0, 0, 0, 0, 0]
            + [0 for i in range(512 - 14)],
            [165, 8991, 8181, 8184, 131, 120, 120, 8134, 11300, 10540, 8735, 8207, 0, 0]
            + [0 for i in range(512 - 14)]
        ]

        bert_scope = tf.VariableScope(name="bert", reuse=tf.AUTO_REUSE)
        bert_config = BertConfig.from_json_file(self._poly_encoder_config.bert_config)
        x_response = tf.convert_to_tensor(value=x_response_value, dtype=tf.int32)
        x_response_emb, x_response_mask = self._encoder_inst.encode_candidate(x_response=x_response,
                                                                              bert_config=bert_config,
                                                                              bert_scope=bert_scope)
        print(x_response_emb)
        print(x_response_mask)
Example #27
def make_global_options(task_specific_parsers=[]):
    # Parse command-line arguments
    command_line_parser = create_command_line_parser()
    all_options_parser = create_all_options_parser()

    for task_parser in task_specific_parsers:
        all_options_parser = task_parser(all_options_parser)

    known_command_line_args, unknown_command_line_args = \
        command_line_parser.parse_known_args()

    if known_command_line_args.help or known_command_line_args.config is None:
        all_options_parser.print_help()
        sys.exit(os.EX_OK)

    # Parse options specified in the configuration file
    config_file_path = known_command_line_args.config
    opts_from_config_file = BertConfig.from_json_file(config_file_path)

    # Build the global options structure from the default options
    current_options = vars(all_options_parser.parse_args())

    unknown_options = [
        opt for opt in opts_from_config_file.keys()
        if opt not in current_options.keys()
    ]

    if unknown_options:
        logging.error(f"Unonwn options: {unknown_options}")
        sys.exit(os.EX_USAGE)

    # Overwrite global options by those specified in the config file.
    current_options.update(opts_from_config_file)
    options_namespace = argparse.Namespace(**current_options)

    # Overwrite with command-line arguments
    all_options_namespace = all_options_parser.parse_args(
        unknown_command_line_args, options_namespace)
    logging.info(
        f"Overwrite configuration parameters: {', '.join(unknown_command_line_args)}"
    )

    # argparse.Namespace -> dict()
    opts = vars(all_options_namespace)

    return opts
Example #28
    def create_stock_bert_graph(bert_config_file, max_seq_len):
        from modeling import BertModel, BertConfig

        tf_placeholder = tf.compat.v1.placeholder

        pl_input_ids = tf_placeholder(tf.int32, shape=(1, max_seq_len))
        pl_mask = tf_placeholder(tf.int32, shape=(1, max_seq_len))
        pl_token_type_ids = tf_placeholder(tf.int32, shape=(1, max_seq_len))

        bert_config = BertConfig.from_json_file(bert_config_file)
        s_model = BertModel(config=bert_config,
                            is_training=False,
                            input_ids=pl_input_ids,
                            input_mask=pl_mask,
                            token_type_ids=pl_token_type_ids,
                            use_one_hot_embeddings=False)

        return s_model, pl_input_ids, pl_mask, pl_token_type_ids
Example #29
    def feature_extractor(self, dummy=False):
        if not dummy:
            model_pre_trained = PreTrainedBertModel(self.model_dir,
                                                    Verbose=True)
        else:
            print("Using randomly initialized model...")
            model_pre_trained = BERTModel(config=BertConfig(),
                                          Verbose=True,
                                          trainable=False)
            model_pre_trained.build(input_shape=(self.batch_size,
                                                 self.max_seq_length))

        print("Computing embeddings...")
        self.bert_embeddings = model_pre_trained(K.variable(
            self.all_input_ids))
        print("Evaluating...")
        self.all_encoder_layers = np.array(
            [K.eval(emb) for emb in self.bert_embeddings[:-1]])
        print("Output BERT shape: ", self.all_encoder_layers.shape)
Example #30
    def __init__(self, model_path):
        """ to obtain the sentence-embeddings model
            model_path: init model weight path
        """
        vocab_path = os.path.join(model_path, 'vocab.txt')
        bert_config_file = os.path.join(model_path, 'bert_config.json')
        self.bert_config = BertConfig.from_json_file(bert_config_file)
        print('starting to init model')
        self.model = TwoSentenceClassifier(self.bert_config, 2)
        weight_path = os.path.join(model_path, 'pytorch_model.bin')
        new_state_dict = torch.load(weight_path)
        # strip the 'module.' prefix added by nn.DataParallel, if present
        new_state_dict = dict([(k[7:], v) if k.startswith('module') else (k, v)
                               for k, v in new_state_dict.items()])
        self.model.load_state_dict(new_state_dict)
        self.tokenizer = tokenization.FullTokenizer(vocab_file=vocab_path)
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.model.to(self.device)
        self.model.eval()
        print('init model finished')
Example #31
def main():
    parser = argparse.ArgumentParser()

    ## Required parameters
    parser.add_argument("--data_dir",
                        default=None,
                        type=str,
                        required=True,
                        help="The input data dir. Should contain the .tsv files (or other data files) for the task.")
    parser.add_argument("--bert_config_file",
                        default=None,
                        type=str,
                        required=True,
                        help="The config json file corresponding to the pre-trained BERT model. \n"
                             "This specifies the model architecture.")
    parser.add_argument("--task_name",
                        default=None,
                        type=str,
                        required=True,
                        help="The name of the task to train.")
    parser.add_argument("--vocab_file",
                        default=None,
                        type=str,
                        required=True,
                        help="The vocabulary file that the BERT model was trained on.")
    parser.add_argument("--output_dir",
                        default=None,
                        type=str,
                        required=True,
                        help="The output directory where the model checkpoints will be written.")

    ## Other parameters
    parser.add_argument("--init_checkpoint",
                        default=None,
                        type=str,
                        help="Initial checkpoint (usually from a pre-trained BERT model).")
    parser.add_argument("--do_lower_case",
                        default=False,
                        action='store_true',
                        help="Whether to lower case the input text. True for uncased models, False for cased models.")
    parser.add_argument("--max_seq_length",
                        default=128,
                        type=int,
                        help="The maximum total input sequence length after WordPiece tokenization. \n"
                             "Sequences longer than this will be truncated, and sequences shorter \n"
                             "than this will be padded.")
    parser.add_argument("--do_train",
                        default=False,
                        action='store_true',
                        help="Whether to run training.")
    parser.add_argument("--do_eval",
                        default=False,
                        action='store_true',
                        help="Whether to run eval on the dev set.")
    parser.add_argument("--train_batch_size",
                        default=32,
                        type=int,
                        help="Total batch size for training.")
    parser.add_argument("--eval_batch_size",
                        default=8,
                        type=int,
                        help="Total batch size for eval.")
    parser.add_argument("--learning_rate",
                        default=5e-5,
                        type=float,
                        help="The initial learning rate for Adam.")
    parser.add_argument("--num_train_epochs",
                        default=3.0,
                        type=float,
                        help="Total number of training epochs to perform.")
    parser.add_argument("--warmup_proportion",
                        default=0.1,
                        type=float,
                        help="Proportion of training to perform linear learning rate warmup for. "
                             "E.g., 0.1 = 10%% of training.")
    parser.add_argument("--save_checkpoints_steps",
                        default=1000,
                        type=int,
                        help="How often to save the model checkpoint.")
    parser.add_argument("--no_cuda",
                        default=False,
                        action='store_true',
                        help="Whether not to use CUDA when available")
    parser.add_argument("--local_rank",
                        type=int,
                        default=-1,
                        help="local_rank for distributed training on gpus")
    parser.add_argument('--seed', 
                        type=int, 
                        default=42,
                        help="random seed for initialization")
    parser.add_argument('--gradient_accumulation_steps',
                        type=int,
                        default=1,
                        help="Number of updates steps to accumualte before performing a backward/update pass.")                       
    parser.add_argument('--optimize_on_cpu',
                        default=False,
                        action='store_true',
                        help="Whether to perform optimization and keep the optimizer averages on CPU")
    parser.add_argument('--fp16',
                        default=False,
                        action='store_true',
                        help="Whether to use 16-bit float precision instead of 32-bit")
    parser.add_argument('--loss_scale',
                        type=float, default=128,
                        help='Loss scaling, positive power of 2 values can improve fp16 convergence.')

    args = parser.parse_args()

    processors = {
        "cola": ColaProcessor,
        "mnli": MnliProcessor,
        "mrpc": MrpcProcessor,
        "news": NewsProcessor,
    }

    if args.local_rank == -1 or args.no_cuda:
        device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
        n_gpu = torch.cuda.device_count()
    else:
        device = torch.device("cuda", args.local_rank)
        n_gpu = 1
        # Initializes the distributed backend which will take care of synchronizing nodes/GPUs
        # torch.distributed.init_process_group(backend='nccl')
        if args.fp16:
            logger.info("16-bits training currently not supported in distributed training")
            args.fp16 = False # (see https://github.com/pytorch/pytorch/pull/13496)
    logger.info("device %s n_gpu %d distributed training %r", device, n_gpu, bool(args.local_rank != -1))

    if args.gradient_accumulation_steps < 1:
        raise ValueError("Invalid gradient_accumulation_steps parameter: {}, should be >= 1".format(
                            args.gradient_accumulation_steps))

    args.train_batch_size = int(args.train_batch_size / args.gradient_accumulation_steps)

    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if n_gpu > 0:
        torch.cuda.manual_seed_all(args.seed)

    if not args.do_train and not args.do_eval:
        raise ValueError("At least one of `do_train` or `do_eval` must be True.")

    bert_config = BertConfig.from_json_file(args.bert_config_file)

    if args.max_seq_length > bert_config.max_position_embeddings:
        raise ValueError(
            "Cannot use sequence length {} because the BERT model was only trained up to sequence length {}".format(
            args.max_seq_length, bert_config.max_position_embeddings))

    if os.path.exists(args.output_dir) and os.listdir(args.output_dir):
        raise ValueError("Output directory ({}) already exists and is not empty.".format(args.output_dir))
    os.makedirs(args.output_dir, exist_ok=True)

    task_name = args.task_name.lower()

    if task_name not in processors:
        raise ValueError("Task not found: %s" % (task_name))


    processor = processors[task_name]()

    tokenizer = tokenization.FullTokenizer(
        vocab_file=args.vocab_file, do_lower_case=args.do_lower_case)

    train_examples = None
    num_train_steps = None
    if args.do_train:
        train_examples = processor.get_train_examples(args.data_dir)
        num_train_steps = int(
            len(train_examples) / args.train_batch_size / args.gradient_accumulation_steps * args.num_train_epochs)

    label_list = processor.get_labels()

    print("label_list.size:%d\n" %(len(label_list)))

    # Prepare model
    model = BertForSequenceClassification(bert_config, len(label_list))
    if args.init_checkpoint is not None:
        model.bert.load_state_dict(torch.load(args.init_checkpoint, map_location='cpu'))
    if args.fp16:
        model.half()
    model.to(device)
    # if args.local_rank != -1:
    #     model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.local_rank],
    #                                                       output_device=args.local_rank)
    # elif n_gpu > 1:
    #     model = torch.nn.DataParallel(model)

    # Prepare optimizer
    if args.fp16:
        param_optimizer = [(n, param.clone().detach().to('cpu').float().requires_grad_()) \
                            for n, param in model.named_parameters()]
    elif args.optimize_on_cpu:
        param_optimizer = [(n, param.clone().detach().to('cpu').requires_grad_()) \
                            for n, param in model.named_parameters()]
    else:
        param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'gamma', 'beta']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
         'weight_decay_rate': 0.01},
        {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
         'weight_decay_rate': 0.0}
        ]
    optimizer = BERTAdam(optimizer_grouped_parameters,
                         lr=args.learning_rate,
                         warmup=args.warmup_proportion,
                         t_total=num_train_steps)

    global_step = 0
    if args.do_train:
        train_features = convert_examples_to_features(
            train_examples, label_list, args.max_seq_length, tokenizer)
        logger.info("***** Running training *****")
        logger.info("  Num examples = %d", len(train_examples))
        logger.info("  Batch size = %d", args.train_batch_size)
        logger.info("  Num steps = %d", num_train_steps)
        all_input_ids = torch.tensor([f.input_ids for f in train_features], dtype=torch.long)
        all_input_mask = torch.tensor([f.input_mask for f in train_features], dtype=torch.long)
        all_segment_ids = torch.tensor([f.segment_ids for f in train_features], dtype=torch.long)
        all_label_ids = torch.tensor([f.label_id for f in train_features], dtype=torch.long)
        train_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_label_ids)
        if args.local_rank == -1:
            train_sampler = RandomSampler(train_data)
        else:
            train_sampler = RandomSampler(train_data)
            #train_sampler = DistributedSampler(train_data)
        train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=args.train_batch_size)

        model.train()
        for _ in trange(int(args.num_train_epochs), desc="Epoch"):
            tr_loss = 0
            nb_tr_examples, nb_tr_steps = 0, 0
            for step, batch in enumerate(tqdm(train_dataloader, desc="Iteration")):
                batch = tuple(t.to(device) for t in batch)
                input_ids, input_mask, segment_ids, label_ids = batch
                loss, _ = model(input_ids, segment_ids, input_mask, label_ids)
                if n_gpu > 1:
                    loss = loss.mean() # mean() to average on multi-gpu.
                if args.fp16 and args.loss_scale != 1.0:
                    # rescale loss for fp16 training
                    # see https://docs.nvidia.com/deeplearning/sdk/mixed-precision-training/index.html
                    loss = loss * args.loss_scale
                if args.gradient_accumulation_steps > 1:
                    loss = loss / args.gradient_accumulation_steps
                loss.backward()
                tr_loss += loss.item()
                nb_tr_examples += input_ids.size(0)
                nb_tr_steps += 1
                if (step + 1) % args.gradient_accumulation_steps == 0:
                    if args.fp16 or args.optimize_on_cpu:
                        if args.fp16 and args.loss_scale != 1.0:
                            # scale down gradients for fp16 training
                            for param in model.parameters():
                                param.grad.data = param.grad.data / args.loss_scale
                        is_nan = set_optimizer_params_grad(param_optimizer, model.named_parameters(), test_nan=True)
                        if is_nan:
                            logger.info("FP16 TRAINING: Nan in gradients, reducing loss scaling")
                            args.loss_scale = args.loss_scale / 2
                            model.zero_grad()
                            continue
                        optimizer.step()
                        copy_optimizer_params_to_model(model.named_parameters(), param_optimizer)
                    else:
                        optimizer.step()
                    model.zero_grad()
                    global_step += 1

    if args.do_eval:
        eval_examples = processor.get_dev_examples(args.data_dir)
        eval_features = convert_examples_to_features(
            eval_examples, label_list, args.max_seq_length, tokenizer)
        logger.info("***** Running evaluation *****")
        logger.info("  Num examples = %d", len(eval_examples))
        logger.info("  Batch size = %d", args.eval_batch_size)
        all_input_ids = torch.tensor([f.input_ids for f in eval_features], dtype=torch.long)
        all_input_mask = torch.tensor([f.input_mask for f in eval_features], dtype=torch.long)
        all_segment_ids = torch.tensor([f.segment_ids for f in eval_features], dtype=torch.long)
        all_label_ids = torch.tensor([f.label_id for f in eval_features], dtype=torch.long)
        eval_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_label_ids)
        if args.local_rank == -1:
            eval_sampler = SequentialSampler(eval_data)
        else:
            eval_sampler = SequentialSampler(eval_data)
            #eval_sampler = DistributedSampler(eval_data)
        eval_dataloader = DataLoader(eval_data, sampler=eval_sampler, batch_size=args.eval_batch_size)

        model.eval()
        eval_loss, eval_accuracy = 0, 0
        nb_eval_steps, nb_eval_examples = 0, 0
        for input_ids, input_mask, segment_ids, label_ids in eval_dataloader:
            input_ids = input_ids.to(device)
            input_mask = input_mask.to(device)
            segment_ids = segment_ids.to(device)
            label_ids = label_ids.to(device)

            with torch.no_grad():
                tmp_eval_loss, logits = model(input_ids, segment_ids, input_mask, label_ids)

            logits = logits.detach().cpu().numpy()
            label_ids = label_ids.to('cpu').numpy()
            tmp_eval_accuracy = accuracy(logits, label_ids)

            eval_loss += tmp_eval_loss.mean().item()
            eval_accuracy += tmp_eval_accuracy

            nb_eval_examples += input_ids.size(0)
            nb_eval_steps += 1

        eval_loss = eval_loss / nb_eval_steps
        eval_accuracy = eval_accuracy / nb_eval_examples

        result = {'eval_loss': eval_loss,
                  'eval_accuracy': eval_accuracy,
                  'global_step': global_step,
                  # tr_loss only exists if training ran in this invocation
                  'loss': tr_loss / nb_tr_steps if args.do_train else None}

        output_eval_file = os.path.join(args.output_dir, "eval_results.txt")
        with open(output_eval_file, "w") as writer:
            logger.info("***** Eval results *****")
            for key in sorted(result.keys()):
                logger.info("  %s = %s", key, str(result[key]))
                writer.write("%s = %s\n" % (key, str(result[key])))