def run_bert_pretrain(strategy, input_meta_data):
    """Runs BERT pre-training."""

    albert_config = AlbertConfig.from_json_file(FLAGS.albert_config_file)
    if not strategy:
        raise ValueError('Distribution strategy is not specified.')

    # Runs customized training loop.
    logging.info(
        'Training using customized training loop TF 2.0 with distributed '
        'strategy.')

    num_train_steps = None
    num_warmup_steps = None
    steps_per_epoch = None
    if FLAGS.do_train:
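        # Derive steps per epoch, total training steps, and warmup steps from the dataset size.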
        len_train_examples = input_meta_data['train_data_size']
        steps_per_epoch = int(len_train_examples / FLAGS.train_batch_size)
        num_train_steps = int(len_train_examples / FLAGS.train_batch_size *
                              FLAGS.num_train_epochs)
        num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

    return run_customized_training(strategy, albert_config,
                                   input_meta_data["max_seq_length"],
                                   input_meta_data["max_predictions_per_seq"],
                                   FLAGS.output_dir, steps_per_epoch,
                                   steps_per_epoch, FLAGS.num_train_epochs,
                                   FLAGS.learning_rate, num_warmup_steps,
                                   FLAGS.input_files, FLAGS.train_batch_size)
Example #2
def get_model_v2_bidaf(albert_config_dict, max_seq_length, init_checkpoint,
                       learning_rate, start_n_top, end_n_top, dropout,
                       num_train_steps, num_warmup_steps):
    """Returns keras model"""
    if isinstance(albert_config_dict, dict):
        albert_config = AlbertConfig.from_dict(albert_config_dict)
    else:
        albert_config = albert_config_dict
    print('new model ALBertQAModel_v2 ...')
    squad_model = ALBertQAModel_v2(albert_config, max_seq_length,
                                   init_checkpoint, start_n_top, end_n_top,
                                   dropout)

    learning_rate_fn = tf.keras.optimizers.schedules.PolynomialDecay(
        initial_learning_rate=learning_rate,
        decay_steps=num_train_steps,
        end_learning_rate=0.0)
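    # If warmup is configured, wrap the decay schedule so the learning rate ramps up first.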
    if num_warmup_steps:
        learning_rate_fn = WarmUp(initial_learning_rate=learning_rate,
                                  decay_schedule_fn=learning_rate_fn,
                                  warmup_steps=num_warmup_steps)

    if FLAGS.optimizer == "LAMB":
        optimizer_fn = LAMB
    else:
        optimizer_fn = AdamWeightDecay

    optimizer = optimizer_fn(learning_rate=learning_rate_fn,
                             weight_decay_rate=FLAGS.weight_decay,
                             beta_1=0.9,
                             beta_2=0.999,
                             epsilon=FLAGS.adam_epsilon,
                             exclude_from_weight_decay=['layer_norm', 'bias'])

    squad_model.optimizer = optimizer

    return squad_model
Example #3
def main(_):
  assert tf.version.VERSION.startswith('2.')
  output_path = FLAGS.converted_checkpoint_path
  v1_checkpoint = FLAGS.checkpoint_to_convert
  albert_config = AlbertConfig.from_json_file(FLAGS.albert_config_file)
  convert_checkpoint(albert_config, output_path, v1_checkpoint)
Example #4
def main(_):
    logging.set_verbosity(logging.INFO)

    if FLAGS.enable_xla:
        set_config_v2(FLAGS.enable_xla)

    strategy = None
    if FLAGS.strategy_type == "one":
        strategy = tf.distribute.OneDeviceStrategy("GPU:0")
    elif FLAGS.strategy_type == "mirror":
        strategy = tf.distribute.MirroredStrategy()
    else:
        raise ValueError(
            'The distribution strategy type is not supported: %s' %
            FLAGS.strategy_type)

    with tf.io.gfile.GFile(FLAGS.input_meta_data_path, 'rb') as reader:
        input_meta_data = json.loads(reader.read().decode('utf-8'))

    num_labels = input_meta_data["num_labels"]
    FLAGS.max_seq_length = input_meta_data["max_seq_length"]
    processor_type = input_meta_data['processor_type']

    if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict:
        raise ValueError(
            "At least one of `do_train`, `do_eval` or `do_predict' must be True."
        )

    albert_config = AlbertConfig.from_json_file(FLAGS.albert_config_file)

    if FLAGS.max_seq_length > albert_config.max_position_embeddings:
        raise ValueError(
            "Cannot use sequence length %d because the ALBERT model "
            "was only trained up to sequence length %d" %
            (FLAGS.max_seq_length, albert_config.max_position_embeddings))

    tf.io.gfile.makedirs(FLAGS.output_dir)

    num_train_steps = None
    num_warmup_steps = None
    steps_per_epoch = None
    if FLAGS.do_train:
        len_train_examples = input_meta_data['train_data_size']
        steps_per_epoch = int(len_train_examples / FLAGS.train_batch_size)
        num_train_steps = int(len_train_examples / FLAGS.train_batch_size *
                              FLAGS.num_train_epochs)
        num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

    loss_multiplier = 1.0 / strategy.num_replicas_in_sync
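    # Each replica's loss is scaled by 1/num_replicas so the aggregated loss matches a single-replica run.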

    with strategy.scope():
        model = get_model(albert_config=albert_config,
                          max_seq_length=FLAGS.max_seq_length,
                          num_labels=num_labels,
                          init_checkpoint=FLAGS.init_checkpoint,
                          learning_rate=FLAGS.learning_rate,
                          num_train_steps=num_train_steps,
                          num_warmup_steps=num_warmup_steps,
                          loss_multiplier=loss_multiplier)
    model.summary()

    if FLAGS.do_train:
        logging.info("***** Running training *****")
        logging.info("  Num examples = %d", len_train_examples)
        logging.info("  Batch size = %d", FLAGS.train_batch_size)
        logging.info("  Num steps = %d", num_train_steps)

        train_input_fn = functools.partial(create_classifier_dataset,
                                           FLAGS.train_data_path,
                                           seq_length=FLAGS.max_seq_length,
                                           batch_size=FLAGS.train_batch_size,
                                           drop_remainder=False)

        eval_input_fn = functools.partial(create_classifier_dataset,
                                          FLAGS.eval_data_path,
                                          seq_length=FLAGS.max_seq_length,
                                          batch_size=FLAGS.eval_batch_size,
                                          is_training=False,
                                          drop_remainder=False)

        with strategy.scope():

            summary_dir = os.path.join(FLAGS.output_dir, 'summaries')
            summary_callback = tf.keras.callbacks.TensorBoard(summary_dir)
            checkpoint_path = os.path.join(FLAGS.output_dir, 'checkpoint')
            checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
                checkpoint_path, save_weights_only=True)
            custom_callbacks = [summary_callback, checkpoint_callback]

            def metric_fn():
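                # STS-B is a regression task, so report mean squared error; the other tasks report accuracy.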
                if FLAGS.task_name.lower() == "sts":
                    return tf.keras.metrics.MeanSquaredError(dtype=tf.float32)
                else:
                    return tf.keras.metrics.SparseCategoricalAccuracy(
                        dtype=tf.float32)

            if FLAGS.custom_training_loop:
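                # Drive training with the repo's customized loop; model.fit is the fallback below.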
                if FLAGS.task_name.lower() == "sts":
                    loss_fn = get_loss_fn_v2(loss_factor=loss_multiplier)
                else:
                    loss_fn = get_loss_fn(num_labels,
                                          loss_factor=loss_multiplier)
                model = run_customized_training_loop(
                    strategy=strategy,
                    model=model,
                    loss_fn=loss_fn,
                    model_dir=checkpoint_path,
                    train_input_fn=train_input_fn,
                    steps_per_epoch=steps_per_epoch,
                    epochs=FLAGS.num_train_epochs,
                    eval_input_fn=eval_input_fn,
                    eval_steps=int(input_meta_data['eval_data_size'] /
                                   FLAGS.eval_batch_size),
                    metric_fn=metric_fn,
                    custom_callbacks=custom_callbacks)
            else:
                training_dataset = train_input_fn()
                evaluation_dataset = eval_input_fn()
                model.fit(x=training_dataset,
                          validation_data=evaluation_dataset,
                          epochs=FLAGS.num_train_epochs,
                          callbacks=custom_callbacks)

    if FLAGS.do_eval:

        len_eval_examples = input_meta_data['eval_data_size']

        logging.info("***** Running evaluation *****")
        logging.info("  Num examples = %d", len_eval_examples)
        logging.info("  Batch size = %d", FLAGS.eval_batch_size)
        # Rebuild the eval dataset here so --do_eval works even when --do_train is off.
        eval_input_fn = functools.partial(create_classifier_dataset,
                                          FLAGS.eval_data_path,
                                          seq_length=FLAGS.max_seq_length,
                                          batch_size=FLAGS.eval_batch_size,
                                          is_training=False,
                                          drop_remainder=False)
        evaluation_dataset = eval_input_fn()
        with strategy.scope():
            loss, accuracy = model.evaluate(evaluation_dataset)

        print(f"Loss: {loss}, Accuracy: {accuracy}")

    if FLAGS.do_predict:

        logging.info("***** Running prediction*****")
        flags.mark_flag_as_required("input_data_dir")
        flags.mark_flag_as_required("predict_data_path")
        tokenizer = tokenization.FullTokenizer(
            vocab_file=None,
            spm_model_file=FLAGS.spm_model_file,
            do_lower_case=FLAGS.do_lower_case)

        processors = {
            "cola": classifier_data_lib.ColaProcessor,
            "sts": classifier_data_lib.StsbProcessor,
            "sst": classifier_data_lib.Sst2Processor,
            "mnli": classifier_data_lib.MnliProcessor,
            "qnli": classifier_data_lib.QnliProcessor,
            "qqp": classifier_data_lib.QqpProcessor,
            "rte": classifier_data_lib.RteProcessor,
            "mrpc": classifier_data_lib.MrpcProcessor,
            "wnli": classifier_data_lib.WnliProcessor,
            "xnli": classifier_data_lib.XnliProcessor,
        }
        task_name = FLAGS.task_name.lower()
        if task_name not in processors:
            raise ValueError("Task not found: %s" % (task_name))

        processor = processors[task_name]()

        predict_examples = processor.get_test_examples(FLAGS.input_data_dir)

        label_list = processor.get_labels()
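        # label_map converts predicted class indices back to string labels for the submission file.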
        label_map = {i: label for i, label in enumerate(label_list)}

        classifier_data_lib.file_based_convert_examples_to_features(
            predict_examples, label_list, input_meta_data['max_seq_length'],
            tokenizer, FLAGS.predict_data_path)

        predict_input_fn = functools.partial(
            create_classifier_dataset,
            FLAGS.predict_data_path,
            seq_length=input_meta_data['max_seq_length'],
            batch_size=FLAGS.eval_batch_size,
            is_training=False,
            drop_remainder=False)
        prediction_dataset = predict_input_fn()

        with strategy.scope():
            logits = model.predict(prediction_dataset)
            if FLAGS.task_name.lower() == "sts":
                predictions = logits
                probabilities = logits
            else:
                predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
                probabilities = tf.nn.softmax(logits, axis=-1)

        output_predict_file = os.path.join(FLAGS.output_dir,
                                           "test_results.tsv")
        output_submit_file = os.path.join(FLAGS.output_dir,
                                          "submit_results.tsv")
        with tf.io.gfile.GFile(output_predict_file, "w") as pred_writer,\
            tf.io.gfile.GFile(output_submit_file, "w") as sub_writer:
            logging.info("***** Predict results *****")
            for (example, probability,
                 prediction) in zip(predict_examples, probabilities,
                                    predictions):
                output_line = "\t".join(
                    str(class_probability.numpy())
                    for class_probability in probability) + "\n"
                pred_writer.write(output_line)

                actual_label = label_map[int(prediction)]
                sub_writer.write(
                    six.ensure_str(example.guid) + "\t" + actual_label + "\n")
Example #5
def predict_squad(strategy):
    """Makes predictions for a squad dataset."""
    albert_config = AlbertConfig.from_json_file(FLAGS.albert_config_file)
    doc_stride = FLAGS.doc_stride
    max_query_length = FLAGS.max_query_length

    eval_examples = squad_lib.read_squad_examples(
        input_file=FLAGS.predict_file, is_training=False)

    tokenizer = tokenization.FullTokenizer(vocab_file=None,
                                           spm_model_file=FLAGS.spm_model_file,
                                           do_lower_case=FLAGS.do_lower_case)

    eval_writer = squad_lib.FeatureWriter(
        filename=os.path.join(FLAGS.model_dir.split(',')[0], 'eval.tf_record'),
        is_training=False)
    eval_features = []

    def _append_feature(feature):
        eval_features.append(feature)
        eval_writer.process_feature(feature)

    # TPU requires a fixed batch size for all batches, therefore the number
    # of examples must be a multiple of the batch size, or else examples
    # will get dropped. So we pad with fake examples which are ignored
    # later on.
    dataset_size = squad_lib.convert_examples_to_features(
        examples=eval_examples,
        tokenizer=tokenizer,
        max_seq_length=FLAGS.max_seq_length,
        doc_stride=doc_stride,
        max_query_length=max_query_length,
        is_training=False,
        output_fn=_append_feature)
    eval_writer.close()

    logging.info('***** Running predictions *****')
    logging.info('  Num orig examples = %d', len(eval_examples))
    logging.info('  Num split examples = %d', len(eval_features))
    logging.info('  Batch size = %d', FLAGS.predict_batch_size)

    num_steps = math.ceil(dataset_size / FLAGS.predict_batch_size)
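    # Rounded up so the padded final batch is still run through prediction.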
    all_results1, all_results2 = predict_squad_customized(
        strategy, albert_config, eval_writer.filename, num_steps)

    model_dirs = FLAGS.model_dir.split(',')
    model_dir1 = model_dirs[0]
    model_dir2 = model_dirs[1]

    output_prediction_file = os.path.join(model_dir1, 'predictions.json')
    output_nbest_file = os.path.join(model_dir1, 'nbest_predictions.json')
    output_null_log_odds_file = os.path.join(model_dir1, 'null_odds.json')

    if FLAGS.version_2_with_negative:
        squad_lib.write_predictions_v2(
            eval_examples, eval_features, all_results1, FLAGS.n_best_size,
            FLAGS.max_answer_length, output_prediction_file, output_nbest_file,
            output_null_log_odds_file, FLAGS.start_n_top, FLAGS.end_n_top)

    output_prediction_file = os.path.join(model_dir2, 'predictions.json')
    output_nbest_file = os.path.join(model_dir2, 'nbest_predictions.json')
    output_null_log_odds_file = os.path.join(model_dir2, 'null_odds.json')

    if FLAGS.version_2_with_negative:
        squad_lib.write_predictions_v2(
            eval_examples, eval_features, all_results2, FLAGS.n_best_size,
            FLAGS.max_answer_length, output_prediction_file, output_nbest_file,
            output_null_log_odds_file, FLAGS.start_n_top, FLAGS.end_n_top)
Example #6
def train_squad(strategy,
                input_meta_data,
                custom_callbacks=None,
                run_eagerly=False):
    """Run bert squad training."""
    if strategy:
        logging.info('Training using customized training loop with distribution'
                     ' strategy.')
    # Enables XLA in Session Config. Should not be set for TPU.
    if FLAGS.enable_xla:
        set_config_v2(FLAGS.enable_xla)

    num_train_examples = input_meta_data['train_data_size']
    max_seq_length = input_meta_data['max_seq_length']
    steps_per_epoch = int(num_train_examples / FLAGS.train_batch_size)
    num_train_steps = int(
        num_train_examples / FLAGS.train_batch_size * FLAGS.num_train_epochs)
    num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

    with strategy.scope():
        albert_config = AlbertConfig.from_json_file(FLAGS.albert_config_file)
        if FLAGS.version_2_with_negative:
            model = get_model_v2(albert_config, input_meta_data['max_seq_length'],
                                 FLAGS.init_checkpoint, FLAGS.learning_rate,
                                 FLAGS.start_n_top, FLAGS.end_n_top,
                                 FLAGS.squad_dropout, num_train_steps,
                                 num_warmup_steps)
        else:
            model = get_model_v1(albert_config, input_meta_data['max_seq_length'],
                                 FLAGS.init_checkpoint, FLAGS.learning_rate,
                                 num_train_steps, num_warmup_steps)

    if FLAGS.version_2_with_negative:
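        # SQuAD 2.0 style data (with unanswerable questions) goes through a separate input pipeline.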
        train_input_fn = functools.partial(
            input_pipeline.create_squad_dataset_v2,
            FLAGS.train_data_path,
            max_seq_length,
            FLAGS.train_batch_size,
            is_training=True)
    else:
        train_input_fn = functools.partial(
            input_pipeline.create_squad_dataset,
            FLAGS.train_data_path,
            max_seq_length,
            FLAGS.train_batch_size,
            is_training=True)

    # The original BERT model does not scale the loss by
    # 1/num_replicas_in_sync. It could be an accident. So, in order to use
    # the same hyper parameter, we do the same thing here by keeping each
    # replica loss as it is.
    if FLAGS.version_2_with_negative:
        loss_fn = get_loss_fn_v2(
            loss_factor=1.0 / strategy.num_replicas_in_sync)
    else:
        loss_fn = get_loss_fn(loss_factor=1.0 / strategy.num_replicas_in_sync)

    trained_model = run_customized_training_loop(
        strategy=strategy,
        model=model,
        loss_fn=loss_fn,
        model_dir=FLAGS.model_dir,
        train_input_fn=train_input_fn,
        steps_per_epoch=steps_per_epoch,
        # steps_per_loop=steps_per_epoch,
        epochs=FLAGS.num_train_epochs,
        run_eagerly=run_eagerly,
        custom_callbacks=custom_callbacks)
Example #7
def create_sop(ptd, config):
    return ptd.create_sop(config)


def full_document_sampling(ptd, config):
    return ptd.full_document_sampling(config)


def create_mlm(ptd, config):
    return ptd.create_mlm(config)


def create_sbo(ptd, config):
    return ptd.create_sbo(config)


def none():
    pass


from albert import AlbertConfig, Albert
import tokenization

bert_config = AlbertConfig.from_json('bert_config.json')
transformer = Albert(bert_config)
bertx = PreTrainModel('ptm_config.json', transformer)
train_iterator = bertx.create_instances(
    '00.txt', tokenization.ChineseWordpieceTokenizer('vocab.txt'))
for batch in train_iterator:
    bertx(batch)
Example #8
def main(_):

    tfhub_model_path = FLAGS.tf_hub_path
    max_seq_length = 512
    float_type = tf.float32

    input_word_ids = tf.keras.layers.Input(shape=(max_seq_length, ),
                                           dtype=tf.int32,
                                           name='input_word_ids')
    input_mask = tf.keras.layers.Input(shape=(max_seq_length, ),
                                       dtype=tf.int32,
                                       name='input_mask')
    input_type_ids = tf.keras.layers.Input(shape=(max_seq_length, ),
                                           dtype=tf.int32,
                                           name='input_type_ids')

    if FLAGS.version == 2:
        albert_config = AlbertConfig.from_json_file(
            os.path.join(tfhub_model_path, "assets", "albert_config.json"))
    else:
        albert_config = AlbertConfig.from_json_file(
            os.path.join("model_configs", FLAGS.model, "config.json"))

    tags = []

    stock_values = {}

    with tf.Graph().as_default():
        sm = tf.compat.v2.saved_model.load(tfhub_model_path, tags=tags)
        with tf.compat.v1.Session() as sess:
            sess.run(tf.compat.v1.global_variables_initializer())
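            # Read every variable from the TF1-style SavedModel into a name -> value map.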
            stock_values = {
                v.name.split(":")[0]: v.read_value()
                for v in sm.variables
            }
            stock_values = sess.run(stock_values)

    loaded_weights = set()
    skip_count = 0
    weight_value_tuples = []
    skipped_weight_value_tuples = []

    if FLAGS.model_type == "albert_encoder":
        albert_layer = AlbertModel(config=albert_config, float_type=float_type)

        pooled_output, sequence_output = albert_layer(input_word_ids,
                                                      input_mask,
                                                      input_type_ids)
        albert_model = tf.keras.Model(
            inputs=[input_word_ids, input_mask, input_type_ids],
            outputs=[pooled_output, sequence_output])
        albert_params = albert_model.weights
        param_values = tf.keras.backend.batch_get_value(albert_model.weights)
    else:
        albert_full_model, _ = pretrain_model(albert_config,
                                              max_seq_length,
                                              max_predictions_per_seq=20)
        albert_layer = albert_full_model.get_layer("albert_model")
        albert_params = albert_full_model.weights
        param_values = tf.keras.backend.batch_get_value(
            albert_full_model.weights)

    for ndx, (param_value, param) in enumerate(zip(param_values,
                                                   albert_params)):
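        # Look up this Keras weight's checkpoint counterpart via weight_map.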
        stock_name = weight_map[param.name]

        if stock_name in stock_values:
            ckpt_value = stock_values[stock_name]

            if param_value.shape != ckpt_value.shape:
                print(
                    "loader: Skipping weight:[{}] as the weight shape:[{}] is not compatible "
                    "with the checkpoint:[{}] shape:{}".format(
                        param.name, param.shape, stock_name, ckpt_value.shape))
                skipped_weight_value_tuples.append((param, ckpt_value))
                continue

            weight_value_tuples.append((param, ckpt_value))
            loaded_weights.add(stock_name)
        else:
            print("loader: No value for:[{}], i.e.:[{}] in:[{}]".format(
                param.name, stock_name, tfhub_model_path))
            skip_count += 1
    tf.keras.backend.batch_set_value(weight_value_tuples)

    print("Done loading {} ALBERT weights from: {} into {} (prefix:{}). "
          "Count of weights not found in the checkpoint was: [{}]. "
          "Count of weights with mismatched shape: [{}]".format(
              len(weight_value_tuples), tfhub_model_path, albert_layer,
              "albert", skip_count, len(skipped_weight_value_tuples)))
    print(
        "Unused weights from saved model:", "\n\t" + "\n\t".join(
            sorted(set(stock_values.keys()).difference(loaded_weights))))

    if FLAGS.model_type == "albert_encoder":
        albert_model.save_weights(f"{tfhub_model_path}/tf2_model.h5")
    else:
        albert_full_model.save_weights(f"{tfhub_model_path}/tf2_model_full.h5")