Example #1 (score: 0)
    def _loadBertConfig_ja_sp(self):
        """Build a BertConfig for the Japanese SentencePiece BERT model.

        Reads the [BERT-CONFIG] section of config.ini under BERT_JA_DIR,
        converts each string value with str_to_value, serializes the result
        to a temporary JSON file, and loads it through
        modeling.BertConfig.from_json_file.

        Returns:
            modeling.BertConfig parsed from the converted ini section.
        """
        from utils import str_to_value
        import configparser
        import json
        import tempfile

        config = configparser.ConfigParser()
        config.read(str(BERT_JA_DIR / "config.ini"))

        # Use a context manager so the temp file is closed (and auto-deleted)
        # after BertConfig has read it; the previous version leaked the handle.
        # from_json_file reads via the file *name*, so flush before reading.
        with tempfile.NamedTemporaryFile(mode='w+t',
                                         encoding='utf-8',
                                         suffix='.json') as bert_config_file:
            bert_config_file.write(
                json.dumps(
                    {k: str_to_value(v)
                     for k, v in config['BERT-CONFIG'].items()}))
            bert_config_file.flush()
            return modeling.BertConfig.from_json_file(bert_config_file.name)
def load_estimator(config, FLAGS):
    """Create a TPUEstimator for BERT fine-tuning from an ini config.

    Args:
        config: configparser.ConfigParser with a [BERT-CONFIG] section whose
            values describe the pre-trained BERT architecture.
        FLAGS: parsed tf.flags object holding run settings (master,
            output_dir, batch sizes, TPU options, task_proc, ...).

    Returns:
        tf.contrib.tpu.TPUEstimator configured for train/eval/predict
        (falls back to CPU/GPU behavior when FLAGS.use_tpu is False).
    """
    # Dump the [BERT-CONFIG] ini section to a temp JSON file so the stock
    # modeling.BertConfig.from_json_file loader can consume it.  The context
    # manager closes (and deletes) the file afterwards; the previous version
    # leaked the handle.  Flush before reading via the file name.
    with tempfile.NamedTemporaryFile(mode='w+t',
                                     encoding='utf-8',
                                     suffix='.json') as bert_config_file:
        bert_config_file.write(
            json.dumps(
                {k: str_to_value(v)
                 for k, v in config['BERT-CONFIG'].items()}))
        bert_config_file.flush()
        bert_config = modeling.BertConfig.from_json_file(bert_config_file.name)

    # No TPU cluster resolver: run against FLAGS.master (or locally).
    tpu_cluster_resolver = None
    is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2
    run_config = tf.contrib.tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        tpu_config=tf.contrib.tpu.TPUConfig(
            iterations_per_loop=FLAGS.iterations_per_loop,
            num_shards=FLAGS.num_tpu_cores,
            per_host_input_for_training=is_per_host))

    # one-hot embeddings are only beneficial on TPU, hence use_tpu twice.
    model_fn = model_fn_builder(bert_config=bert_config,
                                num_labels=len(FLAGS.task_proc.get_labels()),
                                init_checkpoint=FLAGS.init_checkpoint,
                                learning_rate=FLAGS.learning_rate,
                                num_train_steps=FLAGS.num_train_steps,
                                num_warmup_steps=FLAGS.num_warmup_steps,
                                use_tpu=FLAGS.use_tpu,
                                use_one_hot_embeddings=FLAGS.use_tpu)

    return tf.contrib.tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size,
        predict_batch_size=FLAGS.predict_batch_size)
import configparser
import json
import os
import sys
import tempfile

import tensorflow as tf

import utils

# Load the [BERT-CONFIG] section of ../config.ini relative to this file.
CURDIR = os.path.dirname(os.path.abspath(__file__))
CONFIGPATH = os.path.join(CURDIR, os.pardir, 'config.ini')
config = configparser.ConfigParser()
config.read(CONFIGPATH)

# Re-serialize the ini section as JSON into a named temp file so BERT's own
# loader (modeling.BertConfig.from_json_file) can read it later by name.
# NOTE(review): the handle is deliberately kept open for the module's
# lifetime — closing a NamedTemporaryFile deletes the underlying file.
bert_config_file = tempfile.NamedTemporaryFile(mode='w+t',
                                               encoding='utf-8',
                                               suffix='.json')
bert_config_file.write(
    json.dumps(
        {k: utils.str_to_value(v)
         for k, v in config['BERT-CONFIG'].items()}))
bert_config_file.seek(0)

# Make the vendored Google BERT sources (../bert) importable.
sys.path.append(os.path.join(CURDIR, os.pardir, 'bert'))
import modeling
import optimization

flags = tf.flags

FLAGS = flags.FLAGS

# Required parameters
flags.DEFINE_string(
    "bert_config_file", None,
    "The config json file corresponding to the pre-trained BERT model. "
    # NOTE(review): this flag definition appears truncated in this excerpt.
Example #4 (score: 0)
import configparser
import json
import os
import sys
import tempfile

import tensorflow as tf

import tokenization_sentencepiece as tokenization
import utils

# Load the [BERT-CONFIG] section of ../config.ini relative to this file.
CURDIR = os.path.dirname(os.path.abspath(__file__))
CONFIGPATH = os.path.join(CURDIR, os.pardir, 'config.ini')
config = configparser.ConfigParser()
config.read(CONFIGPATH)

# Re-serialize the ini section as JSON into a named temp file for
# modeling.BertConfig.from_json_file.  The handle is intentionally left
# open: closing a NamedTemporaryFile deletes the underlying file.
bert_config_file = tempfile.NamedTemporaryFile(mode='w+t', encoding='utf-8', suffix='.json')
bert_config_file.write(json.dumps({k: utils.str_to_value(v) for k, v in config['BERT-CONFIG'].items()}))
bert_config_file.seek(0)

# Make the vendored Google BERT sources (../bert) importable.
sys.path.append(os.path.join(CURDIR, os.pardir, 'bert'))

import modeling
import optimization

flags = tf.flags

FLAGS = flags.FLAGS

# Required parameters
flags.DEFINE_string(
    "data_dir", None,
    "The input data dir. Should contain the .tsv files (or other data files) "
    # NOTE(review): this flag definition appears truncated in this excerpt.
Example #5 (score: 0)
def main():
    """Run inference for the Livedoor classification task.

    Restores the latest fine-tuned BERT checkpoint, converts the test set
    to a TFRecord file, predicts with a TPUEstimator (CPU/GPU when
    flags.use_tpu is False) and reports accuracy via accracy().
    """
    # --- BERT config: dump the [BERT-CONFIG] ini section to a temp JSON
    # file so modeling.BertConfig.from_json_file can load it by name.
    # The context manager closes (and deletes) the file afterwards; the
    # previous version leaked the handle.
    with tempfile.NamedTemporaryFile(mode='w+t',
                                     encoding='utf-8',
                                     suffix='.json') as bert_config_file:
        bert_config_file.write(
            json.dumps(
                {k: str_to_value(v)
                 for k, v in config['BERT-CONFIG'].items()}))
        bert_config_file.flush()  # ensure JSON hits disk before it is read
        bert_config = modeling.BertConfig.from_json_file(bert_config_file.name)

    # Strip the shard suffix: model.ckpt-11052.data-00000-of-00001 ->
    # model.ckpt-11052, the prefix shared by the .index/.meta/.data files.
    latest_ckpt = latest_ckpt_model()
    finetuned_model_path = latest_ckpt.split('.data-00000-of-00001')[0]
    flags = FLAGS(finetuned_model_path)
    processor = LivedoorProcessor()
    label_list = processor.get_labels()

    # SentencePiece tokenizer.
    tokenizer = tokenization.FullTokenizer(model_file=flags.model_file,
                                           vocab_file=flags.vocab_file,
                                           do_lower_case=flags.do_lower_case)

    # No TPU: resolver stays None and TPUEstimator falls back to CPU/GPU.
    tpu_cluster_resolver = None
    is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2
    run_config = tf.contrib.tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=flags.master,
        model_dir=flags.output_dir,
        save_checkpoints_steps=flags.save_checkpoints_steps,
        tpu_config=tf.contrib.tpu.TPUConfig(
            iterations_per_loop=flags.iterations_per_loop,
            num_shards=flags.num_tpu_cores,
            per_host_input_for_training=is_per_host))

    model_fn = model_fn_builder(bert_config=bert_config,
                                num_labels=len(label_list),
                                init_checkpoint=flags.init_checkpoint,
                                learning_rate=flags.learning_rate,
                                num_train_steps=flags.num_train_steps,
                                num_warmup_steps=flags.num_warmup_steps,
                                use_tpu=flags.use_tpu,
                                use_one_hot_embeddings=flags.use_tpu)

    estimator = tf.contrib.tpu.TPUEstimator(
        use_tpu=flags.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=flags.train_batch_size,
        eval_batch_size=flags.eval_batch_size,
        predict_batch_size=flags.predict_batch_size)

    # Fetch the test example collection and serialize it to a TFRecord temp
    # file; the file must stay alive until prediction has consumed it, so
    # everything that reads it stays inside the `with` block.
    predict_examples = processor.get_test_examples(flags.data_dir)
    with tempfile.NamedTemporaryFile(mode='w+t',
                                     encoding='utf-8',
                                     suffix='.tf_record') as predict_file:
        # Convert the `InputExample`s to a TFRecord file (output:
        # predict_file.name).  See bert-japanese src/run_classifier.py.
        file_based_convert_examples_to_features(predict_examples, label_list,
                                                flags.max_seq_length, tokenizer,
                                                predict_file.name)
        predict_drop_remainder = bool(flags.use_tpu)

        # Input closure handed to TPUEstimator.predict.
        predict_input_fn = file_based_input_fn_builder(
            input_file=predict_file.name,
            seq_length=flags.max_seq_length,
            is_training=False,
            drop_remainder=predict_drop_remainder)
        # predict() returns a lazy generator — materialize it while the
        # TFRecord file still exists.
        result = list(estimator.predict(input_fn=predict_input_fn))

    # Compute and report accuracy (helper defined elsewhere in this module).
    accracy(result, label_list)