Example 1
def _maybe_overwrite_model_dir_and_session_config(config, model_dir):
  """Overwrite estimator config by `model_dir` and `session_config` if needed.

  Args:
    config: Original estimator config.
    model_dir: Estimator model checkpoint directory.

  Returns:
    Overwritten estimator config.

  Raises:
    ValueError: Model directory inconsistent between `model_dir` and `config`.
  """

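  # Normalize `config`: accept a dict of RunConfig kwargs or None, and make
  # sure a session_config is always set before model_dir is reconciled below.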
  default_session_config = run_config_lib.get_default_session_config()
  if isinstance(config, dict):
    config = RunConfig(**config)
  elif config is None:
    config = RunConfig(session_config=default_session_config)
  if config.session_config is None:
    config = RunConfig.replace(config, session_config=default_session_config)

  if model_dir is not None:
    if (getattr(config, 'model_dir', None) is not None and
        config.model_dir != model_dir):
      raise ValueError(
          "`model_dir` are set both in constructor and `RunConfig`, but with "
          "different values. In constructor: '{}', in `RunConfig`: "
          "'{}' ".format(model_dir, config.model_dir))
    config = RunConfig.replace(config, model_dir=model_dir)
  elif getattr(config, 'model_dir', None) is None:
    model_dir = tempfile.mkdtemp()
    config = RunConfig.replace(config, model_dir=model_dir)

  return config
Example 2
def main():
    sequence_schema_path = f'{input_path}/train/sequence_schema'
    context_schema_path = f'{input_path}/train/context_schema'

    context_schema, sequence_schema = read_schemata(context_schema_path, sequence_schema_path)

    tf_ctx_schema, tf_seq_schema = build_schema(context_schema, sequence_schema)

    train_parts = glob.glob(input_path + '/train' + '/part-*')
    validation_parts = glob.glob(input_path + '/test' + '/part-*')

    run_config = RunConfig(log_step_count_steps=10,
                           save_checkpoints_steps=100,
                           save_summary_steps=200,
                           keep_checkpoint_max=32)

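    # Bind the static arguments once, then specialize the same input_fn for the
    # train and validation file lists with functools.partial.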
    shared_input_fn = partial(input_fn, params, tf_seq_schema, tf_ctx_schema)

    train_input_fn = partial(shared_input_fn, train_parts)

    validation_input_fn = partial(shared_input_fn, validation_parts)

    train_spec = TrainSpec(train_input_fn, max_steps=1000000)

    eval_spec = EvalSpec(validation_input_fn, steps=200, name='validation', start_delay_secs=30, throttle_secs=1)

    estimator = Estimator(model_fn=model.model_fn,
                          model_dir=model_dir,
                          params=params,
                          config=run_config)

    logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
                        level=logging.INFO)
    logging.getLogger('tensorflow').propagate = False

    train_and_evaluate(estimator=estimator,
                       train_spec=train_spec,
                       eval_spec=eval_spec)

    prediction = list(estimator.predict(input_fn=partial(predict_input_fn, {'epochs': 1, 'batch_size': 10}, grid)))

    scores = [p.tolist() for p in prediction]

    pairwise_prob = pairwise_probability(scores)

    zero = pairwise_prob[0]

    A_zero = build_diags(zero)

    print(optimize(A_zero).x)
Example 3
    def get_estimator(self):

        from tensorflow.python.estimator.estimator import Estimator
        from tensorflow.python.estimator.run_config import RunConfig
        from tensorflow.python.estimator.model_fn import EstimatorSpec

        def model_fn(features, labels, mode, params):
            with tf.gfile.GFile(self.graph_path, 'rb') as f:

                # Log which frozen graph is being loaded.
                print("log graph")
                print(self.graph_path)

                graph_def = tf.GraphDef()
                graph_def.ParseFromString(f.read())

            input_names = ['input_ids', 'input_mask', 'input_type_ids']

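            # Map the frozen graph's input placeholders ('<name>:0') onto the
            # Estimator features and pull out the 'final_encodes' tensor.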
            output = tf.import_graph_def(
                graph_def,
                input_map={k + ':0': features[k]
                           for k in input_names},
                return_elements=['final_encodes:0'])

            return EstimatorSpec(mode=mode, predictions={'encodes': output[0]})

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.gpu_options.per_process_gpu_memory_fraction = self.gpu_memory_fraction
        config.log_device_placement = False
        config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1

        run_config = RunConfig(session_config=config)

        return Estimator(model_fn=model_fn,
                         config=run_config,
                         params={'batch_size': self.batch_size})
Example 4
def run_training(
    train_fn,
    model_fn,
    model_dir: str,
    gpu_mem_fraction: float = 0.96,
    log_step: int = 100,
    summary_step: int = 100,
    save_checkpoint_step: int = 1000,
    max_steps: int = 10000,
    eval_step: int = 10,
    eval_throttle: int = 120,
    train_batch_size: int = 128,
    train_hooks=None,
    eval_fn=None,
):
    tf.logging.set_verbosity(tf.logging.INFO)
    dist_strategy = None

    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=gpu_mem_fraction)
    config = tf.ConfigProto(allow_soft_placement=True, gpu_options=gpu_options)
    run_config = RunConfig(
        train_distribute=dist_strategy,
        eval_distribute=dist_strategy,
        log_step_count_steps=log_step,
        model_dir=model_dir,
        save_checkpoints_steps=save_checkpoint_step,
        save_summary_steps=summary_step,
        session_config=config,
    )

    estimator = tf.estimator.Estimator(model_fn=model_fn,
                                       params={},
                                       config=run_config)

    if eval_fn:
        train_spec = tf.estimator.TrainSpec(input_fn=train_fn,
                                            max_steps=max_steps,
                                            hooks=train_hooks)

        eval_spec = tf.estimator.EvalSpec(input_fn=eval_fn,
                                          steps=eval_step,
                                          throttle_secs=eval_throttle)
        tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

    else:
        estimator.train(input_fn=train_fn,
                        max_steps=max_steps,
                        hooks=train_hooks)
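
# A minimal usage sketch (illustrative, not part of the original snippet):
# `make_train_input_fn` and `my_model_fn` are hypothetical stand-ins for the
# caller's input pipeline and model function.
def _example_run_training_usage():
    run_training(
        train_fn=make_train_input_fn('train.tfrecord'),  # hypothetical input_fn factory
        model_fn=my_model_fn,                            # hypothetical model_fn
        model_dir='output/checkpoints',
        max_steps=5000,
        save_checkpoint_step=500,
    )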
Example 5
def process(question, contexts):
    # TODO Replace all abbreviation code

    bert_config = modeling.BertConfig.from_json_file(
        os.path.join(modelDir, 'bert_config.json'))  # Loading bert config
    tokenizer = tokenization.FullTokenizer(
        vocab_file=os.path.join(modelDir, 'vocab.txt'),
        do_lower_case=False)  # Loading tokenizer

    candidates = read_QA(question, contexts)
    eval_features = convert_candidates_to_features(candidates=candidates,
                                                   tokenizer=tokenizer,
                                                   max_seq_length=512,
                                                   doc_stride=256,
                                                   max_query_length=128)

    model_fn = model_fn_builder(bert_config=bert_config,
                                init_checkpoint=os.path.join(
                                    modelDir, 'bert_model.ckpt'),
                                use_one_hot_embeddings=False)

    run_config = RunConfig(model_dir=modelDir, save_checkpoints_steps=1000)
    estimator = Estimator(model_fn=model_fn,
                          config=run_config,
                          params={'batch_size': 14})

    predict_input_fn = input_fn_builder(features=eval_features,
                                        seq_length=512,
                                        drop_remainder=True)

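    # Collect raw start/end logits for every feature produced by the input_fn;
    # stop once every eval feature has yielded a result.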
    all_results = []
    counter = 0
    RawResult = collections.namedtuple(
        "RawResult", ["unique_id", "start_logits", "end_logits"])
    for result in estimator.predict(predict_input_fn,
                                    yield_single_examples=True):
        unique_id = int(result["unique_ids"])
        start_logits = [float(x) for x in result["start_logits"].flat]
        end_logits = [float(x) for x in result["end_logits"].flat]
        all_results.append(
            RawResult(unique_id=unique_id,
                      start_logits=start_logits,
                      end_logits=end_logits))
        counter += 1
        if len(eval_features) == counter: break

    all_nbest_json = write_QA(candidates, eval_features, all_results, 2, 128,
                              False)
    return all_nbest_json
Example 6
def _maybe_overwrite_model_dir_and_session_config(config, model_dir):
    """Overwrite estimator config by `model_dir` and `session_config` if needed.

  Args:
    config: Original estimator config.
    model_dir: Estimator model checkpoint directory.

  Returns:
    Overwritten estimator config.

  Raises:
    ValueError: Model directory inconsistent between `model_dir` and `config`.
  """

    default_session_config = run_config_lib.get_default_session_config()
    if isinstance(config, dict):
        config = RunConfig(**config)
    elif config is None:
        config = RunConfig(session_config=default_session_config)
    if config.session_config is None:
        config = RunConfig.replace(config,
                                   session_config=default_session_config)

    if model_dir is not None:
        if (getattr(config, 'model_dir', None) is not None
                and config.model_dir != model_dir):
            raise ValueError(
                "`model_dir` are set both in constructor and `RunConfig`, but with "
                "different values. In constructor: '{}', in `RunConfig`: "
                "'{}' ".format(model_dir, config.model_dir))
        config = RunConfig.replace(config, model_dir=model_dir)
    elif getattr(config, 'model_dir', None) is None:
        model_dir = tempfile.mkdtemp()
        config = RunConfig.replace(config, model_dir=model_dir)

    return config
Example 7
    def _build_estimator(self):
        def model_fn(features, mode):
            with tf.gfile.GFile(self._graphdef, 'rb') as f:
                graph_def = tf.GraphDef()
                graph_def.ParseFromString(f.read())

            output = tf.import_graph_def(
                graph_def,
                input_map={k + ':0': features[k]
                           for k in self._input_names},
                return_elements=['final_encodes:0'])

            return EstimatorSpec(mode=mode, predictions={'output': output[0]})

        return Estimator(model_fn=model_fn,
                         config=RunConfig(session_config=self._config))
Example 8
def get_estimator(bert_config_file,
                  init_checkpoint,
                  max_seq_len,
                  select_layers,
                  batch_size=32,
                  graph_file='../bert/tmp/graph',
                  model_dir='../bert/tmp'):
    from tensorflow.python.estimator.estimator import Estimator
    from tensorflow.python.estimator.run_config import RunConfig
    from tensorflow.python.estimator.model_fn import EstimatorSpec

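    # Reuse a previously serialized frozen graph when it exists; otherwise
    # build one from the BERT config/checkpoint and serialize it.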
    if os.path.exists(graph_file):
        graph_path = graph_file
    else:
        graph_path = create_graph(graph_file, bert_config_file,
                                  init_checkpoint, max_seq_len, select_layers)

    def model_fn(features, labels, mode, params):
        with tf.gfile.GFile(graph_path, 'rb') as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())

        input_names = ['input_ids', 'input_mask', 'input_type_ids']

        encoder_layer = tf.import_graph_def(
            graph_def,
            input_map={k + ':0': features[k]
                       for k in input_names},
            return_elements=['final_encodes:0'])
        predictions = {
            # 'client_id': client_id,
            'encodes': encoder_layer[0]
        }

        return EstimatorSpec(mode=mode, predictions=predictions)

    config = tf.ConfigProto(log_device_placement=False,
                            allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    #config.gpu_options.per_process_gpu_memory_fraction = self.gpu_memory_fraction
    #config.log_device_placement = False
    #config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1

    return Estimator(model_fn=model_fn,
                     config=RunConfig(session_config=config),
                     params={'batch_size': batch_size},
                     model_dir=model_dir)
Example 9
    def get_estimator(self):
        from tensorflow.python.estimator.estimator import Estimator
        from tensorflow.python.estimator.run_config import RunConfig
        bert_config = modeling.BertConfig.from_json_file(args.config_name)
        init_checkpoint = args.ckpt_name
        model_fn = self.model_fn_builder(
                bert_config=bert_config,
                init_checkpoint=init_checkpoint,
                use_one_hot_embeddings=False)

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.gpu_options.per_process_gpu_memory_fraction = self.gpu_memory_fraction
        config.log_device_placement = False

        return Estimator(model_fn=model_fn,
                         config=RunConfig(session_config=config),
                         model_dir=args.model_dir,
                         params={'batch_size': self.batch_size})
Example 10
    def get_estimator(self):
        from tensorflow.python.estimator.estimator import Estimator
        from tensorflow.python.estimator.run_config import RunConfig
        from tensorflow.python.estimator.model_fn import EstimatorSpec

        def classification_model_fn(features):
            """model_fn for the text-classification model.

            :param features: feature dict produced by the input_fn
            :return: an EstimatorSpec with predicted labels and scores
            """
            with tf.gfile.GFile(self.graph_path, 'rb') as f:
                graph_def = tf.GraphDef()
                graph_def.ParseFromString(f.read())
            input_ids = features["input_ids"]
            input_mask = features["input_mask"]
            input_map = {"input_ids": input_ids, "input_mask": input_mask}
            pred_probs = tf.import_graph_def(graph_def,
                                             name='',
                                             input_map=input_map,
                                             return_elements=['pred_prob:0'])

            return EstimatorSpec(mode=tf.estimator.ModeKeys.PREDICT,
                                 predictions={
                                     'encodes': tf.argmax(pred_probs[0],
                                                          axis=-1),
                                     'score': tf.reduce_max(pred_probs[0],
                                                            axis=-1)
                                 })

        # device_count 'GPU': 0 means CPU only, 1 means use the GPU
        config = tf.ConfigProto(device_count={'GPU': 1})
        config.gpu_options.allow_growth = True
        # config.gpu_options.per_process_gpu_memory_fraction = self.gpu_memory_fraction
        config.log_device_placement = False
        # session-wise XLA doesn't seem to work on tf 1.10
        # if args.xla:
        #     config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1

        return Estimator(model_fn=classification_model_fn,
                         config=RunConfig(session_config=config))
Example 11
def get_estimator(args, tf, graph_path):
    from tensorflow.python.estimator.estimator import Estimator
    from tensorflow.python.estimator.run_config import RunConfig
    from tensorflow.python.estimator.model_fn import EstimatorSpec

    def model_fn(features, labels, mode, params):
        with tf.gfile.GFile(graph_path, 'rb') as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())

        input_names = ['input_ids', 'input_mask', 'input_type_ids']

        output = tf.import_graph_def(
            graph_def,
            input_map={k + ':0': features[k]
                       for k in input_names},
            return_elements=['final_encodes:0'])

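        # With fp16 graphs, cast the encodings back to float32 in the
        # predictions; otherwise return them unchanged.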
        if args.fp16:
            return EstimatorSpec(mode=mode,
                                 predictions={
                                     'unique_ids': features['unique_ids'],
                                     'encodes': tf.cast(output[0], tf.float32)
                                 })
        else:
            return EstimatorSpec(mode=mode,
                                 predictions={
                                     'unique_ids': features['unique_ids'],
                                     'encodes': output[0]
                                 })

    config = tf.ConfigProto(device_count={'GPU': 0},
                            intra_op_parallelism_threads=16,
                            inter_op_parallelism_threads=1)
    config.log_device_placement = False
    # session-wise XLA doesn't seem to work on tf 1.10
    # if args.xla:
    #     config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1

    return Estimator(model_fn=model_fn,
                     config=RunConfig(model_dir=args.checkpoint_dir,
                                      session_config=config))
Example 12
    def get_estimator(self, tf):
        from tensorflow.python.estimator.estimator import Estimator
        from tensorflow.python.estimator.run_config import RunConfig
        from tensorflow.python.estimator.model_fn import EstimatorSpec

        def model_fn(features, labels, mode, params):
            with tf.gfile.GFile(self.graph_path, 'rb') as f:
                graph_def = tf.GraphDef()
                graph_def.ParseFromString(f.read())

            input_names = ['input_ids', 'input_mask', 'input_type_ids']

            output = tf.import_graph_def(
                graph_def,
                input_map={k + ':0': features[k]
                           for k in input_names},
                return_elements=['final_encodes:0'])

            return EstimatorSpec(
                mode=mode,
                predictions={
                    'client_id': features['client_id'],
                    'input_ids': features['input_ids'],  # [mnb] debug
                    'input_mask': features['input_mask'],  # [mnb] debug
                    'input_type_ids':
                    features['input_type_ids'],  # [mnb] debug
                    'encodes': output[0]
                })

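        # Use a GPU only when this worker was assigned a non-negative device id.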
        config = tf.ConfigProto(
            device_count={'GPU': 0 if self.device_id < 0 else 1})
        config.gpu_options.allow_growth = True
        config.gpu_options.per_process_gpu_memory_fraction = self.gpu_memory_fraction
        config.log_device_placement = False
        # session-wise XLA doesn't seem to work on tf 1.10
        # if args.xla:
        #     config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1

        return Estimator(model_fn=model_fn,
                         config=RunConfig(session_config=config))
Example 13
    def get_estimator(self):

        from tensorflow.python.estimator.estimator import Estimator
        from tensorflow.python.estimator.run_config import RunConfig

        bert_config = modeling.BertConfig.from_json_file(args.config_name)
        label_list = self.processor.get_labels()
        train_examples = self.processor.get_train_examples(args.data_dir)
        num_train_steps = int(
            len(train_examples) / self.batch_size * args.num_train_epochs)
        num_warmup_steps = int(num_train_steps * 0.1)

        if self.mode == tf.estimator.ModeKeys.TRAIN:
            init_checkpoint = args.ckpt_name
        else:
            init_checkpoint = args.output_dir

        model_fn = self.model_fn_builder(bert_config=bert_config,
                                         num_labels=len(label_list),
                                         init_checkpoint=init_checkpoint,
                                         learning_rate=args.learning_rate,
                                         num_train_steps=num_train_steps,
                                         num_warmup_steps=num_warmup_steps,
                                         use_one_hot_embeddings=False)

        # config = tf.ConfigProto(allow_soft_placement=True)
        # with tf.Session(config=config) as sess:
        #     print(model_fn)
        #     sess.run(tf.Print(model_fn, [model_fn]))

        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        config = tf.ConfigProto(gpu_options=gpu_options)
        # config.gpu_options.allow_growth = True
        # config.gpu_options.per_process_gpu_memory_fraction = args.gpu_memory_fraction
        config.log_device_placement = False
        return Estimator(model_fn=model_fn,
                         config=RunConfig(session_config=config),
                         model_dir=args.output_dir,
                         params={'batch_size': self.batch_size})
Example 14
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)

    # Get corpus info
    FLAGS.n_token = data_utils.VOCAB_SIZE
    tf.logging.info('n_token {}'.format(FLAGS.n_token))

    if not tf.gfile.Exists(FLAGS.model_dir):
        tf.gfile.MakeDirs(FLAGS.model_dir)

    bsz_per_core = FLAGS.train_batch_size

    train_input_fn, train_record_info_dict = get_input_fn(
        'train', bsz_per_core)
    tf.logging.info('num of batches {}'.format(
        train_record_info_dict['num_batch']))
    train_cache_fn = get_cache_fn(FLAGS.mem_len, bsz_per_core)
    tf.logging.info(train_cache_fn)

    log_every_n_steps = 10
    run_config = RunConfig(
        log_step_count_steps=log_every_n_steps,
        model_dir=FLAGS.model_dir,
        save_checkpoints_steps=FLAGS.save_steps,
        save_summary_steps=None,
    )
    model_fn = get_model_fn()
    tf.logging.info('Use normal Estimator')
    estimator = Estimator(
        model_fn=model_fn,
        params={
            'batch_size': bsz_per_core,
            'cache': None
        },
        config=run_config,
    )

    tf.logging.info('***** Running evaluation *****')
    tf.logging.info('  Batch size = %d', FLAGS.train_batch_size)
    estimator.evaluate(input_fn=train_input_fn, steps=100)
Example 15
    def get_estimator(self, tf):
        """Get tf estimator
        """
        def model_fn(features, labels, mode, params):
            with tf.gfile.GFile(self.graph_path, 'rb') as f:
                graph_def = tf.GraphDef()
                graph_def.ParseFromString(f.read())

            input_names = ['input_ids', 'input_mask', 'input_type_ids']

            output = tf.import_graph_def(graph_def,
                                         input_map={k + ':0': features[k] for k in input_names},
                                         return_elements=['final_encodes:0'])

            return EstimatorSpec(mode=mode, predictions={
                'encodes': output[0]
            })

        config = tf.ConfigProto(device_count={'GPU': 0 if self.device_id < 0 else 1})
        config.gpu_options.allow_growth = True
        config.log_device_placement = False
        return Estimator(model_fn=model_fn, config=RunConfig(session_config=config))
Example 16
    def get_estimator(self, tf, device_id=0):
        # Load the frozen graph model
        from tensorflow.python.estimator.estimator import Estimator
        from tensorflow.python.estimator.run_config import RunConfig
        from tensorflow.python.estimator.model_fn import EstimatorSpec

        def ner_model_fn(features, labels, mode, params):
            """model_fn for the named-entity-recognition (NER) model.

            :param features: feature dict from the input_fn
            :param labels: unused at prediction time
            :param mode: estimator mode key
            :param params: hyperparameters (unused here)
            :return: an EstimatorSpec with the predicted tag ids
            """
            with tf.gfile.GFile(self.graph_path, 'rb') as f:
                graph_def = tf.GraphDef()
                graph_def.ParseFromString(f.read())
            input_ids = features["input_ids"]
            input_mask = features["input_mask"]
            input_map = {"input_ids": input_ids, "input_mask": input_mask}
            pred_ids = tf.import_graph_def(graph_def, name='', input_map=input_map, return_elements=['pred_ids:0'])

            return EstimatorSpec(mode=mode, predictions={
                'encodes': pred_ids[0]
            })

        # device_count 'GPU': 0 means CPU only, 1 means use the GPU
        config = tf.ConfigProto(device_count={'GPU': 0 if device_id < 0 else 1})
        config.gpu_options.allow_growth = True
        config.gpu_options.per_process_gpu_memory_fraction = self.gpu_memory_fraction
        config.log_device_placement = False
        # session-wise XLA doesn't seem to work on tf 1.10
        # if args.xla:
        #     config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
        if self.mode == 'NER':
            return Estimator(model_fn=ner_model_fn, config=RunConfig(session_config=config))
Example 17
def construct_estimator(output_dir, save_checkpoint_steps, model_config,
                        init_checkpoint, learning_rate, max_seq_length,
                        use_mask, label2idx_map, num_output, train_batch_size,
                        eval_batch_size, lr_decay):
    mask_crf = MaskedCRF(use_mask=use_mask,
                         label2idx_map=label2idx_map,
                         num_output=num_output)

    run_config = RunConfig(model_dir=output_dir,
                           save_checkpoints_steps=save_checkpoint_steps)
    model_fn = model_fn_builder(model_config=model_config,
                                init_checkpoint=init_checkpoint,
                                learning_rate=learning_rate,
                                lr_decay=lr_decay,
                                max_seq_length=max_seq_length,
                                num_output=num_output,
                                mask_crf=mask_crf)
    estimator = Estimator(model_fn=model_fn,
                          params={
                              "train_batch_size": train_batch_size,
                              "eval_batch_size": eval_batch_size
                          },
                          config=run_config)
    return estimator
Example 18
    def get_estimator(self):

        from tensorflow.python.estimator.estimator import Estimator
        from tensorflow.python.estimator.run_config import RunConfig

        if self.mode == tf.estimator.ModeKeys.TRAIN:
            init_checkpoint = args.ckpt_name
            train_examples = self.processor.get_train_examples(args.data_dir)
            num_train_steps = int(
                len(train_examples) / self.batch_size * args.num_train_epochs)
        else:
            init_checkpoint = args.output_dir
            num_train_steps = int(30522)
        bert_config = modeling.BertConfig.from_json_file(args.config_name)
        label_list = self.processor.get_labels()
        num_warmup_steps = int(num_train_steps * 0.1)
        model_fn = self.model_fn_builder(bert_config=bert_config,
                                         num_labels=len(label_list),
                                         init_checkpoint=init_checkpoint,
                                         learning_rate=args.learning_rate,
                                         num_train_steps=num_train_steps,
                                         num_warmup_steps=num_warmup_steps,
                                         use_one_hot_embeddings=False)

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.allow_soft_placement = True
        config.gpu_options.per_process_gpu_memory_fraction = args.gpu_memory_fraction
        config.log_device_placement = False
        return Estimator(model_fn=model_fn,
                         config=RunConfig(session_config=config,
                                          save_summary_steps=100,
                                          save_checkpoints_steps=100,
                                          keep_checkpoint_max=1),
                         model_dir=args.output_dir,
                         params={'batch_size': self.batch_size})
Example 19
def main(_):
    if not FLAGS.do_predict_one:
        tf.logging.set_verbosity(tf.logging.INFO)
    log_writer()

    processors = {"senti": SentimentProcessor}

    tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case,
                                                  FLAGS.init_checkpoint)

    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

    if FLAGS.max_seq_length > bert_config.max_position_embeddings:
        raise ValueError(
            "Cannot use sequence length %d because the BERT model "
            "was only trained up to sequence length %d" %
            (FLAGS.max_seq_length, bert_config.max_position_embeddings))

    tf.gfile.MakeDirs(FLAGS.output_dir)

    task_name = FLAGS.task_name.lower()

    if task_name not in processors:
        raise ValueError("Task not found: %s" % (task_name))

    processor = processors[task_name]()

    label_list = processor.get_labels()

    tokenizer = tokenization.FullTokenizer(vocab_file=FLAGS.vocab_file,
                                           do_lower_case=FLAGS.do_lower_case)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.log_device_placement = False

    run_config = RunConfig(session_config=config,
                           model_dir=FLAGS.output_dir,
                           save_checkpoints_steps=FLAGS.save_checkpoints_steps)

    train_examples = None
    num_train_steps = None
    num_warmup_steps = None
    if FLAGS.do_train:
        # Assumed training setup: mirrors BERT's standard run_classifier.py flow
        # and presumes the usual num_train_epochs / warmup_proportion flags, so
        # that the do_train branch below has examples and step counts.
        train_examples = processor.get_train_examples(FLAGS.data_dir)
        num_train_steps = int(
            len(train_examples) / FLAGS.train_batch_size * FLAGS.num_train_epochs)
        num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

    model_fn = model_fn_builder(bert_config=bert_config,
                                num_labels=len(label_list),
                                init_checkpoint=FLAGS.init_checkpoint,
                                learning_rate=FLAGS.learning_rate,
                                num_train_steps=num_train_steps,
                                num_warmup_steps=num_warmup_steps,
                                use_one_hot_embeddings=False)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = tf.estimator.Estimator(
        model_fn=model_fn,
        config=run_config,
        params={"batch_size": FLAGS.train_batch_size})
    print(FLAGS.do_train)
    if FLAGS.do_train:
        train_file = os.path.join(FLAGS.output_dir, "train.tf_record")
        file_based_convert_examples_to_features(train_examples, label_list,
                                                FLAGS.max_seq_length,
                                                tokenizer, train_file)
        tf.logging.info("***** Running training *****")
        tf.logging.info("  Num examples = %d", len(train_examples))
        tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
        tf.logging.info("  Num steps = %d", num_train_steps)
        train_input_fn = file_based_input_fn_builder(
            input_file=train_file,
            seq_length=FLAGS.max_seq_length,
            is_training=True,
            drop_remainder=True)
        estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)

    if FLAGS.do_eval:
        eval_examples = processor.get_dev_examples(FLAGS.data_dir)
        num_actual_eval_examples = len(eval_examples)
        # if FLAGS.use_tpu:
        #   # TPU requires a fixed batch size for all batches, therefore the number
        #   # of examples must be a multiple of the batch size, or else examples
        #   # will get dropped. So we pad with fake examples which are ignored
        #   # later on. These do NOT count towards the metric (all tf.metrics
        #   # support a per-instance weight, and these get a weight of 0.0).
        #   while len(eval_examples) % FLAGS.eval_batch_size != 0:
        #     eval_examples.append(PaddingInputExample())

        eval_file = os.path.join(FLAGS.output_dir, "eval.tf_record")
        file_based_convert_examples_to_features(eval_examples, label_list,
                                                FLAGS.max_seq_length,
                                                tokenizer, eval_file)

        tf.logging.info("***** Running evaluation *****")
        tf.logging.info("  Num examples = %d (%d actual, %d padding)",
                        len(eval_examples), num_actual_eval_examples,
                        len(eval_examples) - num_actual_eval_examples)
        tf.logging.info("  Batch size = %d", FLAGS.eval_batch_size)

        # This tells the estimator to run through the entire set.
        eval_steps = None
        # However, if running eval on the TPU, you will need to specify the
        # number of steps.
        # if FLAGS.use_tpu:
        #   assert len(eval_examples) % FLAGS.eval_batch_size == 0
        #   eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size)

        eval_drop_remainder = False
        eval_input_fn = file_based_input_fn_builder(
            input_file=eval_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=eval_drop_remainder)

        result = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)

        output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
        with tf.gfile.GFile(output_eval_file, "w") as writer:
            tf.logging.info("***** Eval results *****")
            for key in sorted(result.keys()):
                tf.logging.info("  %s = %s", key, str(result[key]))
                writer.write("%s = %s\n" % (key, str(result[key])))

    if FLAGS.do_predict:
        predict_examples = processor.get_test_examples(FLAGS.data_dir)
        num_actual_predict_examples = len(predict_examples)
        # if FLAGS.use_tpu:
        #   # TPU requires a fixed batch size for all batches, therefore the number
        #   # of examples must be a multiple of the batch size, or else examples
        #   # will get dropped. So we pad with fake examples which are ignored
        #   # later on.
        #   while len(predict_examples) % FLAGS.predict_batch_size != 0:
        #     predict_examples.append(PaddingInputExample())

        predict_file = os.path.join(FLAGS.output_dir, "predict.tf_record")
        file_based_convert_examples_to_features(predict_examples, label_list,
                                                FLAGS.max_seq_length,
                                                tokenizer, predict_file)

        tf.logging.info("***** Running prediction*****")
        tf.logging.info("  Num examples = %d (%d actual, %d padding)",
                        len(predict_examples), num_actual_predict_examples,
                        len(predict_examples) - num_actual_predict_examples)
        tf.logging.info("  Batch size = %d", FLAGS.predict_batch_size)

        predict_drop_remainder = False
        predict_input_fn = file_based_input_fn_builder(
            input_file=predict_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=predict_drop_remainder)

        result = estimator.predict(input_fn=predict_input_fn)

        output_predict_file = os.path.join(FLAGS.output_dir,
                                           "test_results.tsv")
        with tf.gfile.GFile(output_predict_file, "w") as writer:
            num_written_lines = 0
            tf.logging.info("***** Predict results *****")
            for (i, prediction) in enumerate(result):
                probabilities = prediction
                if i >= num_actual_predict_examples:
                    break
                output_line = "\t".join(
                    str(class_probability)
                    for class_probability in probabilities)
                output_line = "\t".join([
                    str(predict_examples[i].label), output_line,
                    predict_examples[i].text_a
                ]) + "\n"
                writer.write(output_line)
                num_written_lines += 1
        assert num_written_lines == num_actual_predict_examples

    if FLAGS.do_predict_one:
        import time
        while True:
            start = time.clock()
            predict_examples = processor.get_input_example()
            num_actual_predict_examples = len(predict_examples)

            predict_file = os.path.join(FLAGS.output_dir, "predict.tf_record")
            file_based_convert_examples_to_features(predict_examples,
                                                    label_list,
                                                    FLAGS.max_seq_length,
                                                    tokenizer, predict_file)

            predict_drop_remainder = False
            predict_input_fn = file_based_input_fn_builder(
                input_file=predict_file,
                seq_length=FLAGS.max_seq_length,
                is_training=False,
                drop_remainder=predict_drop_remainder)
            print("time:", time.clock() - start)
            start = time.clock()
            result = estimator.predict(input_fn=predict_input_fn)
            for r in result:
                print(r)
            print("time1111111111111111:", time.clock() - start)
Example 20
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)

    if not FLAGS.do_train and not FLAGS.do_eval:
        raise ValueError(
            'At least one of `do_train` or `do_eval` must be True.')

    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

    tf.gfile.MakeDirs(FLAGS.output_dir)

    input_files = []
    for input_pattern in FLAGS.input_file.split(','):
        input_files.extend(tf.gfile.Glob(input_pattern))

    tf.logging.info('*** Input Files ***')
    for input_file in input_files:
        tf.logging.info('  %s' % input_file)

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2

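    # Multi-GPU path: a plain RunConfig with MirroredStrategy and NCCL
    # all-reduce; otherwise fall back to the TPU RunConfig below.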
    if FLAGS.use_gpu and int(FLAGS.num_gpu_cores) >= 2:
        tf.logging.info('Use normal RunConfig')
        tf.logging.info(FLAGS.num_gpu_cores)
        dist_strategy = tf.contrib.distribute.MirroredStrategy(
            num_gpus=FLAGS.num_gpu_cores,
            auto_shard_dataset=True,
            cross_device_ops=AllReduceCrossDeviceOps(
                'nccl', num_packs=FLAGS.num_gpu_cores),
            # cross_device_ops=AllReduceCrossDeviceOps('hierarchical_copy'),
        )
        log_every_n_steps = 10
        run_config = RunConfig(
            train_distribute=dist_strategy,
            eval_distribute=dist_strategy,
            log_step_count_steps=log_every_n_steps,
            model_dir=FLAGS.output_dir,
            save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        )
    else:
        run_config = tf.contrib.tpu.RunConfig(
            cluster=tpu_cluster_resolver,
            master=FLAGS.master,
            model_dir=FLAGS.output_dir,
            save_checkpoints_steps=FLAGS.save_checkpoints_steps,
            tpu_config=tf.contrib.tpu.TPUConfig(
                iterations_per_loop=FLAGS.iterations_per_loop,
                num_shards=FLAGS.num_tpu_cores,
                per_host_input_for_training=is_per_host,
            ),
        )

    model_fn = model_fn_builder(
        bert_config=bert_config,
        init_checkpoint=FLAGS.init_checkpoint,
        learning_rate=FLAGS.learning_rate,
        num_train_steps=FLAGS.num_train_steps,
        num_warmup_steps=FLAGS.num_warmup_steps,
        use_tpu=FLAGS.use_tpu,
        use_one_hot_embeddings=FLAGS.use_tpu,
    )

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.

    if FLAGS.use_gpu and int(FLAGS.num_gpu_cores) >= 2:
        tf.logging.info('Use normal Estimator')
        estimator = Estimator(model_fn=model_fn, params={}, config=run_config)

    else:
        estimator = tf.contrib.tpu.TPUEstimator(
            use_tpu=FLAGS.use_tpu,
            model_fn=model_fn,
            config=run_config,
            train_batch_size=FLAGS.train_batch_size,
            eval_batch_size=FLAGS.eval_batch_size,
        )

    if FLAGS.do_train:
        tf.logging.info('***** Running training *****')
        tf.logging.info('  Batch size = %d', FLAGS.train_batch_size)

        if FLAGS.use_gpu and int(FLAGS.num_gpu_cores) >= 2:
            train_input_fn = input_fn_builder_gpu(
                input_files=input_files,
                max_seq_length=FLAGS.max_seq_length,
                max_predictions_per_seq=FLAGS.max_predictions_per_seq,
                is_training=True,
                batch_size=per_device_batch_size(FLAGS.train_batch_size,
                                                 FLAGS.num_gpu_cores),
            )
        else:
            train_input_fn = input_fn_builder(
                input_files=input_files,
                max_seq_length=FLAGS.max_seq_length,
                max_predictions_per_seq=FLAGS.max_predictions_per_seq,
                is_training=True,
            )
        estimator.train(input_fn=train_input_fn,
                        max_steps=FLAGS.num_train_steps)

    if FLAGS.do_eval:
        tf.logging.info('***** Running evaluation *****')
        tf.logging.info('  Batch size = %d', FLAGS.eval_batch_size)

        if FLAGS.use_gpu and int(FLAGS.num_gpu_cores) >= 2:
            eval_input_fn = input_fn_builder_gpu(
                input_files=input_files,
                max_seq_length=FLAGS.max_seq_length,
                max_predictions_per_seq=FLAGS.max_predictions_per_seq,
                is_training=False,
                batch_size=FLAGS.eval_batch_size,
            )
        else:
            eval_input_fn = input_fn_builder(
                input_files=input_files,
                max_seq_length=FLAGS.max_seq_length,
                max_predictions_per_seq=FLAGS.max_predictions_per_seq,
                is_training=False,
            )

        result = estimator.evaluate(input_fn=eval_input_fn,
                                    steps=FLAGS.max_eval_steps)

        output_eval_file = os.path.join(FLAGS.output_dir, 'eval_results.txt')
        with tf.gfile.GFile(output_eval_file, 'w') as writer:
            tf.logging.info('***** Eval results *****')
            for key in sorted(result.keys()):
                tf.logging.info('  %s = %s', key, str(result[key]))
                writer.write('%s = %s\n' % (key, str(result[key])))
Example 21
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)

    if not FLAGS.do_train and not FLAGS.do_eval:
        raise ValueError(
            "At least one of `do_train` or `do_eval` must be True.")

    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

    tf.gfile.MakeDirs(FLAGS.output_dir)

    input_files = []
    for input_pattern in FLAGS.input_file.split(","):
        input_files.extend(tf.gfile.Glob(input_pattern))

    tf.logging.info("*** Input Files ***")
    for input_file in input_files:
        tf.logging.info("  %s" % input_file)

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2

    dist_strategy = tf.contrib.distribute.MirroredStrategy(
        num_gpus=FLAGS.n_gpus,
        cross_device_ops=AllReduceCrossDeviceOps('nccl',
                                                 num_packs=FLAGS.n_gpus),
    )
    ''' IF ERROR COULD TRY
    dist_strategy = tf.contrib.distribute.MirroredStrategy(
        devices=["device:GPU:%d" % i for i in range(FLAGS.n_gpus)],
        cross_tower_ops=tf.distribute.HierarchicalCopyAllReduce())
    '''

    log_every_n_steps = 8
    run_config = RunConfig(train_distribute=dist_strategy,
                           eval_distribute=dist_strategy,
                           log_step_count_steps=log_every_n_steps,
                           model_dir=FLAGS.output_dir,
                           save_checkpoints_steps=FLAGS.save_checkpoints_steps)

    model_fn = model_fn_builder(bert_config=bert_config,
                                init_checkpoint=FLAGS.init_checkpoint,
                                learning_rate=FLAGS.learning_rate,
                                num_train_steps=FLAGS.num_train_steps,
                                num_warmup_steps=FLAGS.num_warmup_steps,
                                use_tpu=FLAGS.use_tpu,
                                use_one_hot_embeddings=FLAGS.use_tpu)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = Estimator(model_fn=model_fn, params={}, config=run_config)

    if FLAGS.do_train:
        tf.logging.info("***** Running training *****")
        tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
        train_input_fn = input_fn_builder(
            input_files=input_files,
            max_seq_length=FLAGS.max_seq_length,
            max_predictions_per_seq=FLAGS.max_predictions_per_seq,
            is_training=True)
        estimator.train(input_fn=train_input_fn,
                        max_steps=FLAGS.num_train_steps)

    if FLAGS.do_eval:
        tf.logging.info("***** Running evaluation *****")
        tf.logging.info("  Batch size = %d", FLAGS.eval_batch_size)

        eval_input_fn = input_fn_builder(
            input_files=input_files,
            max_seq_length=FLAGS.max_seq_length,
            max_predictions_per_seq=FLAGS.max_predictions_per_seq,
            is_training=False)

        result = estimator.evaluate(input_fn=eval_input_fn,
                                    steps=FLAGS.max_eval_steps)

        output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
        with tf.gfile.GFile(output_eval_file, "w") as writer:
            tf.logging.info("***** Eval results *****")
            for key in sorted(result.keys()):
                tf.logging.info("  %s = %s", key, str(result[key]))
                writer.write("%s = %s\n" % (key, str(result[key])))
Example 22
checkpoint_fp = os.path.join(model_dir, 'bert_model.ckpt')
vocab_fp = os.path.join(model_dir, 'vocab.txt')
tokenizer = tokenization.FullTokenizer(vocab_file=vocab_fp)
max_seq_len = 10
worker_id = id
daemon = True
model_fn = model_fn_builder(
    bert_config=modeling.BertConfig.from_json_file(config_fp),
    init_checkpoint=checkpoint_fp,
    pooling_strategy=PoolingStrategy.NONE,
    pooling_layer=[-2])
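# Cap the session at 30% of GPU memory; model_dir is left as None because this
# estimator is only used for prediction in this snippet.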
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.gpu_options.per_process_gpu_memory_fraction = 0.3
estimator = Estimator(model_fn,
                      config=RunConfig(session_config=config),
                      model_dir=None)


def input_fn_builder(msg):
    def gen():
        for i in range(1):
            tmp_f = list(convert_lst_to_features(msg, max_seq_len, tokenizer))
            yield {
                'input_ids': [f.input_ids for f in tmp_f],
                'input_mask': [f.input_mask for f in tmp_f],
                'input_type_ids': [f.input_type_ids for f in tmp_f]
            }

    def input_fn():
        for i in gen():
Example 23
def run_training(
    train_fn,
    model_fn,
    model_dir: str,
    num_gpus: int = 1,
    gpu_mem_fraction: float = 0.95,
    log_step: int = 100,
    summary_step: int = 100,
    save_checkpoint_step: int = 1000,
    max_steps: int = 10000,
    eval_step: int = 10,
    eval_throttle: int = 120,
    use_tpu: bool = False,
    tpu_name: str = None,
    tpu_zone: str = None,
    gcp_project: str = None,
    iterations_per_loop: int = 100,
    num_tpu_cores: int = 8,
    train_batch_size: int = 128,
    train_hooks=None,
    eval_fn=None,
):
    tf.logging.set_verbosity(tf.logging.INFO)

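    # Choose the distribution strategy: MirroredStrategy with NCCL all-reduce
    # for multi-GPU runs, none for a single device; TPU gets its own
    # RunConfig/Estimator below.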
    if num_gpus > 1 and not use_tpu:
        dist_strategy = tf.contrib.distribute.MirroredStrategy(
            num_gpus=num_gpus,
            auto_shard_dataset=True,
            cross_device_ops=AllReduceCrossDeviceOps('nccl',
                                                     num_packs=num_gpus),
        )
    else:
        dist_strategy = None

    if use_tpu:
        tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
            tpu_name, zone=tpu_zone, project=gcp_project)
        is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2
        run_config = tf.contrib.tpu.RunConfig(
            cluster=tpu_cluster_resolver,
            master=None,
            model_dir=model_dir,
            save_checkpoints_steps=save_checkpoint_step,
            tpu_config=tf.contrib.tpu.TPUConfig(
                iterations_per_loop=iterations_per_loop,
                num_shards=num_tpu_cores,
                per_host_input_for_training=is_per_host,
            ),
        )
    else:
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=gpu_mem_fraction)
        config = tf.ConfigProto(allow_soft_placement=True,
                                gpu_options=gpu_options)
        run_config = RunConfig(
            train_distribute=dist_strategy,
            eval_distribute=dist_strategy,
            log_step_count_steps=log_step,
            model_dir=model_dir,
            save_checkpoints_steps=save_checkpoint_step,
            save_summary_steps=summary_step,
            session_config=config,
        )

    if use_tpu:
        estimator = tf.contrib.tpu.TPUEstimator(
            use_tpu=use_tpu,
            model_fn=model_fn,
            config=run_config,
            train_batch_size=train_batch_size,
            eval_batch_size=None,
        )
        eval_fn = None

    else:

        estimator = tf.estimator.Estimator(model_fn=model_fn,
                                           params={},
                                           config=run_config)

    if eval_fn:
        train_spec = tf.estimator.TrainSpec(input_fn=train_fn,
                                            max_steps=max_steps,
                                            hooks=train_hooks)

        eval_spec = tf.estimator.EvalSpec(input_fn=eval_fn,
                                          steps=eval_step,
                                          throttle_secs=eval_throttle)
        tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

    else:
        estimator.train(input_fn=train_fn,
                        max_steps=max_steps,
                        hooks=train_hooks)
Example 24
    def get_estimator(self, tf):
        from tensorflow.python.estimator.estimator import Estimator
        from tensorflow.python.estimator.run_config import RunConfig
        from tensorflow.python.estimator.model_fn import EstimatorSpec

        def model_fn(features, labels, mode, params):
            with tf.gfile.GFile(self.graph_path, 'rb') as f:
                graph_def = tf.GraphDef()
                graph_def.ParseFromString(f.read())

            input_names = ['input_ids', 'input_mask', 'input_type_ids']

            output = tf.import_graph_def(
                graph_def,
                input_map={k + ':0': features[k]
                           for k in input_names},
                return_elements=['final_encodes:0'])

            return EstimatorSpec(mode=mode,
                                 predictions={
                                     'client_id': features['client_id'],
                                     'encodes': output[0]
                                 })

        def ner_model_fn(features, labels, mode, params):
            """
            命名实体识别模型的model_fn
            :param features:
            :param labels:
            :param mode:
            :param params:
            :return:
            """
            with tf.gfile.GFile(self.graph_path, 'rb') as f:
                graph_def = tf.GraphDef()
                graph_def.ParseFromString(f.read())
            input_ids = features["input_ids"]
            input_mask = features["input_mask"]
            input_map = {"input_ids": input_ids, "input_mask": input_mask}
            pred_ids = tf.import_graph_def(graph_def,
                                           name='',
                                           input_map=input_map,
                                           return_elements=['pred_ids:0'])

            return EstimatorSpec(mode=mode,
                                 predictions={
                                     'client_id': features['client_id'],
                                     'encodes': pred_ids[0]
                                 })

        def classification_model_fn(features, labels, mode, params):
            """
            文本分类模型的model_fn
            :param features:
            :param labels:
            :param mode:
            :param params:
            :return:
            """
            with tf.gfile.GFile(self.graph_path, 'rb') as f:
                graph_def = tf.GraphDef()
                graph_def.ParseFromString(f.read())
            input_ids = features["input_ids"]
            input_mask = features["input_mask"]
            input_map = {"input_ids": input_ids, "input_mask": input_mask}
            pred_probs = tf.import_graph_def(graph_def,
                                             name='',
                                             input_map=input_map,
                                             return_elements=['pred_prob:0'])

            return EstimatorSpec(
                mode=mode,
                predictions={
                    'client_id': features['client_id'],
                    #'encodes': tf.argmax(pred_probs[0], axis=-1),
                    #'score': tf.reduce_max(pred_probs[0], axis=-1)
                    'encodes': pred_probs[0],
                    'score': pred_probs[0]
                })

        # device_count 'GPU': 0 means CPU only, 1 means use the GPU
        config = tf.ConfigProto(
            device_count={'GPU': 0 if self.device_id < 0 else 1})
        config.gpu_options.allow_growth = True
        config.gpu_options.per_process_gpu_memory_fraction = self.gpu_memory_fraction
        config.log_device_placement = False
        # session-wise XLA doesn't seem to work on tf 1.10
        # if args.xla:
        #     config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
        if self.mode == 'NER':
            return Estimator(model_fn=ner_model_fn,
                             config=RunConfig(session_config=config))
        elif self.mode == 'BERT':
            return Estimator(model_fn=model_fn,
                             config=RunConfig(session_config=config))
        elif self.mode == 'CLASS':
            return Estimator(model_fn=classification_model_fn,
                             config=RunConfig(session_config=config))
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)

    processors = {
        "cola": ColaProcessor,
        "mnli": MnliProcessor,
        "mrpc": MrpcProcessor,
        "xnli": XnliProcessor,
        "qqp": QqpProcessor,
        'chnsenticorp': ChnsenticorpProcessor,
        'gt': GTProcessor,
        'tcl': TCLProcessor
    }

    tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case,
                                                  FLAGS.init_checkpoint)

    if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict:
        raise ValueError(
            "At least one of `do_train`, `do_eval` or `do_predict` must be True.")

    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

    if FLAGS.max_seq_length > bert_config.max_position_embeddings:
        raise ValueError(
            "Cannot use sequence length %d because the BERT model "
            "was only trained up to sequence length %d" %
            (FLAGS.max_seq_length, bert_config.max_position_embeddings))

    tf.gfile.MakeDirs(FLAGS.output_dir)

    task_name = FLAGS.task_name.lower()

    if task_name not in processors:
        raise ValueError("Task not found: %s" % (task_name))

    processor = processors[task_name]()

    label_list = processor.get_labels()

    tokenizer = tokenization.FullTokenizer(
        vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case)

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2

    # https://github.com/tensorflow/tensorflow/issues/21470#issuecomment-422506263
    dist_strategy = tf.contrib.distribute.MirroredStrategy(
        num_gpus=FLAGS.num_gpu_cores,
        cross_device_ops=AllReduceCrossDeviceOps('nccl', num_packs=FLAGS.num_gpu_cores),
        # cross_device_ops=AllReduceCrossDeviceOps('hierarchical_copy'),
    )
    log_every_n_steps = 8
    dist_run_config = RunConfig(
        train_distribute=dist_strategy,
        eval_distribute=dist_strategy,
        log_step_count_steps=log_every_n_steps,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps)

    tpu_run_config = tf.contrib.tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        tpu_config=tf.contrib.tpu.TPUConfig(
            iterations_per_loop=FLAGS.iterations_per_loop,
            num_shards=FLAGS.num_tpu_cores,
            per_host_input_for_training=is_per_host))

    train_examples = None
    num_train_steps = None
    num_warmup_steps = None
    if FLAGS.do_train:
        train_examples = processor.get_train_examples(FLAGS.data_dir)
        num_train_steps = int(
            len(train_examples) / FLAGS.train_batch_size * FLAGS.num_train_epochs)
        num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)
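        # Worked example (illustrative numbers only): with 10,000 training
        # examples, train_batch_size=32 and num_train_epochs=3.0 this gives
        # int(10000 / 32 * 3.0) = 937 train steps, and with
        # warmup_proportion=0.1 the first int(937 * 0.1) = 93 steps use
        # learning-rate warmup.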

    init_checkpoint = FLAGS.init_checkpoint
    is_multi_gpu = FLAGS.use_gpu and int(FLAGS.num_gpu_cores) >= 2
    model_fn = model_fn_builder(
        bert_config=bert_config,
        num_labels=len(label_list),
        init_checkpoint=init_checkpoint,
        learning_rate=FLAGS.learning_rate,
        num_train_steps=num_train_steps,
        num_warmup_steps=num_warmup_steps,
        use_tpu=FLAGS.use_tpu,
        use_one_hot_embeddings=FLAGS.use_tpu,
        use_gpu=FLAGS.use_gpu,
        num_gpu_cores=FLAGS.num_gpu_cores,
        fp16=FLAGS.use_fp16,
        weight_list=FLAGS.weight_list)

    # Use a MirroredStrategy-configured Estimator for multi-GPU training;
    # otherwise use TPUEstimator, which falls back to a normal Estimator on CPU
    # or GPU when no TPU is available.
    if is_multi_gpu:
        estimator = Estimator(
            model_fn=model_fn,
            params={},
            config=dist_run_config)
    else:
        estimator = tf.contrib.tpu.TPUEstimator(
            use_tpu=FLAGS.use_tpu,
            model_fn=model_fn,
            config=tpu_run_config,
            train_batch_size=FLAGS.train_batch_size,
            eval_batch_size=FLAGS.eval_batch_size,
            predict_batch_size=FLAGS.predict_batch_size)

    if FLAGS.do_train:
        train_file = os.path.join(FLAGS.output_dir, "train.tf_record")
        file_based_convert_examples_to_features(
            train_examples, label_list, FLAGS.max_seq_length, tokenizer, train_file)
        tf.logging.info("***** Running training *****")
        tf.logging.info("  Num examples = %d", len(train_examples))
        tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
        tf.logging.info("  Num steps = %d", num_train_steps)
        train_input_fn = file_based_input_fn_builder(
            input_file=train_file,
            seq_length=FLAGS.max_seq_length,
            is_training=True,
            drop_remainder=True,
            batch_size=FLAGS.train_batch_size)
        estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
        # TF Serving
        if FLAGS.save_for_serving:
            serving_dir = os.path.join(FLAGS.output_dir, 'serving')
            save_for_serving(estimator, serving_dir, FLAGS.max_seq_length, not is_multi_gpu)

        # Find the latest checkpoint
        max_idx = 0
        for filename in os.listdir(FLAGS.output_dir):
            if filename.startswith('model.ckpt-'):
                max_idx = max(int(filename.split('.')[1].split('-')[1]), max_idx)
        init_checkpoint = os.path.join(FLAGS.output_dir, f'model.ckpt-{max_idx}')
        tf.logging.info(f'Current checkpoint: {init_checkpoint}')
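        # An equivalent sketch using the checkpoint-state file instead of
        # scanning filenames (assumes the default "checkpoint" file exists in
        # output_dir):
        #   init_checkpoint = tf.train.latest_checkpoint(FLAGS.output_dir)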

    if FLAGS.do_eval:
        model_fn = model_fn_builder(
            bert_config=bert_config,
            num_labels=len(label_list),
            init_checkpoint=init_checkpoint,
            learning_rate=FLAGS.learning_rate,
            num_train_steps=num_train_steps,
            num_warmup_steps=num_warmup_steps,
            use_tpu=FLAGS.use_tpu,
            use_one_hot_embeddings=FLAGS.use_tpu,
            use_gpu=FLAGS.use_gpu,
            num_gpu_cores=FLAGS.num_gpu_cores,
            fp16=FLAGS.use_fp16,
            weight_list=FLAGS.weight_list)

        eval_estimator = tf.contrib.tpu.TPUEstimator(
            use_tpu=FLAGS.use_tpu,
            model_fn=model_fn,
            config=tpu_run_config,
            train_batch_size=FLAGS.train_batch_size,
            eval_batch_size=FLAGS.eval_batch_size,
            predict_batch_size=FLAGS.predict_batch_size)

        eval_examples = processor.get_dev_examples(FLAGS.data_dir)
        num_actual_eval_examples = len(eval_examples)
        if FLAGS.use_tpu:
            # TPU requires a fixed batch size for all batches, therefore the number
            # of examples must be a multiple of the batch size, or else examples
            # will get dropped. So we pad with fake examples which are ignored
            # later on. These do NOT count towards the metric (all tf.metrics
            # support a per-instance weight, and these get a weight of 0.0).
            while len(eval_examples) % FLAGS.eval_batch_size != 0:
                eval_examples.append(PaddingInputExample())

        eval_file = os.path.join(FLAGS.output_dir, "eval.tf_record")
        file_based_convert_examples_to_features(
            eval_examples, label_list, FLAGS.max_seq_length, tokenizer, eval_file)

        tf.logging.info("***** Running evaluation *****")
        tf.logging.info("  Num examples = %d (%d actual, %d padding)",
                        len(eval_examples), num_actual_eval_examples,
                        len(eval_examples) - num_actual_eval_examples)
        tf.logging.info("  Batch size = %d", FLAGS.eval_batch_size)

        # This tells the estimator to run through the entire set.
        eval_steps = None
        # However, if running eval on the TPU, you will need to specify the
        # number of steps.
        if FLAGS.use_tpu:
            assert len(eval_examples) % FLAGS.eval_batch_size == 0
            eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size)
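            # Worked example (illustrative numbers only): 1003 real eval
            # examples with eval_batch_size=32 are padded with 21
            # PaddingInputExamples up to 1024, so eval_steps = 1024 // 32 = 32
            # and the padded rows carry a per-example weight of 0.0 in the
            # metrics.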

        eval_drop_remainder = True if FLAGS.use_tpu else False
        eval_input_fn = file_based_input_fn_builder(
            input_file=eval_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=eval_drop_remainder,
            batch_size=FLAGS.eval_batch_size)

        result = eval_estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)

        output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
        with tf.gfile.GFile(output_eval_file, "w") as writer:
            tf.logging.info("***** Eval results *****")
            for key in sorted(result.keys()):
                tf.logging.info("  %s = %s", key, str(result[key]))
                writer.write("%s = %s\n" % (key, str(result[key])))

        # dump result as json file (easy parsing for other tasks)
        class ExtEncoder(json.JSONEncoder):
            def default(self, obj):
                if isinstance(obj, np.integer):
                    return int(obj)
                if isinstance(obj, np.floating):
                    return float(obj)
                if isinstance(obj, np.ndarray):
                    return obj.tolist()
                else:
                    return super(ExtEncoder, self).default(obj)

        output_eval_file2 = os.path.join(FLAGS.output_dir, "eval_results.json")
        with tf.gfile.GFile(output_eval_file2, "w") as writer:
            json.dump(result, writer, indent=4, cls=ExtEncoder)
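        # Without ExtEncoder, json.dump raises a TypeError on numpy scalars such
        # as the np.float32 eval metrics; the encoder converts them to plain
        # Python ints/floats/lists before serialization.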

    if FLAGS.do_predict:
        predict_examples = processor.get_test_examples(FLAGS.data_dir)
        num_actual_predict_examples = len(predict_examples)
        if FLAGS.use_tpu:
            # TPU requires a fixed batch size for all batches, therefore the number
            # of examples must be a multiple of the batch size, or else examples
            # will get dropped. So we pad with fake examples which are ignored
            # later on.
            while len(predict_examples) % FLAGS.predict_batch_size != 0:
                predict_examples.append(PaddingInputExample())

        predict_file = os.path.join(FLAGS.output_dir, "predict.tf_record")
        file_based_convert_examples_to_features(predict_examples, label_list,
                                                FLAGS.max_seq_length, tokenizer,
                                                predict_file)

        tf.logging.info("***** Running prediction*****")
        tf.logging.info("  Num examples = %d (%d actual, %d padding)",
                        len(predict_examples), num_actual_predict_examples,
                        len(predict_examples) - num_actual_predict_examples)
        tf.logging.info("  Batch size = %d", FLAGS.predict_batch_size)

        predict_drop_remainder = True if FLAGS.use_tpu else False
        predict_input_fn = file_based_input_fn_builder(
            input_file=predict_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=predict_drop_remainder,
            batch_size=FLAGS.predict_batch_size)

        result = estimator.predict(input_fn=predict_input_fn)

        output_predict_file = os.path.join(FLAGS.output_dir, "test_results.tsv")
        with tf.gfile.GFile(output_predict_file, "w") as writer:
            num_written_lines = 0
            tf.logging.info("***** Predict results *****")
            for (i, prediction) in enumerate(result):
                probabilities = prediction["probabilities"]
                if i >= num_actual_predict_examples:
                    break
                output_line = "\t".join(
                    str(class_probability)
                    for class_probability in probabilities) + "\n"
                writer.write(output_line)
                num_written_lines += 1
        assert num_written_lines == num_actual_predict_examples
Esempio n. 26
0

def get_encodes(x):
    # x is `batch_size` of lines, each of which is a json object
    samples = [json.loads(l) for l in x]
    text = [s['fact'][:50] + s['fact'][-50:] for s in samples]
    features = bc_client.encode(text)
    labels = [[str(random.choice(s['meta']['relevant_articles']))]
              for s in samples]
    return features, labels
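
# A minimal sketch of how `bc_client` could be created, assuming the
# bert-serving-client package and a running bert-serving-server on the default
# ports; the server address below is a placeholder:
#
#   from bert_serving.client import BertClient
#   bc_client = BertClient(ip='localhost', output_fmt='ndarray')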


config = tf.ConfigProto()
config.gpu_options.allow_growth = True
run_config = RunConfig(model_dir='/data/cips/save/%s' % MODEL_ID,
                       session_config=config,
                       save_checkpoints_steps=2000)

estimator = DNNClassifier(hidden_units=[512],
                          feature_columns=[
                              tf.feature_column.numeric_column('feature',
                                                               shape=(768, ))
                          ],
                          n_classes=len(laws),
                          config=run_config,
                          label_vocabulary=laws_str,
                          dropout=0.1)

input_fn = lambda fp: (tf.data.TextLineDataset(fp).apply(
    tf.contrib.data.shuffle_and_repeat(buffer_size=10000)).batch(batch_size).
                       map(lambda x: tf.py_func(get_encodes, [x], [tf.float32, tf.string],
                                                name='bert_client'),
                           num_parallel_calls=num_parallel_calls).
                       map(lambda x, y: ({'feature': x}, y)))
Esempio n. 27
0
def main(_):
    if not FLAGS.do_predict_one:
        tf.logging.set_verbosity(tf.logging.INFO)
    log_writer()

    processors = {"senti": SentimentProcessor}

    tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case,
                                                  FLAGS.init_checkpoint)

    # if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict and not FLAGS.do_predict_one:
    #   raise ValueError(
    #       "At least one of `do_train`, `do_eval` or `do_predict' must be True.")

    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

    if FLAGS.max_seq_length > bert_config.max_position_embeddings:
        raise ValueError(
            "Cannot use sequence length %d because the BERT model "
            "was only trained up to sequence length %d" %
            (FLAGS.max_seq_length, bert_config.max_position_embeddings))

    tf.gfile.MakeDirs(FLAGS.output_dir)

    task_name = FLAGS.task_name.lower()

    if task_name not in processors:
        raise ValueError("Task not found: %s" % (task_name))

    processor = processors[task_name]()

    label_list = processor.get_labels()

    tokenizer = tokenization.FullTokenizer(vocab_file=FLAGS.vocab_file,
                                           do_lower_case=FLAGS.do_lower_case)

    tpu_cluster_resolver = None
    # if FLAGS.use_tpu and FLAGS.tpu_name:
    #   tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
    #       FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.log_device_placement = False

    run_config = RunConfig(session_config=config,
                           model_dir=FLAGS.output_dir,
                           save_checkpoints_steps=FLAGS.save_checkpoints_steps)

    input_ids, input_mask, segment_ids, label_ids, total_loss, per_example_loss, logits, probabilities = convert_ckpt_to_saved_model(
        FLAGS.bert_config_file, FLAGS.init_checkpoint)

    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

    with tf.Session() as sess:
        sess.run(init)
        ckpt = tf.train.latest_checkpoint(FLAGS.output_dir)  # locate the latest checkpoint
        saver.restore(sess, ckpt)  # restore its variables into the current session
        print('finish loading model!')
        while True:
            string = input("Please input:")
            example = InputExample(guid="0",
                                   text_a=string,
                                   text_b="",
                                   label="1")
            feature = convert_examples_to_features([example], label_list,
                                                   FLAGS.max_seq_length,
                                                   tokenizer)[0]
            result = sess.run(
                [probabilities],
                feed_dict={
                    input_ids: [feature.input_ids],
                    input_mask: [feature.input_mask],
                    segment_ids: [feature.segment_ids]
                })
            print('probabilities:' + str(result))
Esempio n. 28
0
def train(params: user_params, input_):
    # estimator runtime configuration
    session_config = tf.ConfigProto()
    session_config.allow_soft_placement = True
    session_config.gpu_options.per_process_gpu_memory_fraction = 0.9
    session_config.gpu_options.allow_growth = True

    if FLAGS.gpu_cores:
        gpu_cores = tuple(FLAGS.gpu_cores)
        devices = ["/device:GPU:%d" % int(d) for d in gpu_cores]
        tf.logging.warn("using device: " + " ".join(devices))
        distribution = tf.contrib.distribute.MirroredStrategy(devices=devices)

        tf.logging.warn("in train.py, distribution")
        tf.logging.warn(distribution._devices)

        config = RunConfig(save_checkpoints_steps=FLAGS.check_steps,
                           train_distribute=distribution,
                           keep_checkpoint_max=2,
                           session_config=session_config)
    else:

        config = RunConfig(save_checkpoints_steps=FLAGS.check_steps,
                           keep_checkpoint_max=2,
                           session_config=session_config)

    estimator = tf.estimator.Estimator(model_fn=model_fn,
                                       model_dir=FLAGS.model_dir,
                                       config=config,
                                       params=params)

    train_data_dir = input_.get_data_dir(tf.estimator.ModeKeys.TRAIN, params)
    eval_data_dir = input_.get_data_dir(tf.estimator.ModeKeys.EVAL, params)

    hook = [] if not params.enable_ema else [
        LoadEMAHook(params.model_dir, 0.99)
    ]

    listeners = [
        EvalListener(estimator,
                     lambda: input_.input_fn(mode=tf.estimator.ModeKeys.EVAL,
                                             params=params,
                                             data_dir=train_data_dir),
                     name="train_data",
                     hook=hook),
        EvalListener(estimator,
                     lambda: input_.input_fn(mode=tf.estimator.ModeKeys.EVAL,
                                             params=params,
                                             data_dir=eval_data_dir),
                     hook=hook),
        # VariableListener()
    ]

    def train_input_fn():
        return input_.input_fn(mode=tf.estimator.ModeKeys.TRAIN,
                               params=params,
                               data_dir=train_data_dir)

    # gpu cluster
    if config.cluster_spec:
        train_spec = MyTraining.TrainSpec(train_input_fn, FLAGS.max_steps)
        eval_spec = MyTraining.EvalSpec(
            lambda: input_.input_fn(mode=tf.estimator.ModeKeys.EVAL,
                                    params=params,
                                    data_dir=train_data_dir),
            steps=FLAGS.check_steps)
        MyTraining.train_and_evaluate(estimator, train_spec, eval_spec,
                                      listeners)
        if config.task_type == TaskType.CHIEF:
            model_dir = estimator.export_savedmodel(
                FLAGS.model_dir, input_.get_input_reciever_fn())
            tf.logging.warn("save model to %s" % model_dir)

    # cpu solo
    else:
        # from tensorflow.python import debug as tf_debug
        # debug_hook = [tf_debug.LocalCLIDebugHook(ui_type="readline")]
        # estimator.train(train_input_fn, max_steps=FLAGS.max_steps, saving_listeners=listeners, hooks=debug_hook)
        estimator.train(train_input_fn,
                        max_steps=FLAGS.max_steps,
                        saving_listeners=listeners)
        export_dir = estimator.export_savedmodel(tf.flags.FLAGS.model_dir,
                                                 input_.get_input_reciever_fn())
        tf.logging.warn("save model to %s" % export_dir)

    for listener in listeners:
        print(listener.name)
        print(listener.history)
Esempio n. 29
0

def get_encodes(x):
    # x is `batch_size` of lines, each of which is a json object
    samples = [json.loads(l) for l in x]
    text = [s['fact'][:50] + s['fact'][-50:] for s in samples]
    features = bc.encode(text)
    # randomly choose a label
    labels = [[str(random.choice(s['meta']['relevant_articles']))] for s in samples]
    return features, labels


config = tf.ConfigProto()
config.gpu_options.allow_growth = True
run_config = RunConfig(model_dir='/data/cips/save/law-model',
                       session_config=config,
                       save_checkpoints_steps=1000)

estimator = DNNClassifier(
    hidden_units=[512],
    feature_columns=[tf.feature_column.numeric_column('feature', shape=(768,))],
    n_classes=len(laws),
    config=run_config,
    label_vocabulary=laws_str,
    dropout=0.1)

input_fn = lambda fp: (tf.data.TextLineDataset(fp)
                       .apply(tf.contrib.data.shuffle_and_repeat(buffer_size=10000))
                       .batch(batch_size)
                       .map(lambda x: tf.py_func(get_encodes, [x], [tf.float32, tf.string], name='bert_client'),
                            num_parallel_calls=num_parallel_calls)
                       # wrap the encodings in a dict keyed by the feature column name
                       .map(lambda x, y: ({'feature': x}, y)))
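
# A minimal usage sketch, assuming a running bert-serving server and that
# `laws`, `laws_str`, `batch_size` and `num_parallel_calls` are defined
# elsewhere; `train_fp` and `eval_fp` are placeholder file paths:
#
#   train_spec = tf.estimator.TrainSpec(input_fn=lambda: input_fn(train_fp),
#                                       max_steps=100000)
#   eval_spec = tf.estimator.EvalSpec(input_fn=lambda: input_fn(eval_fp),
#                                     throttle_secs=60)
#   tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
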
    def _Run(self, is_training, use_trt, batch_size, num_epochs, model_dir):
        """Train or evaluate the model.

    Args:
      is_training: whether to train or evaluate the model. In training mode,
        quantization will be simulated where the quantize_and_dequantize_v2 are
        placed.
      use_trt: if true, use TRT INT8 mode for evaluation, which will perform
        real quantization. Otherwise use native TensorFlow which will perform
        simulated quantization. Ignored if is_training is True.
      batch_size: batch size.
      num_epochs: how many epochs to train. Ignored if is_training is False.
      model_dir: where to save or load checkpoint.

    Returns:
      The Estimator evaluation result.
    """
        # Get dataset
        train_data, test_data = mnist.load_data()

        def _PreprocessFn(x, y):
            x = math_ops.cast(x, dtypes.float32)
            x = array_ops.expand_dims(x, axis=2)
            x = 2.0 * (x / 255.0) - 1.0
            y = math_ops.cast(y, dtypes.int32)
            return x, y
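
        # Illustrative values for the 2.0 * (x / 255.0) - 1.0 rescaling above:
        # pixel 0 -> -1.0, pixel 127.5 -> 0.0, pixel 255 -> 1.0, i.e. the uint8
        # MNIST images are mapped into [-1.0, 1.0] before being fed to the model.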

        def _EvalInputFn():
            mnist_x, mnist_y = test_data
            dataset = data.Dataset.from_tensor_slices((mnist_x, mnist_y))
            dataset = dataset.apply(
                data.experimental.map_and_batch(map_func=_PreprocessFn,
                                                batch_size=batch_size,
                                                num_parallel_calls=8))
            dataset = dataset.repeat(count=1)
            iterator = dataset.make_one_shot_iterator()
            features, labels = iterator.get_next()
            return features, labels

        def _TrainInputFn():
            mnist_x, mnist_y = train_data
            dataset = data.Dataset.from_tensor_slices((mnist_x, mnist_y))
            dataset = dataset.shuffle(2 * len(mnist_x))
            dataset = dataset.apply(
                data.experimental.map_and_batch(map_func=_PreprocessFn,
                                                batch_size=batch_size,
                                                num_parallel_calls=8))
            dataset = dataset.repeat(count=num_epochs)
            iterator = dataset.make_one_shot_iterator()
            features, labels = iterator.get_next()
            return features, labels

        def _ModelFn(features, labels, mode):
            if is_training:
                logits_out = self._BuildGraph(features)
            else:
                graph_def = self._GetGraphDef(use_trt, batch_size, model_dir)
                logits_out = importer.import_graph_def(
                    graph_def,
                    input_map={INPUT_NODE_NAME: features},
                    return_elements=[OUTPUT_NODE_NAME + ':0'],
                    name='')[0]

            loss = losses.sparse_softmax_cross_entropy(labels=labels,
                                                       logits=logits_out)
            summary.scalar('loss', loss)

            classes_out = math_ops.argmax(logits_out,
                                          axis=1,
                                          name='classes_out')
            accuracy = metrics.accuracy(labels=labels,
                                        predictions=classes_out,
                                        name='acc_op')
            summary.scalar('accuracy', accuracy[1])

            if mode == ModeKeys.EVAL:
                return EstimatorSpec(mode,
                                     loss=loss,
                                     eval_metric_ops={'accuracy': accuracy})
            elif mode == ModeKeys.TRAIN:
                optimizer = AdamOptimizer(learning_rate=1e-2)
                train_op = optimizer.minimize(loss,
                                              global_step=get_global_step())
                return EstimatorSpec(mode, loss=loss, train_op=train_op)

        config_proto = config_pb2.ConfigProto()
        config_proto.gpu_options.allow_growth = True
        estimator = Estimator(model_fn=_ModelFn,
                              model_dir=model_dir if is_training else None,
                              config=RunConfig(session_config=config_proto))
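        # In eval mode the weights come from the imported frozen graph, so no
        # checkpoint directory is needed; with model_dir=None the Estimator
        # falls back to a temporary directory.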

        if is_training:
            estimator.train(_TrainInputFn)
        results = estimator.evaluate(_EvalInputFn)
        logging.info('accuracy: %s', str(results['accuracy']))
        return results
Esempio n. 31
0
def bidaf_train(_):

    params = user_params(
        procedure=tuple(FLAGS.procedure),
        label_name=FLAGS.label_name,
        learning_rate=FLAGS.learning_rate,
        embed_size=FLAGS.embed_size,
        embedding_file_path=FLAGS.embedding_file_path,
        context_name=FLAGS.context_name,
        question_name=FLAGS.question_name,
        rnn_hidden_size=FLAGS.rnn_hidden_size,
        data_dir=FLAGS.data_dir,
        model_dir=FLAGS.model_dir,
        batch_size=FLAGS.batch_size,
        drop_out_rate=FLAGS.drop_out_rate,
        p1=FLAGS.p1,
        p2=FLAGS.p2,
        feature_voc_file_path=FLAGS.feature_voc_file_path,
        gpu_cores_list=FLAGS.gpu_cores,
        transfromer_conv_layers=FLAGS.transfromer_conv_layers,
        transfromer_conv_kernel_size=FLAGS.transfromer_conv_kernel_size,
        transfromer_head_number=FLAGS.transfromer_head_number,
        tansformer_d_model=FLAGS.tansformer_d_model,
        clip_norm=FLAGS.clip_norm,
        use_char_embedding=FLAGS.use_char_embedding,
        char_embedding_size=FLAGS.char_embedding_size,
        char_feature_name=FLAGS.char_feature_name,
        char_question_name=FLAGS.char_question_name,
        example_max_length=FLAGS.example_max_length,
        enable_ema=FLAGS.enable_ema,
        ema_decay=FLAGS.ema_decay,
        char_filters=FLAGS.char_filters,
        ans_limit=FLAGS.ans_limit)

    # load the word-embedding file
    enrich_hyper_parameters(params)

    # configure the logging level
    log_level = getattr(tf.logging, str(tf.flags.FLAGS.log_level).upper())
    tf.logging.set_verbosity(log_level)

    # load the data by creating a SparkInput instance
    input = SparkInput(params)

    sess_config = tf.ConfigProto()
    sess_config.allow_soft_placement = True
    sess_config.gpu_options.per_process_gpu_memory_fraction = 0.9
    sess_config.gpu_options.allow_growth = True
    #sess_config.report_tensor_allocations_upon_oom = True
    #sess_config.log_device_placement = True

    # estimator runtime configuration
    if FLAGS.gpu_cores:
        gpu_cores = tuple(eval(FLAGS.gpu_cores))  # parse the gpu_cores flag string
        devices = ["/device:GPU:%d" % d
                   for d in gpu_cores]  # one "/device:GPU:%d" entry per core
        distribution = tf.contrib.distribute.MirroredStrategy(
            devices=devices)  # distribution is a MirroredStrategy instance
        config = RunConfig(save_checkpoints_steps=FLAGS.check_steps,
                           train_distribute=distribution)
    else:
        config = RunConfig(save_checkpoints_steps=FLAGS.check_steps,
                           session_config=sess_config)  # config is a RunConfig object

    #estimator创建
    estimator = tf.estimator.Estimator(model_fn=model_fn,
                                       model_dir=FLAGS.model_dir,
                                       config=config,
                                       params=params)

    # resolve the file paths of the training and evaluation data
    train_data_dir = input.get_data_dir(tf.estimator.ModeKeys.TRAIN,
                                        params.data_dir)
    eval_data_dir = input.get_data_dir(tf.estimator.ModeKeys.EVAL,
                                       params.data_dir)

    # create EvalListeners to evaluate on the training and evaluation data
    hook = [] if not params.enable_ema else [
        LoadEMAHook(params.model_dir, FLAGS.ema_decay)
    ]
    listeners = [
        EvalListener(estimator,
                     lambda: input.input_fn(mode=tf.estimator.ModeKeys.EVAL,
                                            params=params,
                                            data_dir=train_data_dir),
                     name="train_data",
                     hook=hook),
        EvalListener(estimator,
                     lambda: input.input_fn(mode=tf.estimator.ModeKeys.EVAL,
                                            params=params,
                                            data_dir=eval_data_dir),
                     hook=hook)
    ]

    # build the training input_fn from the training data directory
    def train_input_fn():
        return input.input_fn(mode=tf.estimator.ModeKeys.TRAIN,
                              params=params,
                              data_dir=train_data_dir)

    #gpu cluster
    if config.cluster_spec:
        train_spec = MyTraining.TrainSpec(train_input_fn, FLAGS.max_steps)
        eval_spec = MyTraining.EvalSpec(lambda: input.input_fn(
            mode=tf.estimator.ModeKeys.EVAL, params=params),
                                        steps=FLAGS.check_steps)
        MyTraining.train_and_evaluate(estimator, train_spec, eval_spec,
                                      listeners)
        if config.task_type == TaskType.CHIEF:
            model_dir = estimator.export_savedmodel(
                FLAGS.model_dir, input.get_input_reciever_fn())
            tf.logging.warn("save model to %s" % model_dir)

    #cpu solo
    else:
        print("执行*************************")
        estimator.train(train_input_fn,
                        max_steps=FLAGS.max_steps,
                        saving_listeners=listeners)
        export_dir = estimator.export_savedmodel(tf.flags.FLAGS.model_dir,
                                                 input.get_input_reciever_fn())
        tf.logging.warn("save model to %s" % export_dir)

    for listener in listeners:
        print(listener.name)
        print(listener.history)