def _maybe_overwrite_model_dir_and_session_config(config, model_dir):
  """Overwrite estimator config by `model_dir` and `session_config` if needed.

  Args:
    config: Original estimator config.
    model_dir: Estimator model checkpoint directory.

  Returns:
    Overwritten estimator config.

  Raises:
    ValueError: Model directory inconsistent between `model_dir` and `config`.
  """
  default_session_config = run_config_lib.get_default_session_config()
  if isinstance(config, dict):
    config = RunConfig(**config)
  elif config is None:
    config = RunConfig(session_config=default_session_config)
  if config.session_config is None:
    config = RunConfig.replace(config, session_config=default_session_config)

  if model_dir is not None:
    if (getattr(config, 'model_dir', None) is not None
        and config.model_dir != model_dir):
      raise ValueError(
          "`model_dir` is set both in constructor and `RunConfig`, but with "
          "different values. In constructor: '{}', in `RunConfig`: "
          "'{}' ".format(model_dir, config.model_dir))
    config = RunConfig.replace(config, model_dir=model_dir)
  elif getattr(config, 'model_dir', None) is None:
    model_dir = tempfile.mkdtemp()
    config = RunConfig.replace(config, model_dir=model_dir)
  return config

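# A minimal usage sketch of the helper above (hedged: `RunConfig` and
# `run_config_lib` come from tensorflow.python.estimator.run_config; the
# paths are hypothetical). It shows the three resolution cases the docstring
# describes: an explicit `model_dir` wins, a conflicting pair raises
# ValueError, and when neither side sets one a temporary directory is used.
#
#   config = _maybe_overwrite_model_dir_and_session_config(None, '/tmp/m')
#   assert config.model_dir == '/tmp/m'
#
#   conflicting = RunConfig(model_dir='/tmp/a')
#   _maybe_overwrite_model_dir_and_session_config(conflicting, '/tmp/b')  # raises ValueError
#
#   auto = _maybe_overwrite_model_dir_and_session_config(None, None)
#   assert auto.model_dir is not None  # a fresh tempfile.mkdtemp() directory
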
def main():
    sequence_schema_path = f'{input_path}/train/sequence_schema'
    context_schema_path = f'{input_path}/train/context_schema'
    context_schema, sequence_schema = read_schemata(context_schema_path,
                                                    sequence_schema_path)
    tf_ctx_schema, tf_seq_schema = build_schema(context_schema, sequence_schema)

    train_parts = glob.glob(input_path + '/train' + '/part-*')
    validation_parts = glob.glob(input_path + '/test' + '/part-*')

    run_config = RunConfig(log_step_count_steps=10,
                           save_checkpoints_steps=100,
                           save_summary_steps=200,
                           keep_checkpoint_max=32)

    shared_input_fn = partial(input_fn, params, tf_seq_schema, tf_ctx_schema)
    train_input_fn = partial(shared_input_fn, train_parts)
    validation_input_fn = partial(shared_input_fn, validation_parts)

    train_spec = TrainSpec(train_input_fn, max_steps=1000000)
    eval_spec = EvalSpec(validation_input_fn,
                         steps=200,
                         name='validation',
                         start_delay_secs=30,
                         throttle_secs=1)

    estimator = Estimator(model_fn=model.model_fn,
                          model_dir=model_dir,
                          params=params,
                          config=run_config)

    logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
                        level=logging.INFO)
    logging.getLogger('tensorflow').propagate = False

    train_and_evaluate(estimator=estimator,
                       train_spec=train_spec,
                       eval_spec=eval_spec)

    prediction = list(estimator.predict(
        input_fn=partial(predict_input_fn, {'epochs': 1, 'batch_size': 10}, grid)))
    scores = [p.tolist() for p in prediction]
    pairwise_prob = pairwise_probability(scores)
    zero = pairwise_prob[0]
    A_zero = build_diags(zero)
    print(optimize(A_zero).x)

def get_estimator(self):
    from tensorflow.python.estimator.estimator import Estimator
    from tensorflow.python.estimator.run_config import RunConfig
    from tensorflow.python.estimator.model_fn import EstimatorSpec

    def model_fn(features, labels, mode, params):
        print(self.graph_path)
        with tf.gfile.GFile(self.graph_path, 'rb') as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())
        input_names = ['input_ids', 'input_mask', 'input_type_ids']
        output = tf.import_graph_def(
            graph_def,
            input_map={k + ':0': features[k] for k in input_names},
            return_elements=['final_encodes:0'])
        return EstimatorSpec(mode=mode, predictions={'encodes': output[0]})

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = self.gpu_memory_fraction
    config.log_device_placement = False
    config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
    run_config = RunConfig(session_config=config)
    return Estimator(model_fn=model_fn,
                     config=run_config,
                     params={'batch_size': self.batch_size})

def run_training(
    train_fn,
    model_fn,
    model_dir: str,
    gpu_mem_fraction: float = 0.96,
    log_step: int = 100,
    summary_step: int = 100,
    save_checkpoint_step: int = 1000,
    max_steps: int = 10000,
    eval_step: int = 10,
    eval_throttle: int = 120,
    train_batch_size: int = 128,
    train_hooks=None,
    eval_fn=None,
):
    tf.logging.set_verbosity(tf.logging.INFO)

    dist_strategy = None
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=gpu_mem_fraction)
    config = tf.ConfigProto(allow_soft_placement=True, gpu_options=gpu_options)
    run_config = RunConfig(
        train_distribute=dist_strategy,
        eval_distribute=dist_strategy,
        log_step_count_steps=log_step,
        model_dir=model_dir,
        save_checkpoints_steps=save_checkpoint_step,
        save_summary_steps=summary_step,
        session_config=config,
    )

    estimator = tf.estimator.Estimator(model_fn=model_fn,
                                       params={},
                                       config=run_config)

    if eval_fn:
        train_spec = tf.estimator.TrainSpec(input_fn=train_fn,
                                            max_steps=max_steps,
                                            hooks=train_hooks)
        eval_spec = tf.estimator.EvalSpec(input_fn=eval_fn,
                                          steps=eval_step,
                                          throttle_secs=eval_throttle)
        tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
    else:
        estimator.train(input_fn=train_fn, max_steps=max_steps, hooks=train_hooks)

def process(question, contexts):
    # TODO Replace all abbreviation code
    bert_config = modeling.BertConfig.from_json_file(
        os.path.join(modelDir, 'bert_config.json'))  # Loading bert config
    tokenizer = tokenization.FullTokenizer(
        vocab_file=os.path.join(modelDir, 'vocab.txt'),
        do_lower_case=False)  # Loading tokenizer

    candidates = read_QA(question, contexts)
    eval_features = convert_candidates_to_features(candidates=candidates,
                                                   tokenizer=tokenizer,
                                                   max_seq_length=512,
                                                   doc_stride=256,
                                                   max_query_length=128)

    model_fn = model_fn_builder(
        bert_config=bert_config,
        init_checkpoint=os.path.join(modelDir, 'bert_model.ckpt'),
        use_one_hot_embeddings=False)
    run_config = RunConfig(model_dir=modelDir, save_checkpoints_steps=1000)
    estimator = Estimator(model_fn=model_fn,
                          config=run_config,
                          params={'batch_size': 14})

    predict_input_fn = input_fn_builder(features=eval_features,
                                        seq_length=512,
                                        drop_remainder=True)

    all_results = []
    counter = 0
    RawResult = collections.namedtuple(
        "RawResult", ["unique_id", "start_logits", "end_logits"])
    for result in estimator.predict(predict_input_fn, yield_single_examples=True):
        unique_id = int(result["unique_ids"])
        start_logits = [float(x) for x in result["start_logits"].flat]
        end_logits = [float(x) for x in result["end_logits"].flat]
        all_results.append(
            RawResult(unique_id=unique_id,
                      start_logits=start_logits,
                      end_logits=end_logits))
        counter += 1
        if len(eval_features) == counter:
            break

    all_nbest_json = write_QA(candidates, eval_features, all_results, 2, 128, False)
    return all_nbest_json

def _build_estimator(self):
    def model_fn(features, mode):
        with tf.gfile.GFile(self._graphdef, 'rb') as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())
        output = tf.import_graph_def(
            graph_def,
            input_map={k + ':0': features[k] for k in self._input_names},
            return_elements=['final_encodes:0'])
        return EstimatorSpec(mode=mode, predictions={'output': output[0]})

    return Estimator(model_fn=model_fn,
                     config=RunConfig(session_config=self._config))

def get_estimator(bert_config_file, init_checkpoint, max_seq_len, select_layers,
                  batch_size=32, graph_file='../bert/tmp/graph',
                  model_dir='../bert/tmp'):
    from tensorflow.python.estimator.estimator import Estimator
    from tensorflow.python.estimator.run_config import RunConfig
    from tensorflow.python.estimator.model_fn import EstimatorSpec

    if os.path.exists(graph_file):
        graph_path = graph_file
    else:
        graph_path = create_graph(graph_file, bert_config_file, init_checkpoint,
                                  max_seq_len, select_layers)

    def model_fn(features, labels, mode, params):
        with tf.gfile.GFile(graph_path, 'rb') as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())
        input_names = ['input_ids', 'input_mask', 'input_type_ids']
        encoder_layer = tf.import_graph_def(
            graph_def,
            input_map={k + ':0': features[k] for k in input_names},
            return_elements=['final_encodes:0'])
        predictions = {'encodes': encoder_layer[0]}
        return EstimatorSpec(mode=mode, predictions=predictions)

    config = tf.ConfigProto(log_device_placement=False,
                            allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    return Estimator(model_fn=model_fn,
                     config=RunConfig(session_config=config),
                     params={'batch_size': batch_size},
                     model_dir=model_dir)

def get_estimator(self):
    from tensorflow.python.estimator.estimator import Estimator
    from tensorflow.python.estimator.run_config import RunConfig

    bert_config = modeling.BertConfig.from_json_file(args.config_name)
    init_checkpoint = args.ckpt_name
    model_fn = self.model_fn_builder(
        bert_config=bert_config,
        init_checkpoint=init_checkpoint,
        use_one_hot_embeddings=False)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = self.gpu_memory_fraction
    config.log_device_placement = False

    return Estimator(model_fn=model_fn,
                     config=RunConfig(session_config=config),
                     model_dir=args.model_dir,
                     params={'batch_size': self.batch_size})

def get_estimator(self):
    from tensorflow.python.estimator.estimator import Estimator
    from tensorflow.python.estimator.run_config import RunConfig
    from tensorflow.python.estimator.model_fn import EstimatorSpec

    def classification_model_fn(features):
        """model_fn for the text classification model."""
        with tf.gfile.GFile(self.graph_path, 'rb') as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        input_map = {"input_ids": input_ids, "input_mask": input_mask}
        pred_probs = tf.import_graph_def(graph_def,
                                         name='',
                                         input_map=input_map,
                                         return_elements=['pred_prob:0'])
        return EstimatorSpec(mode=tf.estimator.ModeKeys.PREDICT,
                             predictions={
                                 'encodes': tf.argmax(pred_probs[0], axis=-1),
                                 'score': tf.reduce_max(pred_probs[0], axis=-1)
                             })

    # device_count: 0 means CPU only, 1 means use the GPU
    config = tf.ConfigProto(device_count={'GPU': 1})
    config.gpu_options.allow_growth = True
    # config.gpu_options.per_process_gpu_memory_fraction = self.gpu_memory_fraction
    config.log_device_placement = False
    # session-wise XLA doesn't seem to work on tf 1.10
    # if args.xla:
    #     config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
    return Estimator(model_fn=classification_model_fn,
                     config=RunConfig(session_config=config))

def get_estimator(args, tf, graph_path):
    from tensorflow.python.estimator.estimator import Estimator
    from tensorflow.python.estimator.run_config import RunConfig
    from tensorflow.python.estimator.model_fn import EstimatorSpec

    def model_fn(features, labels, mode, params):
        with tf.gfile.GFile(graph_path, 'rb') as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())
        input_names = ['input_ids', 'input_mask', 'input_type_ids']
        output = tf.import_graph_def(
            graph_def,
            input_map={k + ':0': features[k] for k in input_names},
            return_elements=['final_encodes:0'])
        if args.fp16:
            return EstimatorSpec(mode=mode,
                                 predictions={
                                     'unique_ids': features['unique_ids'],
                                     'encodes': tf.cast(output[0], tf.float32)
                                 })
        else:
            return EstimatorSpec(mode=mode,
                                 predictions={
                                     'unique_ids': features['unique_ids'],
                                     'encodes': output[0]
                                 })

    config = tf.ConfigProto(device_count={'GPU': 0},
                            intra_op_parallelism_threads=16,
                            inter_op_parallelism_threads=1)
    config.log_device_placement = False
    # session-wise XLA doesn't seem to work on tf 1.10
    # if args.xla:
    #     config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
    return Estimator(model_fn=model_fn,
                     config=RunConfig(model_dir=args.checkpoint_dir,
                                      session_config=config))

def get_estimator(self, tf):
    from tensorflow.python.estimator.estimator import Estimator
    from tensorflow.python.estimator.run_config import RunConfig
    from tensorflow.python.estimator.model_fn import EstimatorSpec

    def model_fn(features, labels, mode, params):
        with tf.gfile.GFile(self.graph_path, 'rb') as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())
        input_names = ['input_ids', 'input_mask', 'input_type_ids']
        output = tf.import_graph_def(
            graph_def,
            input_map={k + ':0': features[k] for k in input_names},
            return_elements=['final_encodes:0'])
        return EstimatorSpec(
            mode=mode,
            predictions={
                'client_id': features['client_id'],
                'input_ids': features['input_ids'],            # [mnb] debug
                'input_mask': features['input_mask'],          # [mnb] debug
                'input_type_ids': features['input_type_ids'],  # [mnb] debug
                'encodes': output[0]
            })

    config = tf.ConfigProto(
        device_count={'GPU': 0 if self.device_id < 0 else 1})
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = self.gpu_memory_fraction
    config.log_device_placement = False
    # session-wise XLA doesn't seem to work on tf 1.10
    # if args.xla:
    #     config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
    return Estimator(model_fn=model_fn,
                     config=RunConfig(session_config=config))

def get_estimator(self):
    from tensorflow.python.estimator.estimator import Estimator
    from tensorflow.python.estimator.run_config import RunConfig

    bert_config = modeling.BertConfig.from_json_file(args.config_name)
    label_list = self.processor.get_labels()
    train_examples = self.processor.get_train_examples(args.data_dir)
    num_train_steps = int(
        len(train_examples) / self.batch_size * args.num_train_epochs)
    num_warmup_steps = int(num_train_steps * 0.1)

    if self.mode == tf.estimator.ModeKeys.TRAIN:
        init_checkpoint = args.ckpt_name
    else:
        init_checkpoint = args.output_dir

    model_fn = self.model_fn_builder(bert_config=bert_config,
                                     num_labels=len(label_list),
                                     init_checkpoint=init_checkpoint,
                                     learning_rate=args.learning_rate,
                                     num_train_steps=num_train_steps,
                                     num_warmup_steps=num_warmup_steps,
                                     use_one_hot_embeddings=False)

    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=args.gpu_memory_fraction)
    config = tf.ConfigProto(gpu_options=gpu_options)
    config.log_device_placement = False

    return Estimator(model_fn=model_fn,
                     config=RunConfig(session_config=config),
                     model_dir=args.output_dir,
                     params={'batch_size': self.batch_size})

def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)

    # Get corpus info
    FLAGS.n_token = data_utils.VOCAB_SIZE
    tf.logging.info('n_token {}'.format(FLAGS.n_token))

    if not tf.gfile.Exists(FLAGS.model_dir):
        tf.gfile.MakeDirs(FLAGS.model_dir)

    bsz_per_core = FLAGS.train_batch_size

    train_input_fn, train_record_info_dict = get_input_fn('train', bsz_per_core)
    tf.logging.info('num of batches {}'.format(
        train_record_info_dict['num_batch']))
    train_cache_fn = get_cache_fn(FLAGS.mem_len, bsz_per_core)
    tf.logging.info(train_cache_fn)

    log_every_n_steps = 10
    run_config = RunConfig(
        log_step_count_steps=log_every_n_steps,
        model_dir=FLAGS.model_dir,
        save_checkpoints_steps=FLAGS.save_steps,
        save_summary_steps=None,
    )

    model_fn = get_model_fn()
    tf.logging.info('Use normal Estimator')
    estimator = Estimator(
        model_fn=model_fn,
        params={'batch_size': bsz_per_core, 'cache': None},
        config=run_config,
    )

    tf.logging.info('***** Running evaluation *****')
    tf.logging.info(' Batch size = %d', FLAGS.train_batch_size)
    estimator.evaluate(input_fn=train_input_fn, steps=100)

def get_estimator(self, tf):
    """Get tf estimator."""
    def model_fn(features, labels, mode, params):
        with tf.gfile.GFile(self.graph_path, 'rb') as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())
        input_names = ['input_ids', 'input_mask', 'input_type_ids']
        output = tf.import_graph_def(
            graph_def,
            input_map={k + ':0': features[k] for k in input_names},
            return_elements=['final_encodes:0'])
        return EstimatorSpec(mode=mode, predictions={'encodes': output[0]})

    config = tf.ConfigProto(
        device_count={'GPU': 0 if self.device_id < 0 else 1})
    config.gpu_options.allow_growth = True
    config.log_device_placement = False
    return Estimator(model_fn=model_fn,
                     config=RunConfig(session_config=config))

def get_estimator(self, tf, device_id=0):
    # Load the frozen graph model
    from tensorflow.python.estimator.estimator import Estimator
    from tensorflow.python.estimator.run_config import RunConfig
    from tensorflow.python.estimator.model_fn import EstimatorSpec

    def ner_model_fn(features, labels, mode, params):
        """model_fn for the named-entity recognition model."""
        with tf.gfile.GFile(self.graph_path, 'rb') as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        input_map = {"input_ids": input_ids, "input_mask": input_mask}
        pred_ids = tf.import_graph_def(graph_def,
                                       name='',
                                       input_map=input_map,
                                       return_elements=['pred_ids:0'])
        return EstimatorSpec(mode=mode, predictions={'encodes': pred_ids[0]})

    # device_count: 0 means CPU only, 1 means use the GPU
    config = tf.ConfigProto(device_count={'GPU': 0 if device_id < 0 else 1})
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = self.gpu_memory_fraction
    config.log_device_placement = False
    # session-wise XLA doesn't seem to work on tf 1.10
    # if args.xla:
    #     config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
    if self.mode == 'NER':
        return Estimator(model_fn=ner_model_fn,
                         config=RunConfig(session_config=config))

def construct_estimator(output_dir, save_checkpoint_steps, model_config,
                        init_checkpoint, learning_rate, max_seq_length,
                        use_mask, label2idx_map, num_output,
                        train_batch_size, eval_batch_size, lr_decay):
    mask_crf = MaskedCRF(use_mask=use_mask,
                         label2idx_map=label2idx_map,
                         num_output=num_output)

    run_config = RunConfig(model_dir=output_dir,
                           save_checkpoints_steps=save_checkpoint_steps)

    model_fn = model_fn_builder(model_config=model_config,
                                init_checkpoint=init_checkpoint,
                                learning_rate=learning_rate,
                                lr_decay=lr_decay,
                                max_seq_length=max_seq_length,
                                num_output=num_output,
                                mask_crf=mask_crf)

    estimator = Estimator(model_fn=model_fn,
                          params={
                              "train_batch_size": train_batch_size,
                              "eval_batch_size": eval_batch_size
                          },
                          config=run_config)
    return estimator

def get_estimator(self):
    from tensorflow.python.estimator.estimator import Estimator
    from tensorflow.python.estimator.run_config import RunConfig

    if self.mode == tf.estimator.ModeKeys.TRAIN:
        init_checkpoint = args.ckpt_name
        train_examples = self.processor.get_train_examples(args.data_dir)
        num_train_steps = int(
            len(train_examples) / self.batch_size * args.num_train_epochs)
    else:
        init_checkpoint = args.output_dir
        num_train_steps = int(30522)

    bert_config = modeling.BertConfig.from_json_file(args.config_name)
    label_list = self.processor.get_labels()
    num_warmup_steps = int(num_train_steps * 0.1)

    model_fn = self.model_fn_builder(bert_config=bert_config,
                                     num_labels=len(label_list),
                                     init_checkpoint=init_checkpoint,
                                     learning_rate=args.learning_rate,
                                     num_train_steps=num_train_steps,
                                     num_warmup_steps=num_warmup_steps,
                                     use_one_hot_embeddings=False)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.allow_soft_placement = True
    config.gpu_options.per_process_gpu_memory_fraction = args.gpu_memory_fraction
    config.log_device_placement = False

    return Estimator(model_fn=model_fn,
                     config=RunConfig(session_config=config,
                                      save_summary_steps=100,
                                      save_checkpoints_steps=100,
                                      keep_checkpoint_max=1),
                     model_dir=args.output_dir,
                     params={'batch_size': self.batch_size})

def main(_):
    if not FLAGS.do_predict_one:
        tf.logging.set_verbosity(tf.logging.INFO)
        log_writer()

    processors = {"senti": SentimentProcessor}

    tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case,
                                                  FLAGS.init_checkpoint)

    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

    if FLAGS.max_seq_length > bert_config.max_position_embeddings:
        raise ValueError(
            "Cannot use sequence length %d because the BERT model "
            "was only trained up to sequence length %d" %
            (FLAGS.max_seq_length, bert_config.max_position_embeddings))

    tf.gfile.MakeDirs(FLAGS.output_dir)

    task_name = FLAGS.task_name.lower()
    if task_name not in processors:
        raise ValueError("Task not found: %s" % (task_name))

    processor = processors[task_name]()
    label_list = processor.get_labels()
    tokenizer = tokenization.FullTokenizer(vocab_file=FLAGS.vocab_file,
                                           do_lower_case=FLAGS.do_lower_case)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.log_device_placement = False
    run_config = RunConfig(session_config=config,
                           model_dir=FLAGS.output_dir,
                           save_checkpoints_steps=FLAGS.save_checkpoints_steps)

    train_examples = None
    num_train_steps = None
    num_warmup_steps = None
    if FLAGS.do_train:
        # Assumption: these values were left as None in the snippet; they are
        # derived here as in the other snippets in this file (warmup fixed at
        # 10% of the training steps).
        train_examples = processor.get_train_examples(FLAGS.data_dir)
        num_train_steps = int(
            len(train_examples) / FLAGS.train_batch_size * FLAGS.num_train_epochs)
        num_warmup_steps = int(num_train_steps * 0.1)

    model_fn = model_fn_builder(bert_config=bert_config,
                                num_labels=len(label_list),
                                init_checkpoint=FLAGS.init_checkpoint,
                                learning_rate=FLAGS.learning_rate,
                                num_train_steps=num_train_steps,
                                num_warmup_steps=num_warmup_steps,
                                use_one_hot_embeddings=False)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = tf.estimator.Estimator(
        model_fn=model_fn,
        config=run_config,
        params={"batch_size": FLAGS.train_batch_size})

    print(FLAGS.do_train)
    if FLAGS.do_train:
        train_file = os.path.join(FLAGS.output_dir, "train.tf_record")
        file_based_convert_examples_to_features(train_examples, label_list,
                                                FLAGS.max_seq_length, tokenizer,
                                                train_file)
        tf.logging.info("***** Running training *****")
        tf.logging.info(" Num examples = %d", len(train_examples))
        tf.logging.info(" Batch size = %d", FLAGS.train_batch_size)
        tf.logging.info(" Num steps = %d", num_train_steps)
        train_input_fn = file_based_input_fn_builder(
            input_file=train_file,
            seq_length=FLAGS.max_seq_length,
            is_training=True,
            drop_remainder=True)
        estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)

    if FLAGS.do_eval:
        eval_examples = processor.get_dev_examples(FLAGS.data_dir)
        num_actual_eval_examples = len(eval_examples)
        # if FLAGS.use_tpu:
        #     # TPU requires a fixed batch size for all batches, therefore the
        #     # number of examples must be a multiple of the batch size, or else
        #     # examples will get dropped. So we pad with fake examples which are
        #     # ignored later on. These do NOT count towards the metric (all
        #     # tf.metrics support a per-instance weight, and these get a weight
        #     # of 0.0).
        #     while len(eval_examples) % FLAGS.eval_batch_size != 0:
        #         eval_examples.append(PaddingInputExample())

        eval_file = os.path.join(FLAGS.output_dir, "eval.tf_record")
        file_based_convert_examples_to_features(eval_examples, label_list,
                                                FLAGS.max_seq_length, tokenizer,
                                                eval_file)
        tf.logging.info("***** Running evaluation *****")
        tf.logging.info(" Num examples = %d (%d actual, %d padding)",
                        len(eval_examples), num_actual_eval_examples,
                        len(eval_examples) - num_actual_eval_examples)
        tf.logging.info(" Batch size = %d", FLAGS.eval_batch_size)

        # This tells the estimator to run through the entire set.
        eval_steps = None
        # However, if running eval on the TPU, you will need to specify the
        # number of steps.
        # if FLAGS.use_tpu:
        #     assert len(eval_examples) % FLAGS.eval_batch_size == 0
        #     eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size)

        eval_drop_remainder = False
        eval_input_fn = file_based_input_fn_builder(
            input_file=eval_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=eval_drop_remainder)

        result = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)

        output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
        with tf.gfile.GFile(output_eval_file, "w") as writer:
            tf.logging.info("***** Eval results *****")
            for key in sorted(result.keys()):
                tf.logging.info(" %s = %s", key, str(result[key]))
                writer.write("%s = %s\n" % (key, str(result[key])))

    if FLAGS.do_predict:
        predict_examples = processor.get_test_examples(FLAGS.data_dir)
        num_actual_predict_examples = len(predict_examples)
        # if FLAGS.use_tpu:
        #     # TPU requires a fixed batch size for all batches, therefore the
        #     # number of examples must be a multiple of the batch size, or else
        #     # examples will get dropped. So we pad with fake examples which are
        #     # ignored later on.
        #     while len(predict_examples) % FLAGS.predict_batch_size != 0:
        #         predict_examples.append(PaddingInputExample())

        predict_file = os.path.join(FLAGS.output_dir, "predict.tf_record")
        file_based_convert_examples_to_features(predict_examples, label_list,
                                                FLAGS.max_seq_length, tokenizer,
                                                predict_file)
        tf.logging.info("***** Running prediction *****")
        tf.logging.info(" Num examples = %d (%d actual, %d padding)",
                        len(predict_examples), num_actual_predict_examples,
                        len(predict_examples) - num_actual_predict_examples)
        tf.logging.info(" Batch size = %d", FLAGS.predict_batch_size)

        predict_drop_remainder = False
        predict_input_fn = file_based_input_fn_builder(
            input_file=predict_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=predict_drop_remainder)

        result = estimator.predict(input_fn=predict_input_fn)

        output_predict_file = os.path.join(FLAGS.output_dir, "test_results.tsv")
        with tf.gfile.GFile(output_predict_file, "w") as writer:
            num_written_lines = 0
            tf.logging.info("***** Predict results *****")
            for (i, prediction) in enumerate(result):
                probabilities = prediction
                if i >= num_actual_predict_examples:
                    break
                output_line = "\t".join(
                    str(class_probability)
                    for class_probability in probabilities)
                output_line = "\t".join([
                    str(predict_examples[i].label), output_line,
                    predict_examples[i].text_a
                ]) + "\n"
                writer.write(output_line)
                num_written_lines += 1
        assert num_written_lines == num_actual_predict_examples

    if FLAGS.do_predict_one:
        import time
        while True:
            start = time.clock()
            predict_examples = processor.get_input_example()
            num_actual_predict_examples = len(predict_examples)
            predict_file = os.path.join(FLAGS.output_dir, "predict.tf_record")
            file_based_convert_examples_to_features(predict_examples, label_list,
                                                    FLAGS.max_seq_length,
                                                    tokenizer, predict_file)
            predict_drop_remainder = False
            predict_input_fn = file_based_input_fn_builder(
                input_file=predict_file,
                seq_length=FLAGS.max_seq_length,
                is_training=False,
                drop_remainder=predict_drop_remainder)
            print("time:", time.clock() - start)
            start = time.clock()
            result = estimator.predict(input_fn=predict_input_fn)
            for r in result:
                print(r)
            print("predict time:", time.clock() - start)

def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)

    if not FLAGS.do_train and not FLAGS.do_eval:
        raise ValueError('At least one of `do_train` or `do_eval` must be True.')

    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)
    tf.gfile.MakeDirs(FLAGS.output_dir)

    input_files = []
    for input_pattern in FLAGS.input_file.split(','):
        input_files.extend(tf.gfile.Glob(input_pattern))

    tf.logging.info('*** Input Files ***')
    for input_file in input_files:
        tf.logging.info(' %s' % input_file)

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2

    if FLAGS.use_gpu and int(FLAGS.num_gpu_cores) >= 2:
        tf.logging.info('Use normal RunConfig')
        tf.logging.info(FLAGS.num_gpu_cores)
        dist_strategy = tf.contrib.distribute.MirroredStrategy(
            num_gpus=FLAGS.num_gpu_cores,
            auto_shard_dataset=True,
            cross_device_ops=AllReduceCrossDeviceOps(
                'nccl', num_packs=FLAGS.num_gpu_cores),
            # cross_device_ops=AllReduceCrossDeviceOps('hierarchical_copy'),
        )
        log_every_n_steps = 10
        run_config = RunConfig(
            train_distribute=dist_strategy,
            eval_distribute=dist_strategy,
            log_step_count_steps=log_every_n_steps,
            model_dir=FLAGS.output_dir,
            save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        )
    else:
        run_config = tf.contrib.tpu.RunConfig(
            cluster=tpu_cluster_resolver,
            master=FLAGS.master,
            model_dir=FLAGS.output_dir,
            save_checkpoints_steps=FLAGS.save_checkpoints_steps,
            tpu_config=tf.contrib.tpu.TPUConfig(
                iterations_per_loop=FLAGS.iterations_per_loop,
                num_shards=FLAGS.num_tpu_cores,
                per_host_input_for_training=is_per_host,
            ),
        )

    model_fn = model_fn_builder(
        bert_config=bert_config,
        init_checkpoint=FLAGS.init_checkpoint,
        learning_rate=FLAGS.learning_rate,
        num_train_steps=FLAGS.num_train_steps,
        num_warmup_steps=FLAGS.num_warmup_steps,
        use_tpu=FLAGS.use_tpu,
        use_one_hot_embeddings=FLAGS.use_tpu,
    )

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    if FLAGS.use_gpu and int(FLAGS.num_gpu_cores) >= 2:
        tf.logging.info('Use normal Estimator')
        estimator = Estimator(model_fn=model_fn, params={}, config=run_config)
    else:
        estimator = tf.contrib.tpu.TPUEstimator(
            use_tpu=FLAGS.use_tpu,
            model_fn=model_fn,
            config=run_config,
            train_batch_size=FLAGS.train_batch_size,
            eval_batch_size=FLAGS.eval_batch_size,
        )

    if FLAGS.do_train:
        tf.logging.info('***** Running training *****')
        tf.logging.info(' Batch size = %d', FLAGS.train_batch_size)
        if FLAGS.use_gpu and int(FLAGS.num_gpu_cores) >= 2:
            train_input_fn = input_fn_builder_gpu(
                input_files=input_files,
                max_seq_length=FLAGS.max_seq_length,
                max_predictions_per_seq=FLAGS.max_predictions_per_seq,
                is_training=True,
                batch_size=per_device_batch_size(FLAGS.train_batch_size,
                                                 FLAGS.num_gpu_cores),
            )
        else:
            train_input_fn = input_fn_builder(
                input_files=input_files,
                max_seq_length=FLAGS.max_seq_length,
                max_predictions_per_seq=FLAGS.max_predictions_per_seq,
                is_training=True,
            )
        estimator.train(input_fn=train_input_fn, max_steps=FLAGS.num_train_steps)

    if FLAGS.do_eval:
        tf.logging.info('***** Running evaluation *****')
        tf.logging.info(' Batch size = %d', FLAGS.eval_batch_size)
        if FLAGS.use_gpu and int(FLAGS.num_gpu_cores) >= 2:
            eval_input_fn = input_fn_builder_gpu(
                input_files=input_files,
                max_seq_length=FLAGS.max_seq_length,
                max_predictions_per_seq=FLAGS.max_predictions_per_seq,
                is_training=False,
                batch_size=FLAGS.eval_batch_size,
            )
        else:
            eval_input_fn = input_fn_builder(
                input_files=input_files,
                max_seq_length=FLAGS.max_seq_length,
                max_predictions_per_seq=FLAGS.max_predictions_per_seq,
                is_training=False,
            )
        result = estimator.evaluate(input_fn=eval_input_fn,
                                    steps=FLAGS.max_eval_steps)

        output_eval_file = os.path.join(FLAGS.output_dir, 'eval_results.txt')
        with tf.gfile.GFile(output_eval_file, 'w') as writer:
            tf.logging.info('***** Eval results *****')
            for key in sorted(result.keys()):
                tf.logging.info(' %s = %s', key, str(result[key]))
                writer.write('%s = %s\n' % (key, str(result[key])))

def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)

    if not FLAGS.do_train and not FLAGS.do_eval:
        raise ValueError("At least one of `do_train` or `do_eval` must be True.")

    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)
    tf.gfile.MakeDirs(FLAGS.output_dir)

    input_files = []
    for input_pattern in FLAGS.input_file.split(","):
        input_files.extend(tf.gfile.Glob(input_pattern))

    tf.logging.info("*** Input Files ***")
    for input_file in input_files:
        tf.logging.info(" %s" % input_file)

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2

    dist_strategy = tf.contrib.distribute.MirroredStrategy(
        num_gpus=FLAGS.n_gpus,
        cross_device_ops=AllReduceCrossDeviceOps('nccl', num_packs=FLAGS.n_gpus),
    )
    # IF ERROR COULD TRY:
    # dist_strategy = tf.contrib.distribute.MirroredStrategy(
    #     devices=["device:GPU:%d" % i for i in range(FLAGS.n_gpus)],
    #     cross_tower_ops=tf.distribute.HierarchicalCopyAllReduce())

    log_every_n_steps = 8
    run_config = RunConfig(train_distribute=dist_strategy,
                           eval_distribute=dist_strategy,
                           log_step_count_steps=log_every_n_steps,
                           model_dir=FLAGS.output_dir,
                           save_checkpoints_steps=FLAGS.save_checkpoints_steps)

    model_fn = model_fn_builder(bert_config=bert_config,
                                init_checkpoint=FLAGS.init_checkpoint,
                                learning_rate=FLAGS.learning_rate,
                                num_train_steps=FLAGS.num_train_steps,
                                num_warmup_steps=FLAGS.num_warmup_steps,
                                use_tpu=FLAGS.use_tpu,
                                use_one_hot_embeddings=FLAGS.use_tpu)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = Estimator(model_fn=model_fn, params={}, config=run_config)

    if FLAGS.do_train:
        tf.logging.info("***** Running training *****")
        tf.logging.info(" Batch size = %d", FLAGS.train_batch_size)
        train_input_fn = input_fn_builder(
            input_files=input_files,
            max_seq_length=FLAGS.max_seq_length,
            max_predictions_per_seq=FLAGS.max_predictions_per_seq,
            is_training=True)
        estimator.train(input_fn=train_input_fn, max_steps=FLAGS.num_train_steps)

    if FLAGS.do_eval:
        tf.logging.info("***** Running evaluation *****")
        tf.logging.info(" Batch size = %d", FLAGS.eval_batch_size)
        eval_input_fn = input_fn_builder(
            input_files=input_files,
            max_seq_length=FLAGS.max_seq_length,
            max_predictions_per_seq=FLAGS.max_predictions_per_seq,
            is_training=False)
        result = estimator.evaluate(input_fn=eval_input_fn,
                                    steps=FLAGS.max_eval_steps)

        output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
        with tf.gfile.GFile(output_eval_file, "w") as writer:
            tf.logging.info("***** Eval results *****")
            for key in sorted(result.keys()):
                tf.logging.info(" %s = %s", key, str(result[key]))
                writer.write("%s = %s\n" % (key, str(result[key])))

checkpoint_fp = os.path.join(model_dir, 'bert_model.ckpt')
vocab_fp = os.path.join(model_dir, 'vocab.txt')
tokenizer = tokenization.FullTokenizer(vocab_file=vocab_fp)
max_seq_len = 10
worker_id = id
daemon = True

model_fn = model_fn_builder(
    bert_config=modeling.BertConfig.from_json_file(config_fp),
    init_checkpoint=checkpoint_fp,
    pooling_strategy=PoolingStrategy.NONE,
    pooling_layer=[-2])

config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.gpu_options.per_process_gpu_memory_fraction = 0.3
estimator = Estimator(model_fn,
                      config=RunConfig(session_config=config),
                      model_dir=None)

def input_fn_builder(msg):
    def gen():
        for i in range(1):
            tmp_f = list(convert_lst_to_features(msg, max_seq_len, tokenizer))
            yield {
                'input_ids': [f.input_ids for f in tmp_f],
                'input_mask': [f.input_mask for f in tmp_f],
                'input_type_ids': [f.input_type_ids for f in tmp_f]
            }

    def input_fn():
        for i in gen():

def run_training(
    train_fn,
    model_fn,
    model_dir: str,
    num_gpus: int = 1,
    gpu_mem_fraction: float = 0.95,
    log_step: int = 100,
    summary_step: int = 100,
    save_checkpoint_step: int = 1000,
    max_steps: int = 10000,
    eval_step: int = 10,
    eval_throttle: int = 120,
    use_tpu: bool = False,
    tpu_name: str = None,
    tpu_zone: str = None,
    gcp_project: str = None,
    iterations_per_loop: int = 100,
    num_tpu_cores: int = 8,
    train_batch_size: int = 128,
    train_hooks=None,
    eval_fn=None,
):
    tf.logging.set_verbosity(tf.logging.INFO)

    if num_gpus > 1 and not use_tpu:
        dist_strategy = tf.contrib.distribute.MirroredStrategy(
            num_gpus=num_gpus,
            auto_shard_dataset=True,
            cross_device_ops=AllReduceCrossDeviceOps('nccl', num_packs=num_gpus),
        )
    else:
        dist_strategy = None

    if use_tpu:
        tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
            tpu_name, zone=tpu_zone, project=gcp_project)
        is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2
        run_config = tf.contrib.tpu.RunConfig(
            cluster=tpu_cluster_resolver,
            master=None,
            model_dir=model_dir,
            save_checkpoints_steps=save_checkpoint_step,
            tpu_config=tf.contrib.tpu.TPUConfig(
                iterations_per_loop=iterations_per_loop,
                num_shards=num_tpu_cores,
                per_host_input_for_training=is_per_host,
            ),
        )
    else:
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=gpu_mem_fraction)
        config = tf.ConfigProto(allow_soft_placement=True,
                                gpu_options=gpu_options)
        run_config = RunConfig(
            train_distribute=dist_strategy,
            eval_distribute=dist_strategy,
            log_step_count_steps=log_step,
            model_dir=model_dir,
            save_checkpoints_steps=save_checkpoint_step,
            save_summary_steps=summary_step,
            session_config=config,
        )

    if use_tpu:
        estimator = tf.contrib.tpu.TPUEstimator(
            use_tpu=use_tpu,
            model_fn=model_fn,
            config=run_config,
            train_batch_size=train_batch_size,
            eval_batch_size=None,
        )
        eval_fn = None
    else:
        estimator = tf.estimator.Estimator(model_fn=model_fn,
                                           params={},
                                           config=run_config)

    if eval_fn:
        train_spec = tf.estimator.TrainSpec(input_fn=train_fn,
                                            max_steps=max_steps,
                                            hooks=train_hooks)
        eval_spec = tf.estimator.EvalSpec(input_fn=eval_fn,
                                          steps=eval_step,
                                          throttle_secs=eval_throttle)
        tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
    else:
        estimator.train(input_fn=train_fn, max_steps=max_steps, hooks=train_hooks)

def get_estimator(self, tf):
    from tensorflow.python.estimator.estimator import Estimator
    from tensorflow.python.estimator.run_config import RunConfig
    from tensorflow.python.estimator.model_fn import EstimatorSpec

    def model_fn(features, labels, mode, params):
        with tf.gfile.GFile(self.graph_path, 'rb') as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())
        input_names = ['input_ids', 'input_mask', 'input_type_ids']
        output = tf.import_graph_def(
            graph_def,
            input_map={k + ':0': features[k] for k in input_names},
            return_elements=['final_encodes:0'])
        return EstimatorSpec(mode=mode,
                             predictions={
                                 'client_id': features['client_id'],
                                 'encodes': output[0]
                             })

    def ner_model_fn(features, labels, mode, params):
        """model_fn for the named-entity recognition model."""
        with tf.gfile.GFile(self.graph_path, 'rb') as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        input_map = {"input_ids": input_ids, "input_mask": input_mask}
        pred_ids = tf.import_graph_def(graph_def,
                                       name='',
                                       input_map=input_map,
                                       return_elements=['pred_ids:0'])
        return EstimatorSpec(mode=mode,
                             predictions={
                                 'client_id': features['client_id'],
                                 'encodes': pred_ids[0]
                             })

    def classification_model_fn(features, labels, mode, params):
        """model_fn for the text classification model."""
        with tf.gfile.GFile(self.graph_path, 'rb') as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        input_map = {"input_ids": input_ids, "input_mask": input_mask}
        pred_probs = tf.import_graph_def(graph_def,
                                         name='',
                                         input_map=input_map,
                                         return_elements=['pred_prob:0'])
        return EstimatorSpec(
            mode=mode,
            predictions={
                'client_id': features['client_id'],
                # 'encodes': tf.argmax(pred_probs[0], axis=-1),
                # 'score': tf.reduce_max(pred_probs[0], axis=-1)
                'encodes': pred_probs[0],
                'score': pred_probs[0]
            })

    # device_count: 0 means CPU only, 1 means use the GPU
    config = tf.ConfigProto(
        device_count={'GPU': 0 if self.device_id < 0 else 1})
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = self.gpu_memory_fraction
    config.log_device_placement = False
    # session-wise XLA doesn't seem to work on tf 1.10
    # if args.xla:
    #     config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1

    if self.mode == 'NER':
        return Estimator(model_fn=ner_model_fn,
                         config=RunConfig(session_config=config))
    elif self.mode == 'BERT':
        return Estimator(model_fn=model_fn,
                         config=RunConfig(session_config=config))
    elif self.mode == 'CLASS':
        return Estimator(model_fn=classification_model_fn,
                         config=RunConfig(session_config=config))

def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)

    processors = {
        "cola": ColaProcessor,
        "mnli": MnliProcessor,
        "mrpc": MrpcProcessor,
        "xnli": XnliProcessor,
        "qqp": QqpProcessor,
        'chnsenticorp': ChnsenticorpProcessor,
        'gt': GTProcessor,
        'tcl': TCLProcessor
    }

    tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case,
                                                  FLAGS.init_checkpoint)

    if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict:
        raise ValueError(
            "At least one of `do_train`, `do_eval` or `do_predict' must be True.")

    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

    if FLAGS.max_seq_length > bert_config.max_position_embeddings:
        raise ValueError(
            "Cannot use sequence length %d because the BERT model "
            "was only trained up to sequence length %d" %
            (FLAGS.max_seq_length, bert_config.max_position_embeddings))

    tf.gfile.MakeDirs(FLAGS.output_dir)

    task_name = FLAGS.task_name.lower()
    if task_name not in processors:
        raise ValueError("Task not found: %s" % (task_name))

    processor = processors[task_name]()
    label_list = processor.get_labels()
    tokenizer = tokenization.FullTokenizer(
        vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case)

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2

    # https://github.com/tensorflow/tensorflow/issues/21470#issuecomment-422506263
    dist_strategy = tf.contrib.distribute.MirroredStrategy(
        num_gpus=FLAGS.num_gpu_cores,
        cross_device_ops=AllReduceCrossDeviceOps(
            'nccl', num_packs=FLAGS.num_gpu_cores),
        # cross_device_ops=AllReduceCrossDeviceOps('hierarchical_copy'),
    )

    log_every_n_steps = 8
    dist_run_config = RunConfig(
        train_distribute=dist_strategy,
        eval_distribute=dist_strategy,
        log_step_count_steps=log_every_n_steps,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps)

    tpu_run_config = tf.contrib.tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        tpu_config=tf.contrib.tpu.TPUConfig(
            iterations_per_loop=FLAGS.iterations_per_loop,
            num_shards=FLAGS.num_tpu_cores,
            per_host_input_for_training=is_per_host))

    train_examples = None
    num_train_steps = None
    num_warmup_steps = None
    if FLAGS.do_train:
        train_examples = processor.get_train_examples(FLAGS.data_dir)
        num_train_steps = int(
            len(train_examples) / FLAGS.train_batch_size * FLAGS.num_train_epochs)
        num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

    init_checkpoint = FLAGS.init_checkpoint
    is_multi_gpu = FLAGS.use_gpu and int(FLAGS.num_gpu_cores) >= 2

    model_fn = model_fn_builder(
        bert_config=bert_config,
        num_labels=len(label_list),
        init_checkpoint=init_checkpoint,
        learning_rate=FLAGS.learning_rate,
        num_train_steps=num_train_steps,
        num_warmup_steps=num_warmup_steps,
        use_tpu=FLAGS.use_tpu,
        use_one_hot_embeddings=FLAGS.use_tpu,
        use_gpu=FLAGS.use_gpu,
        num_gpu_cores=FLAGS.num_gpu_cores,
        fp16=FLAGS.use_fp16,
        weight_list=FLAGS.weight_list)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    if is_multi_gpu:
        estimator = Estimator(model_fn=model_fn,
                              params={},
                              config=dist_run_config)
    else:
        estimator = tf.contrib.tpu.TPUEstimator(
            use_tpu=FLAGS.use_tpu,
            model_fn=model_fn,
            config=tpu_run_config,
            train_batch_size=FLAGS.train_batch_size,
            eval_batch_size=FLAGS.eval_batch_size,
            predict_batch_size=FLAGS.predict_batch_size)

    if FLAGS.do_train:
        train_file = os.path.join(FLAGS.output_dir, "train.tf_record")
        file_based_convert_examples_to_features(train_examples, label_list,
                                                FLAGS.max_seq_length, tokenizer,
                                                train_file)
        tf.logging.info("***** Running training *****")
        tf.logging.info(" Num examples = %d", len(train_examples))
        tf.logging.info(" Batch size = %d", FLAGS.train_batch_size)
        tf.logging.info(" Num steps = %d", num_train_steps)
        train_input_fn = file_based_input_fn_builder(
            input_file=train_file,
            seq_length=FLAGS.max_seq_length,
            is_training=True,
            drop_remainder=True,
            batch_size=FLAGS.train_batch_size)
        estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)

        # TF Serving
        if FLAGS.save_for_serving:
            serving_dir = os.path.join(FLAGS.output_dir, 'serving')
            save_for_serving(estimator, serving_dir, FLAGS.max_seq_length,
                             not is_multi_gpu)

        # Find the latest checkpoint
        max_idx = 0
        for filename in os.listdir(FLAGS.output_dir):
            if filename.startswith('model.ckpt-'):
                max_idx = max(int(filename.split('.')[1].split('-')[1]), max_idx)
        init_checkpoint = os.path.join(FLAGS.output_dir, f'model.ckpt-{max_idx}')
        tf.logging.info(f'Current checkpoint: {init_checkpoint}')

    if FLAGS.do_eval:
        model_fn = model_fn_builder(
            bert_config=bert_config,
            num_labels=len(label_list),
            init_checkpoint=init_checkpoint,
            learning_rate=FLAGS.learning_rate,
            num_train_steps=num_train_steps,
            num_warmup_steps=num_warmup_steps,
            use_tpu=FLAGS.use_tpu,
            use_one_hot_embeddings=FLAGS.use_tpu,
            use_gpu=FLAGS.use_gpu,
            num_gpu_cores=FLAGS.num_gpu_cores,
            fp16=FLAGS.use_fp16,
            weight_list=FLAGS.weight_list)

        eval_estimator = tf.contrib.tpu.TPUEstimator(
            use_tpu=FLAGS.use_tpu,
            model_fn=model_fn,
            config=tpu_run_config,
            train_batch_size=FLAGS.train_batch_size,
            eval_batch_size=FLAGS.eval_batch_size,
            predict_batch_size=FLAGS.predict_batch_size)

        eval_examples = processor.get_dev_examples(FLAGS.data_dir)
        num_actual_eval_examples = len(eval_examples)
        if FLAGS.use_tpu:
            # TPU requires a fixed batch size for all batches, therefore the
            # number of examples must be a multiple of the batch size, or else
            # examples will get dropped. So we pad with fake examples which are
            # ignored later on. These do NOT count towards the metric (all
            # tf.metrics support a per-instance weight, and these get a weight
            # of 0.0).
            while len(eval_examples) % FLAGS.eval_batch_size != 0:
                eval_examples.append(PaddingInputExample())

        eval_file = os.path.join(FLAGS.output_dir, "eval.tf_record")
        file_based_convert_examples_to_features(eval_examples, label_list,
                                                FLAGS.max_seq_length, tokenizer,
                                                eval_file)
        tf.logging.info("***** Running evaluation *****")
        tf.logging.info(" Num examples = %d (%d actual, %d padding)",
                        len(eval_examples), num_actual_eval_examples,
                        len(eval_examples) - num_actual_eval_examples)
        tf.logging.info(" Batch size = %d", FLAGS.eval_batch_size)

        # This tells the estimator to run through the entire set.
        eval_steps = None
        # However, if running eval on the TPU, you will need to specify the
        # number of steps.
        if FLAGS.use_tpu:
            assert len(eval_examples) % FLAGS.eval_batch_size == 0
            eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size)

        eval_drop_remainder = True if FLAGS.use_tpu else False
        eval_input_fn = file_based_input_fn_builder(
            input_file=eval_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=eval_drop_remainder,
            batch_size=FLAGS.eval_batch_size)

        result = eval_estimator.evaluate(input_fn=eval_input_fn,
                                         steps=eval_steps)

        output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
        with tf.gfile.GFile(output_eval_file, "w") as writer:
            tf.logging.info("***** Eval results *****")
            for key in sorted(result.keys()):
                tf.logging.info(" %s = %s", key, str(result[key]))
                writer.write("%s = %s\n" % (key, str(result[key])))

        # Dump result as a JSON file (easy parsing for other tasks).
        class ExtEncoder(json.JSONEncoder):
            def default(self, obj):
                if isinstance(obj, np.integer):
                    return int(obj)
                if isinstance(obj, np.floating):
                    return float(obj)
                if isinstance(obj, np.ndarray):
                    return obj.tolist()
                else:
                    return super(ExtEncoder, self).default(obj)

        output_eval_file2 = os.path.join(FLAGS.output_dir, "eval_results.json")
        with tf.gfile.GFile(output_eval_file2, "w") as writer:
            json.dump(result, writer, indent=4, cls=ExtEncoder)

    if FLAGS.do_predict:
        predict_examples = processor.get_test_examples(FLAGS.data_dir)
        num_actual_predict_examples = len(predict_examples)
        if FLAGS.use_tpu:
            # TPU requires a fixed batch size for all batches, therefore the
            # number of examples must be a multiple of the batch size, or else
            # examples will get dropped. So we pad with fake examples which are
            # ignored later on.
            while len(predict_examples) % FLAGS.predict_batch_size != 0:
                predict_examples.append(PaddingInputExample())

        predict_file = os.path.join(FLAGS.output_dir, "predict.tf_record")
        file_based_convert_examples_to_features(predict_examples, label_list,
                                                FLAGS.max_seq_length, tokenizer,
                                                predict_file)
        tf.logging.info("***** Running prediction *****")
        tf.logging.info(" Num examples = %d (%d actual, %d padding)",
                        len(predict_examples), num_actual_predict_examples,
                        len(predict_examples) - num_actual_predict_examples)
        tf.logging.info(" Batch size = %d", FLAGS.predict_batch_size)

        predict_drop_remainder = True if FLAGS.use_tpu else False
        predict_input_fn = file_based_input_fn_builder(
            input_file=predict_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=predict_drop_remainder,
            batch_size=FLAGS.predict_batch_size)

        result = estimator.predict(input_fn=predict_input_fn)

        output_predict_file = os.path.join(FLAGS.output_dir, "test_results.tsv")
        with tf.gfile.GFile(output_predict_file, "w") as writer:
            num_written_lines = 0
            tf.logging.info("***** Predict results *****")
            for (i, prediction) in enumerate(result):
                probabilities = prediction["probabilities"]
                if i >= num_actual_predict_examples:
                    break
                output_line = "\t".join(
                    str(class_probability)
                    for class_probability in probabilities) + "\n"
                writer.write(output_line)
                num_written_lines += 1
        assert num_written_lines == num_actual_predict_examples

def get_encodes(x):
    # x is `batch_size` of lines, each of which is a json object
    samples = [json.loads(l) for l in x]
    text = [s['fact'][:50] + s['fact'][-50:] for s in samples]
    features = bc_client.encode(text)
    labels = [[str(random.choice(s['meta']['relevant_articles']))]
              for s in samples]
    return features, labels


config = tf.ConfigProto()
config.gpu_options.allow_growth = True
run_config = RunConfig(model_dir='/data/cips/save/%s' % MODEL_ID,
                       session_config=config,
                       save_checkpoints_steps=2000)
estimator = DNNClassifier(hidden_units=[512],
                          feature_columns=[
                              tf.feature_column.numeric_column('feature',
                                                               shape=(768,))
                          ],
                          n_classes=len(laws),
                          config=run_config,
                          label_vocabulary=laws_str,
                          dropout=0.1)

input_fn = lambda fp: (tf.data.TextLineDataset(fp)
                       .apply(tf.contrib.data.shuffle_and_repeat(buffer_size=10000))
                       .batch(batch_size)
                       .map(lambda x: tf.py_func(

def main(_):
    if not FLAGS.do_predict_one:
        tf.logging.set_verbosity(tf.logging.INFO)
        log_writer()

    processors = {"senti": SentimentProcessor}

    tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case,
                                                  FLAGS.init_checkpoint)

    # if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict and not FLAGS.do_predict_one:
    #     raise ValueError(
    #         "At least one of `do_train`, `do_eval` or `do_predict' must be True.")

    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

    if FLAGS.max_seq_length > bert_config.max_position_embeddings:
        raise ValueError(
            "Cannot use sequence length %d because the BERT model "
            "was only trained up to sequence length %d" %
            (FLAGS.max_seq_length, bert_config.max_position_embeddings))

    tf.gfile.MakeDirs(FLAGS.output_dir)

    task_name = FLAGS.task_name.lower()
    if task_name not in processors:
        raise ValueError("Task not found: %s" % (task_name))

    processor = processors[task_name]()
    label_list = processor.get_labels()
    tokenizer = tokenization.FullTokenizer(vocab_file=FLAGS.vocab_file,
                                           do_lower_case=FLAGS.do_lower_case)

    tpu_cluster_resolver = None
    # if FLAGS.use_tpu and FLAGS.tpu_name:
    #     tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
    #         FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.log_device_placement = False
    run_config = RunConfig(session_config=config,
                           model_dir=FLAGS.output_dir,
                           save_checkpoints_steps=FLAGS.save_checkpoints_steps)

    (input_ids, input_mask, segment_ids, label_ids, total_loss,
     per_example_loss, logits, probabilities) = convert_ckpt_to_saved_model(
         FLAGS.bert_config_file, FLAGS.init_checkpoint)

    init = tf.global_variables_initializer()
    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(init)
        ckpt = tf.train.latest_checkpoint(FLAGS.output_dir)  # locate the stored variables
        saver.restore(sess, ckpt)  # load them into the current session
        print('finish loading model!')
        while True:
            string = input("Please input:")
            example = InputExample(guid="0", text_a=string, text_b="", label="1")
            feature = convert_examples_to_features([example], label_list,
                                                   FLAGS.max_seq_length,
                                                   tokenizer)[0]
            result = sess.run(
                [probabilities],
                feed_dict={
                    input_ids: [feature.input_ids],
                    input_mask: [feature.input_mask],
                    segment_ids: [feature.segment_ids]
                })
            print('probabilities:' + str(result))

def train(params: user_params, input_):
    # Session configuration for the estimator runtime
    session_config = tf.ConfigProto()
    session_config.allow_soft_placement = True
    session_config.gpu_options.per_process_gpu_memory_fraction = 0.9
    session_config.gpu_options.allow_growth = True

    if FLAGS.gpu_cores:
        gpu_cors = tuple(FLAGS.gpu_cores)
        devices = ["/device:GPU:%d" % int(d) for d in gpu_cors]
        tf.logging.warn("using device: " + " ".join(devices))
        distribution = tf.contrib.distribute.MirroredStrategy(devices=devices)
        tf.logging.warn("in train.py, distribution")
        tf.logging.warn(distribution._devices)
        config = RunConfig(save_checkpoints_steps=FLAGS.check_steps,
                           train_distribute=distribution,
                           keep_checkpoint_max=2,
                           session_config=session_config)
    else:
        config = RunConfig(save_checkpoints_steps=FLAGS.check_steps,
                           keep_checkpoint_max=2,
                           session_config=session_config)

    estimator = tf.estimator.Estimator(model_fn=model_fn,
                                       model_dir=FLAGS.model_dir,
                                       config=config,
                                       params=params)

    train_data_dir = input_.get_data_dir(tf.estimator.ModeKeys.TRAIN, params)
    eval_data_dir = input_.get_data_dir(tf.estimator.ModeKeys.EVAL, params)

    hook = [] if not params.enable_ema else [
        LoadEMAHook(params.model_dir, 0.99)
    ]
    listeners = [
        EvalListener(estimator,
                     lambda: input_.input_fn(mode=tf.estimator.ModeKeys.EVAL,
                                             params=params,
                                             data_dir=train_data_dir),
                     name="train_data",
                     hook=hook),
        EvalListener(estimator,
                     lambda: input_.input_fn(mode=tf.estimator.ModeKeys.EVAL,
                                             params=params,
                                             data_dir=eval_data_dir),
                     hook=hook),
        # VariableListener()
    ]

    def train_input_fn():
        return input_.input_fn(mode=tf.estimator.ModeKeys.TRAIN,
                               params=params,
                               data_dir=train_data_dir)

    # GPU cluster
    if config.cluster_spec:
        train_spec = MyTraining.TrainSpec(train_input_fn, FLAGS.max_steps)
        eval_spec = MyTraining.EvalSpec(
            lambda: input_.input_fn(mode=tf.estimator.ModeKeys.EVAL,
                                    params=params,
                                    data_dir=train_data_dir),
            steps=FLAGS.check_steps)
        MyTraining.train_and_evaluate(estimator, train_spec, eval_spec, listeners)
        if config.task_type == TaskType.CHIEF:
            model_dir = estimator.export_savedmodel(
                FLAGS.model_dir, input_.get_input_reciever_fn())
            tf.logging.warn("save model to %s" % model_dir)
    # CPU solo
    else:
        # from tensorflow.python import debug as tf_debug
        # debug_hook = [tf_debug.LocalCLIDebugHook(ui_type="readline")]
        # estimator.train(train_input_fn, max_steps=FLAGS.max_steps,
        #                 saving_listeners=listeners, hooks=debug_hook)
        estimator.train(train_input_fn,
                        max_steps=FLAGS.max_steps,
                        saving_listeners=listeners)
        dir = estimator.export_savedmodel(tf.flags.FLAGS.model_dir,
                                          input_.get_input_reciever_fn())
        tf.logging.warn("save model to %s" % dir)

    for listener in listeners:
        print(listener.name)
        print(listener.history)

def get_encodes(x):
    # x is `batch_size` of lines, each of which is a json object
    samples = [json.loads(l) for l in x]
    text = [s['fact'][:50] + s['fact'][-50:] for s in samples]
    features = bc.encode(text)
    # randomly choose a label
    labels = [[str(random.choice(s['meta']['relevant_articles']))]
              for s in samples]
    return features, labels


config = tf.ConfigProto()
config.gpu_options.allow_growth = True
run_config = RunConfig(model_dir='/data/cips/save/law-model',
                       session_config=config,
                       save_checkpoints_steps=1000)
estimator = DNNClassifier(
    hidden_units=[512],
    feature_columns=[tf.feature_column.numeric_column('feature', shape=(768,))],
    n_classes=len(laws),
    config=run_config,
    label_vocabulary=laws_str,
    dropout=0.1)

input_fn = lambda fp: (tf.data.TextLineDataset(fp)
                       .apply(tf.contrib.data.shuffle_and_repeat(buffer_size=10000))
                       .batch(batch_size)
                       .map(lambda x: tf.py_func(get_encodes, [x],
                                                 [tf.float32, tf.string],
                                                 name='bert_client'),
                            num_parallel_calls=num_parallel_calls))

def _Run(self, is_training, use_trt, batch_size, num_epochs, model_dir):
    """Train or evaluate the model.

    Args:
      is_training: whether to train or evaluate the model. In training mode,
        quantization will be simulated where the quantize_and_dequantize_v2 are
        placed.
      use_trt: if true, use TRT INT8 mode for evaluation, which will perform
        real quantization. Otherwise use native TensorFlow which will perform
        simulated quantization. Ignored if is_training is True.
      batch_size: batch size.
      num_epochs: how many epochs to train. Ignored if is_training is False.
      model_dir: where to save or load checkpoint.

    Returns:
      The Estimator evaluation result.
    """
    # Get dataset
    train_data, test_data = mnist.load_data()

    def _PreprocessFn(x, y):
      x = math_ops.cast(x, dtypes.float32)
      x = array_ops.expand_dims(x, axis=2)
      x = 2.0 * (x / 255.0) - 1.0
      y = math_ops.cast(y, dtypes.int32)
      return x, y

    def _EvalInputFn():
      mnist_x, mnist_y = test_data
      dataset = data.Dataset.from_tensor_slices((mnist_x, mnist_y))
      dataset = dataset.apply(
          data.experimental.map_and_batch(map_func=_PreprocessFn,
                                          batch_size=batch_size,
                                          num_parallel_calls=8))
      dataset = dataset.repeat(count=1)
      iterator = dataset.make_one_shot_iterator()
      features, labels = iterator.get_next()
      return features, labels

    def _TrainInputFn():
      mnist_x, mnist_y = train_data
      dataset = data.Dataset.from_tensor_slices((mnist_x, mnist_y))
      dataset = dataset.shuffle(2 * len(mnist_x))
      dataset = dataset.apply(
          data.experimental.map_and_batch(map_func=_PreprocessFn,
                                          batch_size=batch_size,
                                          num_parallel_calls=8))
      dataset = dataset.repeat(count=num_epochs)
      iterator = dataset.make_one_shot_iterator()
      features, labels = iterator.get_next()
      return features, labels

    def _ModelFn(features, labels, mode):
      if is_training:
        logits_out = self._BuildGraph(features)
      else:
        graph_def = self._GetGraphDef(use_trt, batch_size, model_dir)
        logits_out = importer.import_graph_def(
            graph_def,
            input_map={INPUT_NODE_NAME: features},
            return_elements=[OUTPUT_NODE_NAME + ':0'],
            name='')[0]

      loss = losses.sparse_softmax_cross_entropy(labels=labels,
                                                 logits=logits_out)
      summary.scalar('loss', loss)

      classes_out = math_ops.argmax(logits_out, axis=1, name='classes_out')
      accuracy = metrics.accuracy(labels=labels,
                                  predictions=classes_out,
                                  name='acc_op')
      summary.scalar('accuracy', accuracy[1])

      if mode == ModeKeys.EVAL:
        return EstimatorSpec(mode,
                             loss=loss,
                             eval_metric_ops={'accuracy': accuracy})
      elif mode == ModeKeys.TRAIN:
        optimizer = AdamOptimizer(learning_rate=1e-2)
        train_op = optimizer.minimize(loss, global_step=get_global_step())
        return EstimatorSpec(mode, loss=loss, train_op=train_op)

    config_proto = config_pb2.ConfigProto()
    config_proto.gpu_options.allow_growth = True
    estimator = Estimator(model_fn=_ModelFn,
                          model_dir=model_dir if is_training else None,
                          config=RunConfig(session_config=config_proto))

    if is_training:
      estimator.train(_TrainInputFn)
    results = estimator.evaluate(_EvalInputFn)
    logging.info('accuracy: %s', str(results['accuracy']))
    return results

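# A minimal driver sketch for _Run above (hedged: the batch size, epoch count,
# and model_dir are hypothetical; only the method signature comes from the
# snippet). Per the docstring, training simulates quantization via
# quantize_and_dequantize_v2 nodes; the two evaluation calls then compare
# simulated (native TF) against real (TRT INT8) quantization on the same
# checkpoint:
#
#   model_dir = '/tmp/quantization_mnist'  # hypothetical path
#   self._Run(is_training=True, use_trt=False, batch_size=128,
#             num_epochs=2, model_dir=model_dir)
#   tf_results = self._Run(is_training=False, use_trt=False, batch_size=128,
#                          num_epochs=None, model_dir=model_dir)
#   trt_results = self._Run(is_training=False, use_trt=True, batch_size=128,
#                           num_epochs=None, model_dir=model_dir)
#   logging.info('native vs TRT INT8 accuracy: %s vs %s',
#                tf_results['accuracy'], trt_results['accuracy'])
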
def bidaf_train(_):
    params = user_params(
        procedure=tuple(FLAGS.procedure),
        label_name=FLAGS.label_name,
        learning_rate=FLAGS.learning_rate,
        embed_size=FLAGS.embed_size,
        embedding_file_path=FLAGS.embedding_file_path,
        context_name=FLAGS.context_name,
        question_name=FLAGS.question_name,
        rnn_hidden_size=FLAGS.rnn_hidden_size,
        data_dir=FLAGS.data_dir,
        model_dir=FLAGS.model_dir,
        batch_size=FLAGS.batch_size,
        drop_out_rate=FLAGS.drop_out_rate,
        p1=FLAGS.p1,
        p2=FLAGS.p2,
        feature_voc_file_path=FLAGS.feature_voc_file_path,
        gpu_cores_list=FLAGS.gpu_cores,
        transfromer_conv_layers=FLAGS.transfromer_conv_layers,
        transfromer_conv_kernel_size=FLAGS.transfromer_conv_kernel_size,
        transfromer_head_number=FLAGS.transfromer_head_number,
        tansformer_d_model=FLAGS.tansformer_d_model,
        clip_norm=FLAGS.clip_norm,
        use_char_embedding=FLAGS.use_char_embedding,
        char_embedding_size=FLAGS.char_embedding_size,
        char_feature_name=FLAGS.char_feature_name,
        char_question_name=FLAGS.char_question_name,
        example_max_length=FLAGS.example_max_length,
        enable_ema=FLAGS.enable_ema,
        ema_decay=FLAGS.ema_decay,
        char_filters=FLAGS.char_filters,
        ans_limit=FLAGS.ans_limit)

    # Load the embedding file
    enrich_hyper_parameters(params)

    # Configure the logging level
    level_str = 'tf.logging.{}'.format(str(tf.flags.FLAGS.log_level).upper())
    tf.logging.set_verbosity(eval(level_str))

    # Load the data: create a SparkInput instance
    input = SparkInput(params)

    sess_config = tf.ConfigProto()
    sess_config.allow_soft_placement = True
    sess_config.gpu_options.per_process_gpu_memory_fraction = 0.9
    sess_config.gpu_options.allow_growth = True
    # sess_config.report_tensor_allocations_upon_oom = True
    # sess_config.log_device_placement = True

    # Runtime configuration for the estimator
    if FLAGS.gpu_cores:
        gpu_cors = tuple(eval(FLAGS.gpu_cores))
        # each "/device:GPU:%d" % d is one element of the devices list
        devices = ["/device:GPU:%d" % d for d in gpu_cors]
        # distribution is a MirroredStrategy instance
        distribution = tf.contrib.distribute.MirroredStrategy(devices=devices)
        config = RunConfig(save_checkpoints_steps=FLAGS.check_steps,
                           train_distribute=distribution)
    else:
        # config is a RunConfig instance
        config = RunConfig(save_checkpoints_steps=FLAGS.check_steps,
                           session_config=sess_config)

    # Create the estimator
    estimator = tf.estimator.Estimator(model_fn=model_fn,
                                       model_dir=FLAGS.model_dir,
                                       config=config,
                                       params=params)

    # Resolve the train and eval data file paths
    train_data_dir = input.get_data_dir(tf.estimator.ModeKeys.TRAIN,
                                        params.data_dir)
    eval_data_dir = input.get_data_dir(tf.estimator.ModeKeys.EVAL,
                                       params.data_dir)

    # Create EvalListeners to evaluate on both train and eval data
    hook = [] if not params.enable_ema else [
        LoadEMAHook(params.model_dir, FLAGS.ema_decay)
    ]
    listeners = [
        EvalListener(estimator,
                     lambda: input.input_fn(mode=tf.estimator.ModeKeys.EVAL,
                                            params=params,
                                            data_dir=train_data_dir),
                     name="train_data",
                     hook=hook),
        EvalListener(estimator,
                     lambda: input.input_fn(mode=tf.estimator.ModeKeys.EVAL,
                                            params=params,
                                            data_dir=eval_data_dir),
                     hook=hook)
    ]

    # Build the training input from its file path
    def train_input_fn():
        return input.input_fn(mode=tf.estimator.ModeKeys.TRAIN,
                              params=params,
                              data_dir=train_data_dir)

    # GPU cluster
    if config.cluster_spec:
        train_spec = MyTraining.TrainSpec(train_input_fn, FLAGS.max_steps)
        eval_spec = MyTraining.EvalSpec(
            lambda: input.input_fn(mode=tf.estimator.ModeKeys.EVAL,
                                   params=params),
            steps=FLAGS.check_steps)
        MyTraining.train_and_evaluate(estimator, train_spec, eval_spec, listeners)
        if config.task_type == TaskType.CHIEF:
            model_dir = estimator.export_savedmodel(
                FLAGS.model_dir, input.get_input_reciever_fn())
            tf.logging.warn("save model to %s" % model_dir)
    # CPU solo
    else:
        print("executing *************************")
        estimator.train(train_input_fn,
                        max_steps=FLAGS.max_steps,
                        saving_listeners=listeners)
        dir = estimator.export_savedmodel(tf.flags.FLAGS.model_dir,
                                          input.get_input_reciever_fn())
        tf.logging.warn("save model to %s" % dir)

    for listener in listeners:
        print(listener.name)
        print(listener.history)