def main(args):
    """Run batched inference and dump per-layer outputs and attentions to .npz files."""
    config = get_config_from_args(args, mode='dev')
    config.batch_size = args.batch_size
    logger.info('config: \n{}'.format('\n'.join(
        ['{}: {}'.format(i[0], i[1]) for i in sorted(config.items())])))

    record_parser = get_record_parser(config.model, config.task)
    predict_input_fn = input_fn_builder(record_parser, config)(config)
    iterator = predict_input_fn.make_initializable_iterator()

    logger.info("running in batch mode...")
    checkpoint_path = tf.train.latest_checkpoint(config.checkpoint_dir)
    out_dir = args.out_dir
    os.makedirs(out_dir, exist_ok=True)

    with tf.Session() as sess:
        features, labels = iterator.get_next()
        model = get_task_model_class(config.model, config.task)(config)
        feed_fn, output_tensors = model.infer_graph(config)
        saver = tf.train.Saver(var_list=tf.global_variables())
        logger.info(
            "restoring model weights from: {}...".format(checkpoint_path))
        saver.restore(sess, checkpoint_path)

        batches = 1
        sess.run(iterator.initializer)
        while True:
            try:
                feature_values = sess.run(features)
                feed = feed_fn(feature_values)
                attentions, encoded_output = sess.run(
                    [model.attentions, model.encoded_output], feed_dict=feed)
                # attach every layer's hidden states and attention maps to the
                # feature dict so they are saved alongside the inputs
                layers = len(encoded_output)
                for layer in range(layers):
                    feature_values['layer_{}'.format(layer)] = encoded_output[layer]
                    feature_values['attn_{}'.format(layer)] = attentions[layer]
                if batches < 3:
                    logger.info('num layers={}'.format(layers))
                    logger.info('\n'.join([
                        '{}={}'.format(k, v.shape)
                        for k, v in feature_values.items()
                    ]))
                output_path = os.path.join(
                    out_dir, '{}_b{}'.format(args.model, batches))
                logger.info('saving outputs for b={}...'.format(batches))
                np.savez_compressed(output_path, **feature_values)
                logger.info('outputs saved to: {}'.format(output_path))
                batches += 1
            except tf.errors.OutOfRangeError:
                logger.info('all done')
                break
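# A minimal sketch (not part of the original script) of reading one of the
# compressed dumps written by main() above; the path below is hypothetical, and
# the 'layer_*' / 'attn_*' keys follow the naming used in the loop:
#
# import numpy as np
# dump = np.load('out_dir/bert_b1.npz')
# print(dump.files)
# print(dump['layer_0'].shape, dump['attn_0'].shape)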
def main(args):
    """Export the model as a SavedModel and an (optionally quantized) TFLite file."""
    config = get_config_from_args(args, mode='infer')
    max_seq_length = args.max_seq_length or config.max_seq_length
    config.max_seq_length = max_seq_length
    logger.info("exporting {} model...".format(config.model))
    checkpoint_path = tf.train.latest_checkpoint(config.checkpoint_dir)

    with tf.Session() as sess:
        model = get_task_model_class(config.model, config.task)(config)
        input_nodes, logits_ph = model.export_graph(config,
                                                    training=False,
                                                    logits=True)
        saver = tf.train.Saver(var_list=tf.global_variables())
        logger.info('begin restoring model from checkpoints...')
        saver.restore(sess, checkpoint_path)

        inference_graph_file = config.inference_graph
        saved_model_path = os.path.join(os.path.dirname(inference_graph_file),
                                        'saved_model')
        if not os.path.exists(saved_model_path):
            logger.info("exporting saved_model...")
            tf.saved_model.simple_save(sess,
                                       saved_model_path,
                                       inputs=input_nodes,
                                       outputs={'logits': logits_ph})

        if args.quantize:
            save_name = "{}.quant.tflite".format(model.name)
        else:
            save_name = "{}.tflite".format(model.name)
        tflite_file = os.path.join(os.path.dirname(inference_graph_file),
                                   save_name)
        if not os.path.exists(tflite_file):
            logger.info("exporting tflite model...")
            converter = tf.lite.TFLiteConverter.from_session(
                sess, list(input_nodes.values()), [logits_ph])
            if args.quantize:
                converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]
            # allow falling back to TF ops that have no builtin TFLite kernel
            converter.target_ops = [
                tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS
            ]
            tflite_model = converter.convert()
            with open(tflite_file, "wb") as f:
                f.write(tflite_model)


"""freeze_graph --input_saved_model_dir=data/ckpt/bert/saved_model \
"""
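# A minimal sketch (assumption, not from the export script above) of sanity-checking
# the exported .tflite file with the TFLite interpreter; 'tflite_file' mirrors the
# path built in main() above:
#
# interpreter = tf.lite.Interpreter(model_path=tflite_file)
# interpreter.allocate_tensors()
# print(interpreter.get_input_details())
# print(interpreter.get_output_details())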
def serve(args):
    """Serve the model: pull requests from a queue, predict, push responses back."""
    config = get_config_from_args(args, mode='infer')
    checkpoint_path = tf.train.latest_checkpoint(config.checkpoint_dir)

    # initialize model
    sess = tf.Session()
    model = get_task_model_class(config.model, config.task)(config)
    feed_fn, output_tensors = model.infer_graph(config)
    saver = tf.train.Saver(var_list=tf.global_variables())
    saver.restore(sess, checkpoint_path)
    logger.info("{} loaded, waiting for questions...".format(checkpoint_path))

    while True:
        msg = request_queue.get()
        if msg is None:
            break
        # call model to do prediction
        (request_id, model_id, inputs) = msg
        logger.info("begin preprocessing on request={}".format(request_id))
        outputs = []
        input_features = model.text_to_feature(inputs, config)
        logger.info("begin predicting on request={}".format(request_id))
        total_batches = len(input_features) // args.batch_size
        for batch_feature in tqdm(batch(input_features, args.batch_size),
                                  total=total_batches):
            feed = feed_fn(batch_feature)
            model_outputs = sess.run(output_tensors, feed)
            output = model.prepare_outputs(model_outputs, config,
                                           batch_feature)
            outputs.extend(output)
        logger.info("prediction for {} finished".format(request_id))
        response_queue.put((request_id, model_id, outputs))
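# serve() and the graph-mode demo below rely on a batching helper defined elsewhere
# in the repo; a minimal sketch of what batch() is assumed to do (yield fixed-size
# slices of a feature list):
#
# def batch(items, batch_size):
#     for start in range(0, len(items), batch_size):
#         yield items[start:start + batch_size]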
def main(args):
    """Answer a few sample SQuAD-style questions, in eager or graph mode."""
    config = get_config_from_args(args, mode='infer')
    max_seq_length = args.max_seq_length or config.max_seq_length
    config.max_seq_length = max_seq_length
    max_answer_span = args.max_answer_span or config.max_answer_span
    config.max_answer_span = max_answer_span

    contexts = [
        "The American Football Conference (AFC) champion Denver Broncos "
        "defeated the National Football Conference (NFC) champion Carolina "
        "Panthers 24–10 to earn their third Super Bowl title.",
        "The game was played on February 7, 2016, at Levi's Stadium in the "
        "San Francisco Bay Area at Santa Clara, California.",
        "College sports are also popular in southern California. "
        "The UCLA Bruins and the USC Trojans both field teams in NCAA Division"
        " I in the Pac-12 Conference, and there is a longtime "
        "rivalry between the schools.",
    ]
    questions = [
        "What is the AFC short for?",
        "What day was the game played on?",
        "What other kind of sport is popular in southern California?",
    ]
    text_inputs = [{
        'qid': qid,
        'question': q,
        'context': ctx
    } for qid, (q, ctx) in enumerate(zip(questions, contexts))]
    outputs = []

    if args.eager:
        logger.info("running in eager mode...")
        tf.enable_eager_execution()
        checkpoint_path = tf.train.latest_checkpoint(config.checkpoint_dir)
        logger.info("restoring weights from: {}...".format(checkpoint_path))
        with tf.contrib.eager.restore_variables_on_create(checkpoint_path):
            model = get_task_model_class(config.model, config.task)(config)
            logger.info("warming up model...")
            model.warm_up()
        text_features = model.text_to_feature(text_inputs, config)
        logger.info("begin inferring...")
        start_time = time.time()
        model_outputs = model.infer(text_features)
        output = model.prepare_outputs(model_outputs, config, text_features)
        logger.info('output={}\n\n'.format(output))
        outputs.extend(output)
    else:
        logger.info("running in graph mode...")
        checkpoint_path = tf.train.latest_checkpoint(config.checkpoint_dir)
        with tf.Session() as sess:
            model = get_task_model_class(config.model, config.task)(config)
            feed_fn, output_tensors = model.infer_graph(config)
            saver = tf.train.Saver(var_list=tf.global_variables())
            logger.info('begin restoring model from checkpoints...')
            saver.restore(sess, checkpoint_path)
            logger.info('begin predicting...')
            text_features = model.text_to_feature(text_inputs, config)
            start_time = time.time()
            for batch_features in batch(text_features, args.batch_size):
                feed = feed_fn(batch_features)
                model_outputs = sess.run(output_tensors, feed)
                output = model.prepare_outputs(model_outputs, config,
                                               batch_features)
                logger.info('output={}\n\n'.format(output))
                outputs.extend(output)

    end_time = time.time()
    logger.info('infer time: {:.4f} s'.format(end_time - start_time))
    for q, c, a in zip(questions, contexts, outputs):
        logger.info('q={}\na={}\n\tcontext={}\n\n'.format(q, a, c))
def main(args):
    """Compare eager-mode answers and logits against the exported TFLite model."""
    config = get_config_from_args(args, mode='infer')
    max_seq_length = args.max_seq_length or config.max_seq_length
    config.max_seq_length = max_seq_length
    max_answer_span = args.max_answer_span or config.max_answer_span
    config.max_answer_span = max_answer_span
    model_file = args.model_file

    questions = [
        "What is the AFC short for?",
        # "What day was the game played on?",
    ]
    contexts = [
        "The American Football Conference (AFC) champion Denver Broncos defeated the National "
        "Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title.",
        # "The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area "
        # "at Santa Clara, California.",
    ]

    logger.info("running in eager mode...")
    tf.enable_eager_execution()
    checkpoint_path = tf.train.latest_checkpoint(config.checkpoint_dir)
    logger.info("restoring model weights...")
    with tf.contrib.eager.restore_variables_on_create(checkpoint_path):
        model = get_model(config)
        logger.info("warming up model...")
        model.warm_up(config)

    context_spans, inputs = model.get_inputs(questions, contexts, config)
    inputs_tensor = [
        tf.convert_to_tensor(i, dtype=tf.int32) for i in inputs.values()
    ]
    logger.info("begin inferring...")
    start_predictions, end_predictions, norm_scores = model.infer(
        inputs_tensor, max_answer_span=config.max_answer_span, export=True)
    prediction_answers = decode_answer(contexts, context_spans,
                                       start_predictions, end_predictions)
    for q, c, a, ns in zip(questions, contexts, prediction_answers,
                           norm_scores):
        logger.info('q={}\na={}\n\tcontext={}\n\n'.format(
            q, (a, round(float(ns), 4)), c))

    print(model.embeddings.shape)
    print(model.logits.shape)
    input_ids = inputs_tensor[0]
    print(input_ids.shape)

    # save reference inputs/outputs for comparison with the TFLite model
    input_ids_file = os.path.join(os.path.dirname(model_file), 'input_ids')
    input_embeddings_file = os.path.join(os.path.dirname(model_file),
                                         'input_embeddings')
    output_logits_file = os.path.join(os.path.dirname(model_file),
                                      'output_logits')
    np.save(input_ids_file, input_ids)
    np.save(input_embeddings_file, model.embeddings)
    np.save(output_logits_file, model.logits)

    # Load TFLite model and allocate tensors.
    interpreter = tf.lite.Interpreter(model_path=model_file)
    interpreter.allocate_tensors()

    # Get input and output tensors.
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    print(input_details)
    print(output_details)
    print(model.logits)

    # Feed the same inputs to the TFLite interpreter.
    interpreter.set_tensor(input_details[0]['index'], input_ids)
    interpreter.set_tensor(input_details[1]['index'], model.embeddings)
    interpreter.invoke()

    # `get_tensor()` returns a copy of the tensor data;
    # use `tensor()` to get a pointer to the tensor.
    output_data = interpreter.get_tensor(output_details[0]['index'])
    print(output_data.shape)
    print(output_data)
    print(np.allclose(output_data, model.logits, rtol=1e-4))
def main(args):
    config = get_config_from_args(args, mode='dev')
    config.iterate_checkpoints = args.iterate_checkpoints
    config.checkpoint_path = args.checkpoint_path
    config.iterate_timeout = args.iterate_timeout
    evaluate(config)
def main(args):
    """Profile parameters, FLOPs, memory, and latency of the inference graph."""
    config = get_config_from_args(args, mode='infer')
    model_name = config.model
    kwargs = dict(training=False, logits=True)
    if model_name == 'ebert':
        kwargs['fake_cache_first'] = args.cache_segment == 1
        kwargs['fake_cache_second'] = args.cache_segment == 2
    config.batch_size = args.batch_size
    config.max_seq_length = args.max_seq_length or config.max_seq_length

    logger.info("running in graph mode...")
    run_metadata = tf.RunMetadata()
    with tf.Session() as sess:
        model = get_task_model_class(config.model, task=args.task)(config)
        inputs_dict, logits_ph = model.export_graph(config, **kwargs)
        sess.run(tf.global_variables_initializer())

        opt_builder = tf.profiler.ProfileOptionBuilder
        if args.print_parameters:
            tf.profiler.profile(
                sess.graph,
                options=opt_builder.trainable_variables_parameter())

        if not args.not_profile_flops:
            prof_options = opt_builder.float_operation()
            prof_options['hide_name_regexes'] = ['.*/Initializer/.*']
            tfprof_node = tf.profiler.profile(sess.graph, options=prof_options)
            profile_metric(model_name, tfprof_node,
                           metric='total_float_ops', metric_name='flops')

        if args.profile_memory:
            options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        else:
            options = None
            run_metadata = None
        _ = sess.run([logits_ph],
                     feed_dict=inputs_dict,
                     options=options,
                     run_metadata=run_metadata)
        if args.profile_memory:
            opts = tf.profiler.ProfileOptionBuilder(
                tf.profiler.ProfileOptionBuilder.time_and_memory()).build()
            tfprof_node = tf.profiler.profile(tf.get_default_graph(),
                                              run_meta=run_metadata,
                                              cmd='scope',
                                              options=opts)
            profile_metric(model_name, tfprof_node,
                           metric='total_requested_bytes', metric_name='mem')

        if args.profile_time:
            # warm up for two rounds
            logger.info("warm up for two rounds...")
            for _ in range(2):
                sess.run([logits_ph], feed_dict=inputs_dict)
            logger.info("start running 10 rounds...")
            start_time = time.time()
            # benchmark 10 rounds and report the average
            for _ in range(10):
                sess.run([logits_ph], feed_dict=inputs_dict)
            end_time = time.time()
            print('infer_time: {:.4f} s'.format((end_time - start_time) / 10))
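# profile_metric() is defined elsewhere in the repo; a minimal sketch of the kind of
# read it is assumed to perform on the tfprof result (total_float_ops and
# total_requested_bytes are real fields of the returned GraphNodeProto; the print
# format is hypothetical):
#
# def profile_metric(model_name, tfprof_node, metric, metric_name):
#     value = getattr(tfprof_node, metric)
#     print('{} {}: {}'.format(model_name, metric_name, value))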
def main(args):
    config = get_config_from_args(args, mode='train')
    train(config)