Example 1
def main(args):
    config = get_config_from_args(args, mode='dev')
    config.batch_size = args.batch_size
    logger.info('config: \n{}'.format('\n'.join(
        ['{}: {}'.format(i[0], i[1]) for i in sorted(config.items())])))

    record_parser = get_record_parser(config.model, config.task)
    predict_input_fn = input_fn_builder(record_parser, config)(config)
    iterator = predict_input_fn.make_initializable_iterator()
    logger.info("running in batch mode...")
    checkpoint_path = tf.train.latest_checkpoint(config.checkpoint_dir)
    out_dir = args.out_dir
    os.makedirs(out_dir, exist_ok=True)

    with tf.Session() as sess:
        features, labels = iterator.get_next()

        model = get_task_model_class(config.model, config.task)(config)
        feed_fn, output_tensors = model.infer_graph(config)

        saver = tf.train.Saver(var_list=tf.global_variables())
        logger.info(
            "restoring model weights from: {}...".format(checkpoint_path))
        saver.restore(sess, checkpoint_path)
        batches = 1
        sess.run(iterator.initializer)
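        # Drain the dataset batch by batch: for each batch, attach the per-layer
        # encoder outputs and attention maps to the input features and save the
        # whole dictionary as a compressed .npz archive.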
        while True:
            try:
                feature_values = sess.run(features)
                # logger.info('feature_values={}...'.format(feature_values))
                feed = feed_fn(feature_values)

                attentions, encoded_output = sess.run(
                    [model.attentions, model.encoded_output], feed_dict=feed)
                layers = len(encoded_output)
                for layer in range(layers):
                    feature_values['layer_{}'.format(
                        layer)] = encoded_output[layer]
                    feature_values['attn_{}'.format(layer)] = attentions[layer]

                if batches < 3:
                    logger.info('num layers={}'.format(layers))
                    logger.info('\n'.join([
                        '{}={}'.format(k, v.shape)
                        for k, v in feature_values.items()
                    ]))

                output_path = os.path.join(
                    out_dir, '{}_b{}'.format(args.model, batches))
                logger.info('saving outputs for b={}...'.format(batches))
                np.savez_compressed(output_path, **feature_values)
                logger.info('outputs saved to: {}'.format(output_path))

                batches += 1
            except tf.errors.OutOfRangeError:
                logger.info('all done')
                break
Example 2
def main(args):
    config = get_config_from_args(args, mode='infer')
    max_seq_length = args.max_seq_length or config.max_seq_length
    config.max_seq_length = max_seq_length
    logger.info("exporting {} model...".format(config.model))
    checkpoint_path = tf.train.latest_checkpoint(config.checkpoint_dir)

    with tf.Session() as sess:

        model = get_task_model_class(config.model, config.task)(config)
        input_nodes, logits_ph = model.export_graph(config,
                                                    training=False,
                                                    logits=True)

        saver = tf.train.Saver(var_list=tf.global_variables())
        logger.info('begin restoring model from checkpoints...')
        saver.restore(sess, checkpoint_path)

        inference_graph_file = config.inference_graph

        saved_model_path = os.path.join(os.path.dirname(inference_graph_file),
                                        'saved_model')
        if not os.path.exists(saved_model_path):
            logger.info("exporting saved_model...")
            tf.saved_model.simple_save(sess,
                                       saved_model_path,
                                       inputs=input_nodes,
                                       outputs={'logits': logits_ph})

        if args.quantize:
            save_name = "{}.quant.tflite".format(model.name)
        else:
            save_name = "{}.tflite".format(model.name)
        tflite_file = os.path.join(os.path.dirname(inference_graph_file),
                                   save_name)
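        # Convert the restored session graph to a TFLite flatbuffer; SELECT_TF_OPS
        # lets ops without a builtin TFLite kernel fall back to TensorFlow kernels,
        # and OPTIMIZE_FOR_SIZE applies size-oriented optimizations (such as weight
        # quantization) when args.quantize is set.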
        if not os.path.exists(tflite_file):
            logger.info("exporting tflite model...")
            converter = tf.lite.TFLiteConverter.from_session(
                sess, list(input_nodes.values()), [logits_ph])
            if args.quantize:
                converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]

            converter.target_ops = [
                tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS
            ]
            tflite_model = converter.convert()
            with open(tflite_file, "wb") as f:
                f.write(tflite_model)
        """freeze_graph --input_saved_model_dir=data/ckpt/bert/saved_model \
Example 3
def serve(args):
    config = get_config_from_args(args, mode='infer')
    # tf.enable_eager_execution()
    # tf.set_random_seed(config.random_seed)
    checkpoint_path = tf.train.latest_checkpoint(config.checkpoint_dir)

    # initialize model
    sess = tf.Session()
    model = get_task_model_class(config.model, config.task)(config)
    feed_fn, output_tensors = model.infer_graph(config)
    saver = tf.train.Saver(var_list=tf.global_variables())
    saver.restore(sess, checkpoint_path)

    logger.info("{} loaded, waiting for questions...".format(checkpoint_path))

    while True:
        msg = request_queue.get()
        if msg is None:
            break
        # call model to do prediction
        (request_id, model_id, inputs) = msg
        logger.info("begin preprocessing on request={}".format(request_id))
        outputs = []
        input_features = model.text_to_feature(inputs, config)
        logger.info("begin predicting on request={}".format(request_id))
        total_batches = len(input_features) // args.batch_size
        for batch_feature in tqdm(batch(input_features, args.batch_size),
                                  total=total_batches):
            feed = feed_fn(batch_feature)
            # logger.info("{}: batch {} started...".format(request_id, idx))

            model_outputs = sess.run(output_tensors, feed)
            output = model.prepare_outputs(model_outputs, config,
                                           batch_feature)
            # logger.info("{}: batch {} done...".format(request_id, idx))
            outputs.extend(output)
            # prediction_answers = decode_answer(
            #     contexts, context_spans, start_predictions, end_predictions,
            #     output_char_start)
            # all_answers.extend(prediction_answers)
            # all_probabilities.extend([round(float(s), 6)
            # for s in norm_scores])
        logger.info("prediction for {} finished".format(request_id))
        response_queue.put((request_id, model_id, outputs))
Example 4
def main(args):
    config = get_config_from_args(args, mode='infer')

    max_seq_length = args.max_seq_length or config.max_seq_length
    config.max_seq_length = max_seq_length
    contexts = [
        "The American Football Conference (AFC) champion Denver Broncos "
        "defeated the National Football Conference (NFC) champion Carolina "
        "Panthers 24–10 to earn their third Super Bowl title.",
        "The game was played on February 7, 2016, at Levi's Stadium in the "
        "San Francisco Bay Area at Santa Clara, California.",
        "College sports are also popular in southern California. "
        "The UCLA Bruins and the USC Trojans both field teams in NCAA Division"
        " I in the Pac-12 Conference, and there is a longtime "
        "rivalry between the schools.",
    ]

    questions = [
        "What is the AFC short for?",
        "What day was the game played on?",
        "What other kind of sport is popular in southern California?",
    ]
    max_answer_span = args.max_answer_span or config.max_answer_span
    config.max_answer_span = max_answer_span
    text_inputs = [{
        'qid': qid,
        'question': q,
        'context': ctx
    } for qid, (q, ctx) in enumerate(zip(questions, contexts))]
    outputs = []
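    # Two execution paths: eager mode restores the weights as variables are created
    # and calls the model directly; graph mode builds the inference graph, restores
    # the checkpoint with a Saver, and runs batched sess.run() calls.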
    if args.eager:

        logger.info("running in eager mode...")
        tf.enable_eager_execution()
        checkpoint_path = tf.train.latest_checkpoint(config.checkpoint_dir)

        logger.info("restoring weights from: {}...".format(checkpoint_path))
        # with tf.contrib.eager.restore_variables_on_create(None):
        with tf.contrib.eager.restore_variables_on_create(checkpoint_path):

            model = get_task_model_class(config.model, config.task)(config)
            logger.info("warming up model...")
            model.warm_up()

        # trainable_count = int(numpy.sum([tf.keras.backend.count_params(p)
        # for p in set(model.trainable_weights)]))
        # non_trainable_count = int(numpy.sum([tf.keras.backend.count_params(p)
        # for p in set(model.non_trainable_weights)]))
        # print('trainable_count', abbreviate(trainable_count))
        # print('non_trainable_count', abbreviate(non_trainable_count))
        # # #### testing TF 2.0 ####
        # logger.info("restoring model weights...")
        # model = get_model(config)
        # checkpoint = tf.train.Checkpoint(model=model)
        # checkpoint.restore(os.path.join(config.checkpoint_dir, 'ckpt-1'))
        # with tf.contrib.eager.restore_variables_on_create(checkpoint_path):
        #
        #     model = get_model(config)
        #     logger.info("warming up model...")
        #     model.warm_up(config)
        # checkpoint = tf.train.Checkpoint(model=model)
        # manager = tf.train.CheckpointManager(checkpoint,
        # os.path.join(config.checkpoint_dir, 'keras1.14'),  max_to_keep=1)
        # manager.save()

        text_features = model.text_to_feature(text_inputs, config)
        # inputs_tensor = [tf.convert_to_tensor(i) for i in inputs]
        logger.info("begin inferring...")
        start_time = time.time()
        model_outputs = model.infer(text_features)
        output = model.prepare_outputs(model_outputs, config, text_features)
        logger.info('output={}\n\n'.format(output))
        outputs.extend(output)
    else:
        logger.info("running in graph mode...")
        checkpoint_path = tf.train.latest_checkpoint(config.checkpoint_dir)
        with tf.Session() as sess:

            model = get_task_model_class(config.model, config.task)(config)
            feed_fn, output_tensors = model.infer_graph(config)

            # inference_graph_file = config.inference_graph
            # if not os.path.exists(inference_graph_file):
            #     logger.info("generating inference graph...")
            #     graph_def = sess.graph_def
            #     with tf.gfile.GFile(inference_graph_file, 'wb') as f:
            #         f.write(graph_def.SerializeToString())
            #     with tf.io.gfile.GFile(inference_graph_file + '.txt', 'w') as f:
            #         f.write(str(graph_def))
            #     logger.info("inference graph saved to: {}".format(
            #         inference_graph_file))

            saver = tf.train.Saver(var_list=tf.global_variables())
            logger.info('begin restoring model from checkpoints...')
            saver.restore(sess, checkpoint_path)

            logger.info('begin predicting...')
            text_features = model.text_to_feature(text_inputs, config)
            start_time = time.time()
            for batch_features in batch(text_features, args.batch_size):
                feed = feed_fn(batch_features)
                model_outputs = sess.run(output_tensors, feed)
                output = model.prepare_outputs(model_outputs, config,
                                               batch_features)
                logger.info('output={}\n\n'.format(output))
                outputs.extend(output)
    end_time = time.time()
    logger.info('infer time: {:.4f} s'.format(end_time - start_time))
    for q, c, a in zip(questions, contexts, outputs):
        logger.info('q={}\na={}\n\tcontext={}\n\n'.format(q, a, c))
Example 5
def main(args):
    config = get_config_from_args(args, mode='infer')
    max_seq_length = args.max_seq_length or config.max_seq_length
    config.max_seq_length = max_seq_length
    max_answer_span = args.max_answer_span or config.max_answer_span
    config.max_answer_span = max_answer_span

    model_file = args.model_file
    questions = [
        "What is the AFC short for?",
        # "What day was the game played on?",
    ]
    contexts = [
        "The American Football Conference (AFC) champion Denver Broncos defeated the National "
        "Football Conference (NFC) champion Carolina Panthers 24–10 to earn their third Super Bowl title.",
        # "The game was played on February 7, 2016, at Levi's Stadium in the San Francisco Bay Area " \
        # "at Santa Clara, California.",
    ]

    logger.info("running in eager mode...")
    tf.enable_eager_execution()
    checkpoint_path = tf.train.latest_checkpoint(config.checkpoint_dir)

    logger.info("restoring model weights...")

    with tf.contrib.eager.restore_variables_on_create(checkpoint_path):

        model = get_model(config)
        logger.info("warming up model...")
        model.warm_up(config)

    context_spans, inputs = model.get_inputs(questions, contexts, config)
    inputs_tensor = [
        tf.convert_to_tensor(i, dtype=tf.int32) for i in inputs.values()
    ]
    logger.info("begin inferring...")
    start_predictions, end_predictions, norm_scores = model.infer(
        inputs_tensor, max_answer_span=config.max_answer_span, export=True)

    prediction_answers = decode_answer(contexts, context_spans,
                                       start_predictions, end_predictions)
    for q, c, a, ns in zip(questions, contexts, prediction_answers,
                           norm_scores):
        logger.info('q={}\na={}\n\tcontext={}\n\n'.format(
            q, (a, round(float(ns), 4)), c))

    print(model.embeddings.shape)
    print(model.logits.shape)
    input_ids = inputs_tensor[0]
    print(input_ids.shape)

    input_ids_file = os.path.join(os.path.dirname(model_file), 'input_ids')
    input_embeddings_file = os.path.join(os.path.dirname(model_file),
                                         'input_embeddings')
    output_logits_file = os.path.join(os.path.dirname(model_file),
                                      'output_logits')
    np.save(input_ids_file, input_ids)
    np.save(input_embeddings_file, model.embeddings)
    np.save(output_logits_file, model.logits)

    # Load TFLite model and allocate tensors.
    interpreter = tf.lite.Interpreter(model_path=model_file)
    interpreter.allocate_tensors()

    # Get input and output tensors.
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    # Test model on random input data.
    print(input_details)
    print(output_details)
    print(model.logits)
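    # Feed the same inputs through the TFLite interpreter and check its output
    # against the eager-mode logits with np.allclose as a sanity check.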
    interpreter.set_tensor(input_details[0]['index'], input_ids)
    interpreter.set_tensor(input_details[1]['index'], model.embeddings)

    interpreter.invoke()

    # The function `get_tensor()` returns a copy of the tensor data.
    # Use `tensor()` in order to get a pointer to the tensor.
    output_data = interpreter.get_tensor(output_details[0]['index'])
    print(output_data.shape)
    print(output_data)
    print(np.allclose(output_data, model.logits, rtol=1e-4))
Example 6
def main(args):
    config = get_config_from_args(args, mode='dev')
    config.iterate_checkpoints = args.iterate_checkpoints
    config.checkpoint_path = args.checkpoint_path
    config.iterate_timeout = args.iterate_timeout
    evaluate(config)
Example 7
def main(args):
    config = get_config_from_args(args, mode='infer')
    model_name = config.model
    kwargs = dict(training=False, logits=True)
    if model_name == 'ebert':
        kwargs['fake_cache_first'] = args.cache_segment == 1
        kwargs['fake_cache_second'] = args.cache_segment == 2

    config.batch_size = args.batch_size
    config.max_seq_length = args.max_seq_length or config.max_seq_length
    logger.info("running in graph mode...")
    run_metadata = tf.RunMetadata()

    with tf.Session() as sess:
        model = get_task_model_class(config.model, task=args.task)(config)
        inputs_dict, logits_ph = model.export_graph(config, **kwargs)
        sess.run(tf.global_variables_initializer())
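        # Profiling passes, each gated by a flag: trainable-parameter counts, FLOPs
        # (with initializer ops hidden), peak requested memory from a full-trace run,
        # and average latency over 10 timed runs after 2 warm-up runs.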
        # saver = tf.train.Saver()
        # saver.save(sess, 'data/sbert', write_meta_graph=False)
        opt_builder = tf.profiler.ProfileOptionBuilder
        if args.print_parameters:
            tf.profiler.profile(
                sess.graph,
                options=opt_builder.trainable_variables_parameter())

        if not args.not_profile_flops:
            prof_options = opt_builder.float_operation()
            prof_options['hide_name_regexes'] = ['.*/Initializer/.*']
            tfprof_node = tf.profiler.profile(sess.graph, options=prof_options)
            profile_metric(model_name,
                           tfprof_node,
                           metric='total_float_ops',
                           metric_name='flops')

        if args.profile_memory:
            options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        else:
            options = None
            run_metadata = None
        _ = sess.run([logits_ph],
                     feed_dict=inputs_dict,
                     options=options,
                     run_metadata=run_metadata)

        if args.profile_memory:
            opts = tf.profiler.ProfileOptionBuilder(
                tf.profiler.ProfileOptionBuilder.time_and_memory()).build()

            tfprof_node = tf.profiler.profile(tf.get_default_graph(),
                                              run_meta=run_metadata,
                                              cmd='scope',
                                              options=opts)

            profile_metric(model_name,
                           tfprof_node,
                           metric='total_requested_bytes',
                           metric_name='mem')

        if args.profile_time:
            # warm up two rounds
            logger.info("warm up for two rounds...")

            for _ in range(2):
                sess.run(
                    [logits_ph],
                    feed_dict=inputs_dict,
                )

            logger.info("start running 10 rounds...")
            start_time = time.time()
            # bench 10 rounds, take avg
            for _ in range(10):
                sess.run(
                    [logits_ph],
                    feed_dict=inputs_dict,
                )
            end_time = time.time()
            print('infer_time: {:.4f} s'.format((end_time - start_time) / 10))
Example 8
def main(args):
    config = get_config_from_args(args, mode='train')
    train(config)
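All of these entry points consume an argparse-style namespace. Below is a minimal sketch of how such a script might be wired up; the flag names are assumptions inferred from the attributes read in the examples (args.batch_size, args.max_seq_length, args.checkpoint_path) and are not taken from the original source.

import argparse


def parse_args():
    # Hypothetical flags; the real project may define more options.
    parser = argparse.ArgumentParser(description='run training or inference')
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--max_seq_length', type=int, default=None)
    parser.add_argument('--checkpoint_path', type=str, default=None)
    return parser.parse_args()


if __name__ == '__main__':
    main(parse_args())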