Beispiel #1
0
    def __init__(self, args):
        print("Loading BERT configs...")
        with open("bert_config.json") as f:
            config_json = json.load(f)

        config = BertConfig(
            attention_probs_dropout_prob=config_json[
                "attention_probs_dropout_prob"],
            hidden_act=config_json["hidden_act"],
            hidden_dropout_prob=config_json["hidden_dropout_prob"],
            hidden_size=config_json["hidden_size"],
            initializer_range=config_json["initializer_range"],
            intermediate_size=config_json["intermediate_size"],
            max_position_embeddings=config_json["max_position_embeddings"],
            num_attention_heads=config_json["num_attention_heads"],
            num_hidden_layers=config_json["num_hidden_layers"],
            type_vocab_size=config_json["type_vocab_size"],
            vocab_size=config_json["vocab_size"])

        print("Loading PyTorch model...")
        self.model = BertForQuestionAnswering(config)
        self.model.eval()
        self.model.cuda()
        self.model.load_state_dict(
            torch.load(
                "build/data/bert_tf_v1_1_large_fp32_384_v2/model.pytorch"))

        print("Constructing SUT...")
        self.sut = lg.ConstructSUT(self.issue_queries, self.flush_queries,
                                   self.process_latencies)
        print("Finished constructing SUT.")

        self.qsl = get_squad_QSL(args.max_examples)
Beispiel #2
0
    def __init__(self):
        print("Loading TF model...")
        self.sess = tf.Session()
        with gfile.FastGFile('build/data/bert_tf_v1_1_large_fp32_384_v2/model.pb', 'rb') as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())
            self.sess.graph.as_default()
            tf.import_graph_def(graph_def, name='')

        print("Constructing SUT...")
        self.sut = lg.ConstructSUT(self.issue_queries, self.flush_queries, self.process_latencies)
        print("Finished constructing SUT.")

        self.qsl = get_squad_QSL()
Beispiel #3
0
    def __init__(self, quantized):
        print("Loading ONNX model...")
        self.quantized = quantized
        if not quantized:
            model_path = "build/data/bert_tf_v1_1_large_fp32_384_v2/model.onnx"
        else:
            model_path = "build/data/bert_tf_v1_1_large_fp32_384_v2/bert_large_v1_1_fake_quant.onnx"
        self.sess = onnxruntime.InferenceSession(model_path)

        print("Constructing SUT...")
        self.sut = lg.ConstructSUT(self.issue_queries, self.flush_queries,
                                   self.process_latencies)
        self.qsl = get_squad_QSL()
        print("Finished constructing SUT.")
Beispiel #4
0
    def __init__(self, batch_size=8):
        print("Loading TF model...")
        bert_config = modeling.BertConfig.from_json_file("bert_config.json")

        model_fn = self.model_fn_builder(
            bert_config=bert_config,
            init_checkpoint="build/data/bert_tf_v1_1_large_fp32_384_v2/model.ckpt-5474")

        self.estimator = tf.estimator.Estimator(model_fn=model_fn)
        self.batch_size = batch_size

        print("Constructing SUT...")
        self.sut = lg.ConstructSUT(self.issue_queries, self.flush_queries, self.process_latencies)
        self.qsl = get_squad_QSL()
        print("Finished constructing SUT.")
    def __init__(self, args):
        self.profile = args.profile
        self.options = onnxruntime.SessionOptions()
        self.options.enable_profiling = args.profile

        print("Loading ONNX model...")
        self.quantized = args.quantized
        if self.quantized:
            model_path = "build/data/bert_tf_v1_1_large_fp32_384_v2/bert_large_v1_1_fake_quant.onnx"
        else:
            model_path = "build/data/bert_tf_v1_1_large_fp32_384_v2/model.onnx"
        self.sess = onnxruntime.InferenceSession(model_path, self.options)

        print("Constructing SUT...")
        self.sut = lg.ConstructSUT(self.issue_queries, self.flush_queries,
                                   self.process_latencies)
        print("Finished constructing SUT.")

        self.qsl = get_squad_QSL(args.max_examples)
Beispiel #6
0
    def __init__(self, args):
        print("Loading TF model...")
        infer_config = tf.compat.v1.ConfigProto()
        infer_config.intra_op_parallelism_threads = int(os.environ['TF_INTRA_OP_PARALLELISM_THREADS']) \
                if 'TF_INTRA_OP_PARALLELISM_THREADS' in os.environ else os.cpu_count()
        infer_config.inter_op_parallelism_threads = int(os.environ['TF_INTER_OP_PARALLELISM_THREADS']) \
                if 'TF_INTER_OP_PARALLELISM_THREADS' in os.environ else os.cpu_count()
        infer_config.use_per_session_threads = 1
        self.sess = tf.compat.v1.Session(config=infer_config)

        with gfile.FastGFile(
                'build/data/bert_tf_v1_1_large_fp32_384_v2/model.pb',
                'rb') as f:
            graph_def = tf.compat.v1.GraphDef()
            graph_def.ParseFromString(f.read())
            self.sess.graph.as_default()
            tf.import_graph_def(graph_def, name='')

        print("Constructing SUT...")
        self.sut = lg.ConstructSUT(self.issue_queries, self.flush_queries,
                                   self.process_latencies)
        print("Finished constructing SUT.")

        self.qsl = get_squad_QSL(args.max_examples)