Code Example #1
    def prepend_ld_preload(self, so_path):
        if "LD_PRELOAD" in self.env_vars:
            self.env_vars["LD_PRELOAD"] = ":".join([so_path, self.env_vars["LD_PRELOAD"]])
        else:
            self.env_vars["LD_PRELOAD"] = so_path

        logging.info("Updated LD_PRELOAD: " + self.env_vars["LD_PRELOAD"])
Code Example #2
    def run(self):
        self.load_val_images()
        logging.info("Running accuracy check on {:} images.".format(self.num_images))

        class_predictions = []
        batch_idx = 0
        for image_idx in range(0, self.num_images, self.batch_size):
            actual_batch_size = self.batch_size if image_idx + self.batch_size <= self.num_images else self.num_images - image_idx
            batch_images = self.image_list[image_idx:image_idx + actual_batch_size]
            # DLA does not support batches that are less than the engine's configured batch size. Pad with junk.
            while len(batch_images) < self.batch_size:
                batch_images.append(self.image_list[0])
            batch_images = np.ascontiguousarray(np.stack([np.load(os.path.join(self.image_dir, name + ".npy")) for name in batch_images]))

            start_time = time.time()
            outputs = self.runner([batch_images], self.batch_size)
            if self.verbose:
                logging.info("Batch {:d} (Size {:}) >> Inference time: {:f}".format(batch_idx, actual_batch_size, time.time() - start_time))

            class_predictions.extend(outputs[0][:actual_batch_size])
            batch_idx += 1

        class_list = self.class_list[:self.num_images]
        num_matches = np.sum(np.array(class_list) == np.array(class_predictions))
        accuracy = float(num_matches) / len(class_list)
        return accuracy
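A minimal sketch of the tail-batch arithmetic used in the loop above, with hypothetical sizes; the final batch is shorter than batch_size, and only its real entries are kept for scoring.

num_images, batch_size = 10, 4
for image_idx in range(0, num_images, batch_size):
    # Same expression as above: shrink the last batch to the images that remain.
    actual_batch_size = batch_size if image_idx + batch_size <= num_images else num_images - image_idx
    print(image_idx, actual_batch_size)   # 0 4, 4 4, 8 2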
Code Example #3
def get_score(predictions):

    logging.info("Evaluating predictions...")

    input_file = "build/data/squad/dev-v1.1.json"

    with open(input_file) as f:
        data = json.load(f)["data"]

    f1_score_total = 0.0
    exact_score_total = 0.0
    sample_idx = 0
    for task in data:
        title = task["title"]
        for paragraph_idx, paragraph in enumerate(task["paragraphs"]):
            context = paragraph["context"]
            for q_idx, qas in enumerate(paragraph["qas"]):
                if sample_idx < len(predictions):
                    answers = qas["answers"]
                    f1_score_this = 0.0
                    exact_score_this = 0.0
                    for answer in answers:
                        f1_score_this = max(f1_score_this, f1_score(predictions[sample_idx], answer["text"]))
                        exact_score_this = max(exact_score_this, exact_match_score(predictions[sample_idx], answer["text"]))
                    f1_score_total += f1_score_this
                    exact_score_total += exact_score_this
                sample_idx += 1

    f1_score_avg = f1_score_total / len(predictions) * 100
    exact_score_avg = exact_score_total / len(predictions) * 100

    return (exact_score_avg, f1_score_avg)
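A minimal sketch of the max-over-reference-answers rule used above; exact_match_score here is a simplified stand-in (the real f1_score and exact_match_score come from the SQuAD evaluation utilities and also normalize articles and punctuation).

def exact_match_score(prediction, ground_truth):
    # Simplified stand-in scorer: exact match after lowercasing and stripping whitespace.
    return float(prediction.strip().lower() == ground_truth.strip().lower())

prediction = "Denver Broncos"
answers = [{"text": "The Broncos"}, {"text": "Denver Broncos"}]
exact_score_this = max(exact_match_score(prediction, a["text"]) for a in answers)
print(exact_score_this)   # 1.0 -- the best score over all reference answers is kept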
Code Example #4
    def infer_decoder(self):
        batch_idx = 0
        max_seq_length = 1152 // 2

        (infer_ndtype, dtype_esize) = get_dtype_info(self.args.input_dtype)

        self.outputs = []
        for image_idx in range(0, self.num_samples, self.batch_size):
            # Actual batch size might be smaller than max batch size
            actual_batch_size = self.batch_size if image_idx + self.batch_size <= self.num_samples else self.num_samples - image_idx

            start_time = time.time()

            input_port = np.ascontiguousarray(np.random.randint(
                0, high=self.hyperP.labels_size, size=(actual_batch_size, 1)),
                                              dtype=np.int32)

            # iterate over seq id
            for seq_id in range(max_seq_length):
                predictions = self.decoder._run_decoder([input_port], seq_id,
                                                        actual_batch_size)
                predictions = predictions.reshape(
                    (actual_batch_size, self.hyperP.decoder_hidden_size))
                winners = np.argmax(predictions, axis=1)
                self.outputs.extend(winners[:actual_batch_size])
                input_port = np.minimum(winners, self.hyperP.labels_size - 1)
                input_port = np.ascontiguousarray(input_port.reshape(
                    (actual_batch_size, 1)),
                                                  dtype=np.int32)

            logging.info(
                "Batch {:d} (Size {:}) >> Inference time: {:f}".format(
                    batch_idx, actual_batch_size,
                    time.time() - start_time))
            batch_idx += 1
Code Example #5
 def add_fc(self):
     """
     add FC layer
     """
     logging.info("Adding FC layer")
     # fetch some attrs from old fc1000; note MatMul doesn't have bias
     old_fc_op = [_n for _n in self.graph.nodes if _n.name == "fc1000"][0]
     old_fc_kernel = old_fc_op.inputs[1]
     fc_kernel_weights = old_fc_kernel.values[:, 1:]
     # instantiate fc weight
     # NOTE: MatMul expects a K x M weight matrix when transpose is not set (it is not set by default)
     fc_weight = gs.Constant("fc_replaced_weight", values=fc_kernel_weights)
     # find input to fc to be added
     squeeze_replaced_op = [
         _n for _n in self.graph.nodes if _n.name == "squeeze_replaced"
     ][0]
     squeeze_replaced_out = squeeze_replaced_op.outputs[0]
     # reshape input
     reshape_shape = np.array([-1, fc_kernel_weights.shape[0]],
                              dtype=np.int64)
     fc_reshape_shape = gs.Constant("fc_reshape_shape",
                                    values=reshape_shape)
     # add FC: Reshape=>MatMul
     fc_reshape_out = self.graph.Reshape("fc_reshape_input",
                                         squeeze_replaced_out,
                                         fc_reshape_shape)
     fc_out = self.graph.MatMul("fc_replaced", fc_reshape_out, fc_weight)
Code Example #6
File: main.py Project: kllmia/inference_results_v0.7
def _generate_harness_object(config, profile):
    # Refactors harness generation for use by functions other than handle_run_harness
    benchmark_name = config['benchmark']
    if config.get("use_triton"):
        from code.common.server_harness import TritonHarness
        harness = TritonHarness(config, name=benchmark_name)
        config["inference_server"] = "triton"
    elif benchmark_name == BENCHMARKS.BERT:
        from code.bert.tensorrt.harness import BertHarness
        harness = BertHarness(config, name=benchmark_name)
        config["inference_server"] = "custom"
    elif benchmark_name == BENCHMARKS.DLRM:
        from code.dlrm.tensorrt.harness import DLRMHarness
        harness = DLRMHarness(config, name=benchmark_name)
        config["inference_server"] = "custom"
    elif benchmark_name == BENCHMARKS.RNNT:
        from code.rnnt.tensorrt.harness import RNNTHarness
        harness = RNNTHarness(config, name=benchmark_name)
        config["inference_server"] = "custom"
    else:
        from code.common.lwis_harness import LWISHarness
        harness = LWISHarness(config, name=benchmark_name)

    # Attempt to run profiler. Note that this is only available internally.
    if profile is not None:
        try:
            from code.internal.profiler import ProfilerHarness
            harness = ProfilerHarness(harness, profile)
        except BaseException:
            logging.info("Could not load profiler: Are you an internal user?")

    return harness, config
Code Example #7
    def infer_joint(self):
        (infer_ndtype, dtype_esize) = get_dtype_info(self.args.input_dtype)

        batch_idx = 0
        self.outputs = []
        for image_idx in range(0, self.num_samples, self.batch_size):
            # Actual batch size might be smaller than max batch size
            actual_batch_size = self.batch_size if image_idx + self.batch_size <= self.num_samples else self.num_samples - image_idx

            start_time = time.time()

            max_seq_length = 1152 + 1152 // 2  # U=1152//2 + T=1152   FIXME
            encoder_input_size = self.hyperP.encoder_hidden_size
            decoder_input_size = self.hyperP.decoder_hidden_size

            for seq_idx in range(max_seq_length):
                # input ports
                enc_input_port = np.ascontiguousarray(np.random.rand(
                    actual_batch_size, 1, encoder_input_size),
                                                      dtype=infer_ndtype)
                dec_input_port = np.ascontiguousarray(np.random.rand(
                    actual_batch_size, 1, decoder_input_size),
                                                      dtype=infer_ndtype)
                inputs = [enc_input_port, dec_input_port]

                outputs = self.joint(inputs, actual_batch_size)
                self.outputs.extend(outputs[0][:actual_batch_size])

            logging.info(
                "Batch {:d} (Size {:}) >> Inference time: {:f}".format(
                    batch_idx, actual_batch_size,
                    time.time() - start_time))
            batch_idx += 1
Code Example #8
    def convert(self, image_list):
        for idx, img_file in enumerate(image_list):
            logging.info("Processing image No.{:d}/{:d}...".format(
                idx, len(image_list)))
            output_files = [
                self.get_filename(format, img_file)
                for format in self.run_formats
            ]

            if all([os.path.exists(i)
                    for i in output_files]) and not self.overwrite:
                logging.info(
                    "Skipping {:} because it already exists.".format(img_file))
                continue

            image_fp32 = self.loader(os.path.join(self.src_dir, img_file))
            if "fp32" in self.run_formats:
                np.save(self.get_filename("fp32", img_file), image_fp32)
            image_int8_linear = self.quantizer(image_fp32)
            if "int8_linear" in self.run_formats:
                np.save(self.get_filename("int8_linear", img_file),
                        image_int8_linear)
            image_int8_chw4 = self.linear_to_chw4(image_int8_linear)
            if "int8_chw4" in self.run_formats:
                np.save(self.get_filename("int8_chw4", img_file),
                        image_int8_chw4)
Code Example #9
File: main.py Project: kllmia/inference_results_v0.7
def copy_default_engine(benchmark):
    new_path = benchmark._get_engine_name(None, None)  # Use default values
    benchmark.config_ver = "default"
    default_path = benchmark._get_engine_name(None, None)

    logging.info("Copying {:} to {:}".format(default_path, new_path))
    shutil.copyfile(default_path, new_path)
Code Example #10
def main():
    args = common_args.parse_args(common_args.ACCURACY_ARGS)
    logging.info("Running accuracy test...")
    run_SSDResNet34_accuracy(args["engine_file"],
                             args["batch_size"],
                             args["num_samples"],
                             verbose=args["verbose"])
Code Example #11
    def dump_embedding_weights_to_binary_file(self):
        logging.info("Writing quantized embedding weights to " + self.embedding_weights_binary_filepath)

        with open(self.embedding_weights_binary_filepath,'wb') as f:
            f.write(struct.pack('i', self.num_features))

            # Calculate the maximum absolute value of embedding weights for each table
            mults = np.ndarray(shape=(self.num_features))
            for feature_id in range(self.num_features):
                weight_tensor_name = "emb_l." + str(feature_id) + ".weight"
                embeddings = self.weights[weight_tensor_name].numpy()
                maxAbsVal = abs(max(embeddings.max(), embeddings.min(), key=abs))
                mults[feature_id] = 127.5 / maxAbsVal
                embeddingsScale = 1.0 / mults[feature_id]
                f.write(struct.pack('f', embeddingsScale))

            for feature_id in range(self.num_features):
                weight_tensor_name = "emb_l." + str(feature_id) + ".weight"
                embeddings = self.weights[weight_tensor_name].numpy()
                if (embeddings.shape[0] != self.embedding_rows[feature_id]):
                    raise IOError("Expected " + str(self.embedding_rows[feature_id]) + " embedding rows, but got " + str(embeddings.shape[0]) + " rows for feature " + str(feature_id))
                embeddingsQuantized = np.minimum(np.maximum(np.rint(np.multiply(embeddings, mults[feature_id])), -127), 127).astype('int8')
                # Remove the embedding weights; we don't need them any longer
                del self.weights[weight_tensor_name]
                # Write quantized embeddings to file
                embeddingsQuantized.tofile(f)
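A minimal sketch of the symmetric int8 quantization applied per embedding table above, on a hypothetical weight array; the scale written to the file header is the reciprocal of the multiplier.

import numpy as np

embeddings = np.array([-0.8, -0.1, 0.0, 0.4], dtype=np.float32)  # hypothetical table
maxAbsVal = abs(max(embeddings.max(), embeddings.min(), key=abs))
mult = 127.5 / maxAbsVal
embeddingsScale = 1.0 / mult  # this is the per-table scale stored with struct.pack('f', ...)
embeddingsQuantized = np.minimum(np.maximum(np.rint(embeddings * mult), -127), 127).astype('int8')
print(embeddingsQuantized)                    # -> [-127, -16, 0, 64]
print(embeddingsQuantized * embeddingsScale)  # approximate reconstruction of the fp32 values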
Code Example #12
def cleanup():
    """Delete files for audit cleanup."""
    tmp_files = ["audit.config", "verify_accuracy.txt", "verify_performance.txt", "mlperf_log_accuracy_baseline.json", "accuracy.txt", "predictions.json"]
    for fname in tmp_files:
        if os.path.exists(fname):
            logging.info('Audit cleanup: Removing file {}'.format(fname))
            os.remove(fname)
Code Example #13
def turn_on_mps(active_sms):
    if not is_xavier():
        turn_off_mps()
        cmd = "export CUDA_MPS_ACTIVE_THREAD_PERCENTAGE={:d} && nvidia-cuda-mps-control -d".format(
            active_sms)
        logging.info("Turn on MPS with active_sms = {:d}.".format(active_sms))
        run_command(cmd)
Code Example #14
def run_dlrm_accuracy(engine_file,
                      batch_size,
                      num_pairs=10000000,
                      verbose=False):
    if verbose:
        logging.info("Running DLRM accuracy test with:")
        logging.info("    engine_file: {:}".format(engine_file))
        logging.info("    batch_size: {:}".format(batch_size))
        logging.info("    num_pairs: {:}".format(num_pairs))

    runner = EngineRunner(engine_file, verbose=verbose)
    pair_dir = os.path.join(
        os.getenv("PREPROCESSED_DATA_DIR", "build/preprocessed_data"),
        "criteo", "full_recalib")

    input_dtype, input_format = get_input_format(runner.engine)
    if input_dtype == trt.DataType.FLOAT:
        format_string = "fp32"
    elif input_dtype == trt.DataType.HALF:
        format_string = "fp16"
    elif input_dtype == trt.DataType.INT8:
        format_string = "int8"
        if input_format == trt.TensorFormat.CHW4:
            format_string += "_chw4"
    else:
        raise NotImplementedError(
            "Unsupported DataType {:}".format(input_dtype))

    numerical_inputs = np.load(
        os.path.join(pair_dir, "numeric_{:}.npy".format(format_string)))
    categ_inputs = np.load(os.path.join(pair_dir, "categorical_int32.npy"))

    predictions = []
    refs = []
    batch_idx = 0
    for pair_idx in range(0, int(num_pairs), batch_size):
        actual_batch_size = batch_size if pair_idx + batch_size <= num_pairs else num_pairs - pair_idx
        numerical_input = np.ascontiguousarray(
            numerical_inputs[pair_idx:pair_idx + actual_batch_size])
        categ_input = np.ascontiguousarray(categ_inputs[pair_idx:pair_idx +
                                                        actual_batch_size])

        start_time = time.time()
        outputs = runner([numerical_input, categ_input], actual_batch_size)

        if verbose:
            logging.info(
                "Batch {:d} (Size {:}) >> Inference time: {:f}".format(
                    batch_idx, actual_batch_size,
                    time.time() - start_time))

        predictions.extend(outputs[0][:actual_batch_size])

        batch_idx += 1

    ground_truths = np.load(os.path.join(
        pair_dir, "ground_truth.npy"))[:num_pairs].tolist()

    return evaluate(ground_truths, predictions)
Code Example #15
    def __init__(self, args):
        """Set up the config and calibrator for DLRM. Does not initialize."""

        workspace_size = dict_get(args, "workspace_size", default=(4 << 30))
        logging.info("Using workspace size: {:,}".format(workspace_size))

        super().__init__(args, name=BENCHMARKS.DLRM, workspace_size=workspace_size)

        with open("code/dlrm/tensorrt/mlperf_40m.limit.json") as f:
            self.dlrm_config = json.load(f)
        logging.info("DLRM config: {:}".format(self.dlrm_config))
        self.num_numerical_inputs = self.dlrm_config["num_numerical_features"]
        self.num_features = len(self.dlrm_config["categorical_feature_sizes"])
        self.num_interactions = (self.num_features + 1) * self.num_features // 2
        self.embedding_size = self.dlrm_config["embedding_dim"]
        self.embedding_rows = self.dlrm_config["categorical_feature_sizes"]
        self.embedding_rows_bound = 40000000
        self.embedding_rows = [min(i, self.embedding_rows_bound) for i in self.embedding_rows]
        self.embedding_rows_total = np.sum(np.array(self.embedding_rows))
        self.bottom_mlp_channels = self.dlrm_config["bottom_mlp_sizes"]
        self.bottom_mlp_names = ["bot_l.0", "bot_l.2", "bot_l.4"]
        self.output_padding = self.args.get("output_padding_granularity", 32)
        self.top_mlp_input_size = (self.num_interactions + self.embedding_size + self.output_padding - 1) // self.output_padding * self.output_padding
        self.top_mlp_channels = self.dlrm_config["top_mlp_sizes"]
        self.top_mlp_names = ["top_l.0", "top_l.2", "top_l.4", "top_l.6", "top_l.8"]
        self.model_filepath = "build/models/dlrm/tb00_40M.pt"
        self.embedding_weights_binary_filepath = "build/models/dlrm/40m_limit/dlrm_embedding_weights_int8_v3.bin"
        self.model_without_embedding_weights_filepath = "build/models/dlrm/40m_limit/model_test_without_embedding_weights_v3.pt"
        self.row_frequencies_binary_filepath = "build/models/dlrm/40m_limit/row_frequencies.bin"
        self.row_frequencies_src_dir = "build/models/dlrm/40m_limit/row_freq"
        self.embedding_weights_on_gpu_part = self.args.get("embedding_weights_on_gpu_part", 1.0)
        self.use_row_frequencies = True if self.embedding_weights_on_gpu_part < 1.0 else False
        self.num_profiles = self.args.get("gpu_inference_streams", 1)
        self.use_small_tile_gemm_plugin = self.args.get("use_small_tile_gemm_plugin", False)
        self.gemm_plugin_fairshare_cache_size = self.args.get("gemm_plugin_fairshare_cache_size", -1)
        self.enable_interleaved_top_mlp = self.args.get("enable_interleaved_top_mlp", False)

        if self.precision == "fp16":
            self.apply_flag(trt.BuilderFlag.FP16)
        elif self.precision == "int8":
            self.apply_flag(trt.BuilderFlag.INT8)

        if self.precision == "int8":
            # Get calibrator variables
            calib_batch_size = dict_get(self.args, "calib_batch_size", default=512)
            calib_max_batches = dict_get(self.args, "calib_max_batches", default=500)
            force_calibration = dict_get(self.args, "force_calibration", default=False)
            cache_file = dict_get(self.args, "cache_file", default="code/dlrm/tensorrt/calibrator.cache")
            preprocessed_data_dir = dict_get(self.args, "preprocessed_data_dir", default="build/preprocessed_data")
            calib_data_dir = os.path.join(preprocessed_data_dir, "criteo/full_recalib/val_data_128000")

            # Set up calibrator
            self.calibrator = DLRMCalibrator(calib_batch_size=calib_batch_size, calib_max_batches=calib_max_batches,
                                             force_calibration=force_calibration, cache_file=cache_file, data_dir=calib_data_dir)
            self.builder_config.int8_calibrator = self.calibrator
            self.cache_file = cache_file
            self.need_calibration = force_calibration or not os.path.exists(cache_file)
        else:
            self.need_calibration = False
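A minimal sketch of the pairwise-interaction count and the round-up-to-granularity arithmetic used for top_mlp_input_size above, with hypothetical numbers (26 categorical features, embedding_dim 128, output padding 32).

num_features = 26
num_interactions = (num_features + 1) * num_features // 2  # 351 feature-pair interactions
embedding_size, output_padding = 128, 32
# Round (interactions + embedding) up to the next multiple of the padding granularity.
top_mlp_input_size = (num_interactions + embedding_size + output_padding - 1) // output_padding * output_padding
print(num_interactions, top_mlp_input_size)  # 351 480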
Code Example #16
def main():
    # Parse arguments to identify the data directory with the input images
    #   and the output directory for the preprocessed images.
    # The data directory is assumed to have the following structure:
    # <data_dir>
    #  └── imagenet
    # And the output directory will have the following structure:
    # <preprocessed_data_dir>
    #  └── imagenet
    #      └── ResNet50
    #          ├── fp32
    #          └── int8_linear
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--data_dir", "-d",
        help="Specifies the directory containing the input images.",
        default="build/data"
    )
    parser.add_argument(
        "--preprocessed_data_dir", "-o",
        help="Specifies the output directory for the preprocessed data.",
        default="build/preprocessed_data"
    )
    parser.add_argument(
        "--formats", "-t",
        help="Comma-separated list of formats. Choices: fp32, int8_linear, int8_chw4.",
        default="default"
    )
    parser.add_argument(
        "--overwrite", "-f",
        help="Overwrite existing files.",
        action="store_true"
    )
    parser.add_argument(
        "--cal_only",
        help="Only preprocess calibration set.",
        action="store_true"
    )
    parser.add_argument(
        "--val_only",
        help="Only preprocess validation set.",
        action="store_true"
    )
    args = parser.parse_args()
    data_dir = args.data_dir
    preprocessed_data_dir = args.preprocessed_data_dir
    formats = args.formats.split(",")
    overwrite = args.overwrite
    cal_only = args.cal_only
    val_only = args.val_only
    default_formats = ["int8_linear"]

    # Now, actually preprocess the input images
    logging.info("Loading and preprocessing images. This might take a while...")
    if args.formats == "default":
        formats = default_formats
    preprocess_imagenet_for_resnet50(data_dir, preprocessed_data_dir, formats, overwrite, cal_only, val_only)

    logging.info("Preprocessing done.")
Code Example #17
def main():
    args = common_args.parse_args(common_args.ACCURACY_ARGS)
    logging.info("Running accuracy test...")
    acc = run_dlrm_accuracy(args["engine_file"],
                            args["batch_size"],
                            args["num_samples"],
                            verbose=args["verbose"])
    logging.info("Accuracy: {:}".format(acc))
Code Example #18
def copy_default_engine(benchmark):
    """Copy engine file from default path to new path."""
    new_path = benchmark._get_engine_fpath(None, None)  # Use default values
    benchmark.config_ver = "default"
    default_path = benchmark._get_engine_fpath(None, None)

    logging.info("Copying {:} to {:}".format(default_path, new_path))
    shutil.copyfile(default_path, new_path)
Code Example #19
 def remove_obsolete(self):
     """
     Remove obsolete layers
     """
     logging.info("Removing obsolete layers")
     topk_op = [_n for _n in self.graph.nodes if _n.name == "topk_layer"][0]
     self.graph.outputs = topk_op.outputs
     self.cleanup_graph()
Code Example #20
 def print_match(pattern, match):
     for node in pattern:
         key = node["name"]
         value = match[key]
         if isinstance(value, trt.ILayer):
             logging.info(key + "=" + value.name)
         else:
             logging.info(key + "=" + str(value))
Code Example #21
def verify_test01(harness):
    # Compute path to results dir
    script_path = 'build/inference/compliance/nvidia/TEST01/run_verification.py'
    results_path = os.path.join('results', harness.get_system_name(), harness._get_submission_benchmark_name(), harness.scenario)
    logging.info('AUDIT HARNESS: ' + results_path + '/accuracy' + '\n' + results_path + '/performance')
    verification_command = 'python3 {} --results={} --compliance={} --output_dir={}'.format(
        script_path, results_path, harness.get_full_log_dir(), harness.get_full_log_dir())
    return run_command(verification_command, get_output=True)
Code Example #22
File: main.py Project: kllmia/inference_results_v0.7
def handle_calibrate(config):
    benchmark_name = config["benchmark"]

    logging.info("Generating calibration cache for Benchmark \"{:}\"".format(benchmark_name))
    config = apply_overrides(config, common_args.CALIBRATION_ARGS)
    config["dla_core"] = None
    config["force_calibration"] = True
    b = get_benchmark(config)
    b.calibrate()
Code Example #23
    def get_engine_info(self):
        if self.verbose:
            logging.info("Loading engine to get engine info")

        def extract_dtype(s):
            if "INT8" in s:
                return "TYPE_INT8"
            elif "FP32" in s:
                return "TYPE_FP32"
            elif "INT32" in s:
                return "TYPE_INT32"
            elif "FP16" in s:
                return "TYPE_FP16"
            else:
                raise ValueError("Data type must be INT8 or FP32 or INT32, got {:}".format(s))

        format_rgx = re.compile(r"\(k[A-Z]+[0-9]*\)")

        # EngineRunner is the convention to load engines
        plugins = None
        if self.name in plugin_map:
            plugins = plugin_map[self.name]
            for plugin in plugins:
                self.check_file_exists(plugin)
        runner = EngineRunner(self.gpu_engine, verbose=self.verbose, plugins=plugins)
        inputs = []
        outputs = []
        # FIXME exploit the use of optimization profile if needed
        num_profiles = runner.engine.num_optimization_profiles
        num_bindings_per_profile = runner.engine.num_bindings // num_profiles
        has_dynamic_shape = False
        for idx in range(num_bindings_per_profile):
            tensor = {}
            tensor["name"] = runner.engine.get_binding_name(idx)
            binding_shape = runner.engine.get_binding_shape(idx)
            if -1 in binding_shape:
                tensor["dims"] = binding_shape[1:]
                has_dynamic_shape = True
            else:
                tensor["dims"] = binding_shape
            tensor["format"] = runner.engine.get_binding_format_desc(idx)
            tensor["dtype"] = extract_dtype(tensor["format"])
            match = format_rgx.search(tensor["format"])
            if match is None:
                raise ValueError("Invalid input format: {:}".format(tensor["format"]))
            tensor["dformat"] = match.group(0).strip("()")
            if runner.engine.binding_is_input(idx):
                inputs.append(tensor)
            else:
                outputs.append(tensor)

        is_static = not has_dynamic_shape and not runner.engine.has_implicit_batch_dimension

        # Clean up runner
        del runner

        return (inputs, outputs, [0], is_static)
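A minimal sketch of the format-token parsing used above, on a hypothetical binding format description string; the regex extracts the TensorRT format token such as (kCHW4).

import re

format_rgx = re.compile(r"\(k[A-Z]+[0-9]*\)")
desc = "Int8 format (kCHW4)"  # hypothetical get_binding_format_desc() output
match = format_rgx.search(desc)
print(match.group(0).strip("()"))  # kCHW4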
Code Example #24
 def autosinian_optimize(self):
     logging.info("Applying AutoSinian Optimization...")
     optimize_points = [(10, 15), (21, 26), (27, 32), (38, 43), (44, 49),
                        (55, 60), (61, 66), (67, 72), (78, 83), (84, 89),
                        (90, 95), (0, 4), (5, 9), (16, 20), (33, 37),
                        (50, 54), (73, 77), (96, 100)]
     optimizer = AutoSinian_Optimizer(self.cache_file)
     for point in optimize_points:
         optimizer.optimize(self.network, point)
Code Example #25
    def __init__(self, args):
        workspace_size = dict_get(args, "workspace_size", default=(5 << 30))
        logging.info("Using workspace size: {:,}".format(workspace_size))
        super().__init__(args,
                         name=BENCHMARKS.BERT,
                         workspace_size=workspace_size)
        self.bert_config_path = "code/bert/tensorrt/bert_config.json"

        self.seq_len = 384  # default sequence length

        self.batch_size = dict_get(args, "batch_size", default=1)

        self.num_profiles = 1
        if 'gpu_inference_streams' in args:
            # use gpu_inference_streams to determine the number of duplicated profiles
            # in the engine when not using lwis mode
            self.num_profiles = args['gpu_inference_streams']

        self.is_int8 = args['precision'] == 'int8'

        if self.is_int8:
            self.model_path = dict_get(
                args,
                "model_path",
                default="build/models/bert/bert_large_v1_1_fake_quant.onnx")
        else:
            self.model_path = dict_get(
                args,
                "model_path",
                default="build/models/bert/bert_large_v1_1.onnx")

        self.bert_config = BertConfig(self.bert_config_path)

        self.enable_interleaved = False
        if self.is_int8 and 'enable_interleaved' in args:
            self.enable_interleaved = args['enable_interleaved']

        # Small-Tile GEMM Plugin
        # Since it doesn't support interleaved format, two options are mutually exclusive
        self.use_small_tile_gemm_plugin = self.args.get(
            "use_small_tile_gemm_plugin", False)
        self.gemm_plugin_fairshare_cache_size = self.args.get(
            "gemm_plugin_fairshare_cache_size", -1)
        if self.enable_interleaved and self.use_small_tile_gemm_plugin:
            assert False, "Small-Tile GEMM Plugin doesn't support interleaved format."

        # Query system id for architecture
        self.system = get_system()
        self.gpu_arch = self.system.arch

        if self.batch_size > 512:
            # tactics selection is limited at very large batch sizes
            self.builder_config.max_workspace_size = 7 << 30
        if 'nx' in self.system.gpu.lower():
            # use 1GB only for XavierNX
            self.builder_config.max_workspace_size = 1 << 30
Code Example #26
 def print_matches(pattern, matches):
     matchNumber = 1
     if isinstance(matches, list):
         for match in matches:
             logging.info("Match number: {:d}".format(matchNumber))
             network_search.print_match(pattern, match)
             logging.info("")
             matchNumber = matchNumber + 1
     else:
         network_search.print_match(pattern, matches)
Code Example #27
 def rename_ops(self):
     """
     Rename op names as in self.op_name_map
     """
     logging.info("Renaming layers")
     for node in self.graph.nodes:
         if node.name in self.op_name_map:
             new_name = self.op_name_map[node.name]
             # logging.info("Renaming layer: {} -> {}".format(node.name, new_name))
             node.name = new_name
Code Example #28
def infer(args):
    hyperParam = RnnHyperParam(args)
    runner = RNNTRunner(args, hyperParam)
    logging.info("Start running inference -- topology : {:}".format(
        args.topology))
    start = time.time()
    runner.infer()
    end = time.time()
    elapsed = end - start
    logging.info("Inference takes {:f} secs. Throughput = {:f}/s".format(
        elapsed, args.num_samples / elapsed))
Code Example #29
    def __init__(self, args):
        workspace_size = dict_get(args, "workspace_size", default=(1 << 30))
        logging.info("Use workspace_size: {:}".format(workspace_size))

        super().__init__(args,
                         name=BENCHMARKS.ResNet50,
                         workspace_size=workspace_size)

        # Model path
        self.model_path = dict_get(
            args,
            "model_path",
            default="code/resnet50/tensorrt/ofa_autosinian_is176.onnx")
        logging.info("Using AutoSinian optimized once-for-all network")

        self.cache_file = None
        self.need_calibration = False

        if self.precision == "int8":
            # Get calibrator variables
            calib_batch_size = dict_get(self.args,
                                        "calib_batch_size",
                                        default=1)
            calib_max_batches = dict_get(self.args,
                                         "calib_max_batches",
                                         default=500)
            force_calibration = dict_get(self.args,
                                         "force_calibration",
                                         default=False)
            cache_file = dict_get(
                self.args,
                "cache_file",
                default="code/resnet50/tensorrt/calibrator.cache")
            preprocessed_data_dir = dict_get(self.args,
                                             "preprocessed_data_dir",
                                             default="build/preprocessed_data")
            calib_data_map = dict_get(self.args,
                                      "calib_data_map",
                                      default="data_maps/imagenet/cal_map.txt")
            calib_image_dir = os.path.join(preprocessed_data_dir,
                                           "imagenet/ResNet50/fp32")

            # Set up calibrator
            self.calibrator = RN50Calibrator(
                calib_batch_size=calib_batch_size,
                calib_max_batches=calib_max_batches,
                force_calibration=force_calibration,
                cache_file=cache_file,
                image_dir=calib_image_dir,
                calib_data_map=calib_data_map)
            self.builder_config.int8_calibrator = self.calibrator
            self.cache_file = cache_file
            self.need_calibration = force_calibration or not os.path.exists(
                cache_file)
Code Example #30
def load(audit_test, benchmark):
    # Calculates path to audit.config
    src_config = os.path.join('build/inference/compliance/nvidia', audit_test, benchmark, 'audit.config')
    logging.info('AUDIT HARNESS: Looking for audit.config in {}...'.format(src_config))
    if not os.path.isfile(src_config):
        # For tests that have one central audit.config instead of per-benchmark
        src_config = os.path.join('build/inference/compliance/nvidia', audit_test, 'audit.config')
        logging.info('AUDIT HARNESS: Search failed. Looking for audit.config in {}...'.format(src_config))
    # Destination is audit.config
    dest_config = 'audit.config'
    # Copy the file
    shutil.copyfile(src_config, dest_config)
    return dest_config