Example #1
def torchscript_export(context, export_json, model, output_path, quantize,
                       target):
    """Convert a pytext model snapshot to a torchscript model."""
    export_cfg = ExportConfig()
    # only populate from export_json if no export option is configured from the command line.
    if export_json:
        export_json_config = _load_and_validate_export_json_config(export_json)

        read_chunk_size = export_json_config.pop("read_chunk_size", None)
        if read_chunk_size is not None:
            print("Warning: Ignoring read_chunk_size.")

        if "export" in export_json_config.keys():
            export_cfgs = [export_json_config["export"]]
        else:
            export_cfgs = export_json_config["export_list"]

        if target:
            print(
                "A single export was specified in the command line. Filtering out all other export options"
            )
            export_cfgs = [
                cfg for cfg in export_cfgs if cfg["target"] == target
            ]
            if export_cfgs == []:
                print(
                    "No ExportConfig matches the target name specified in the command line."
                )

        for partial_export_cfg in export_cfgs:
            if not quantize and not output_path:
                export_cfg = config_from_json(ExportConfig, partial_export_cfg)
            else:
                print(
                    "the export-json config is ignored because export options are found the command line"
                )
                export_cfg = config_from_json(
                    ExportConfig,
                    partial_export_cfg,
                    ("export_caffe2_path", "export_onnx_path"),
                )
                export_cfg.torchscript_quantize = quantize
            # if config has export_torchscript_path, use export_torchscript_path from config, otherwise keep the default from CLI
            if export_cfg.export_torchscript_path is not None:
                output_path = export_cfg.export_torchscript_path
            if not model or not output_path:
                config = context.obj.load_config()
                model = model or config.save_snapshot_path
                output_path = output_path or f"{config.save_snapshot_path}.torchscript"

            print(f"Exporting {model} to torchscript file: {output_path}")
            print(export_cfg)
            export_saved_model_to_torchscript(model, output_path, export_cfg)
    def _get_config_with_export_list(
        self,
        task_class: Type[NewTask],
        model_class: Type[Model],
        test_file_metadata: TestFileMetadata,
    ) -> PyTextConfig:
        return PyTextConfig(
            task=task_class.Config(
                data=Data.Config(
                    source=TSVDataSource.Config(
                        train_filename=test_file_metadata.filename,
                        eval_filename=test_file_metadata.filename,
                        test_filename=test_file_metadata.filename,
                        field_names=test_file_metadata.field_names,
                    ),
                    batcher=PoolingBatcher.Config(train_batch_size=1,
                                                  test_batch_size=1),
                ),
                trainer=TaskTrainer.Config(epochs=1),
                model=model_class.Config(
                    inputs=type(model_class.Config.inputs)(
                        dense=FloatListTensorizer.Config(
                            column=test_file_metadata.dense_col_name,
                            error_check=True,
                            dim=test_file_metadata.dense_feat_dim,
                        ))),
            ),
            use_tensorboard=False,
            use_cuda_if_available=False,
            export=ExportConfig(
                export_torchscript_path="/tmp/model_torchscript.pt"),
            version=LATEST_VERSION,
        )
    def testEmptyBatchPaddingConfigThrowsException(self):
        empty_export_config = ExportConfig(seq_padding_control=[0, 10, 20])
        model = DummyModel(max_seq_len=22, embedding_dim=10)
        script_func = torch.jit.script(model)
        with self.assertRaises(RuntimeError):
            accelerator_transformerLayers_inputs(model, script_func,
                                                 empty_export_config, None, "")
    def testNonPositiveBatchPaddingIgnored(self):
        model = DummyModel(max_seq_len=10, embedding_dim=32)
        script_func = torch.jit.script(model)
        export_config = ExportConfig(seq_padding_control=[22],
                                     batch_padding_control=[0])
        input_examples = accelerator_transformerLayers_inputs(
            model, script_func, export_config, None, "")

        self.assertEqual(len(input_examples), 0)
    def testNonPositiveSeqPaddingIgnored(self):
        model = DummyModel(max_seq_len=10, embedding_dim=32)
        script_func = torch.jit.script(model)
        export_config = ExportConfig(seq_padding_control=[-2, 0],
                                     batch_padding_control=[0, 15])
        input_examples = accelerator_transformerLayers_inputs(
            model, script_func, export_config, None, "")

        # only default max_seq_length used for seq padding
        self.assertEqual(len(input_examples), 1)
    def testSeqPaddingLimitedBymaxSeqLen(self):
        model = DummyModel(max_seq_len=10, embedding_dim=32)
        script_func = torch.jit.script(model)
        export_config = ExportConfig(seq_padding_control=[0, 5, 50],
                                     batch_padding_control=[0, 15])
        input_examples = accelerator_transformerLayers_inputs(
            model, script_func, export_config, None, "")

        # effective seq padding [5, 10]
        self.assertEqual(len(input_examples), 2)
    def testReturnWithCorrectShape(self):
        model = DummyModel(max_seq_len=10, embedding_dim=32)
        script_func = torch.jit.script(model)
        export_config = ExportConfig(seq_padding_control=[0, 5],
                                     batch_padding_control=[0, 15])
        input_examples = accelerator_transformerLayers_inputs(
            model, script_func, export_config, None, "")
        self.assertEqual(len(input_examples), 2)
        self.assertEqual(input_examples[0][0].get_dims(), [5, 15, 32])
        self.assertEqual(input_examples[0][1].get_dims(), [15, 5])
        self.assertEqual(input_examples[1][0].get_dims(), [10, 15, 32])
        self.assertEqual(input_examples[1][1].get_dims(), [15, 10])
def accelerator_transformerLayers_inputs(
    model: nn.Module,
    trace: torch.jit.ScriptFunction,
    export_options: ExportConfig,
    dataset_iterable: Iterable,
    module_path,
):
    import torch_glow

    # we use the padding control from the Export Config:
    if export_options is None:
        export_options = ExportConfig()

    if export_options.seq_padding_control is None:
        raise RuntimeError("seq padding control not specified")
    if export_options.batch_padding_control is None:
        raise RuntimeError("batch padding control not specified")

    batch_padding_control = export_options.batch_padding_control

    # Restrict seq_padding_control to valid ranges
    seq_padding_control = []
    max_seq_len = trace.get_max_seq_len()
    for pad in export_options.seq_padding_control:
        if pad < max_seq_len:
            seq_padding_control.append(pad)
    seq_padding_control.append(max_seq_len)

    # this should use a method, or module_path, instead of being hardcoded
    # embedding_dim = model.encoder.encoder.transformer.token_embedding.embedding_dim
    embedding_dim = accelerator.get_embedding_module_from_path(
        model, module_path)

    input_examples = []
    for seq_len in seq_padding_control:
        if seq_len <= 0:
            continue
        for batch_size in batch_padding_control:
            if batch_size <= 0:
                continue
            # TODO: generate input data directly instead of using dataset_iterable; enhance later
            input1 = torch.randn([seq_len, batch_size, embedding_dim],
                                 dtype=torch.float32)
            input2 = torch.randn([batch_size, seq_len]).bool()
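            # torch_glow builds input specs (shape/dtype) from these example tensors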
            input_specs = torch_glow.input_specs_from_tensors([input1, input2])
            input_examples.append(input_specs)

    return input_examples
Example #9
def accelerator_lstm_inputs(
    model: nn.Module,
    trace: torch.jit.ScriptFunction,
    export_options: ExportConfig,
    dataset_iterable: Iterable,
    module_path,
):
    import torch_glow

    # we use the padding control from the Export Config:
    if export_options is None:
        export_options = ExportConfig()

    if export_options.seq_padding_control is None:
        raise RuntimeError("seq padding control not specified")
    if export_options.batch_padding_control is None:
        raise RuntimeError("batch padding control not specified")

    batch_padding_control = export_options.batch_padding_control
    seq_padding_control = export_options.seq_padding_control
    embedding_dim = trace.embedding.word_embedding.embedding_dim * 2
    lstm_num_layers = trace.lstm_num_layers
    lstm_dim = trace.lstm_dim

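    # generate one (embedding, hidden, cell) input spec per (seq_len, batch_size) padding combination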
    input_examples = []
    for seq_len in seq_padding_control:
        if seq_len <= 0:
            continue
        for batch_size in batch_padding_control:
            if batch_size <= 0:
                continue
            # TODO: generate input data directly instead of using dataset_iterable; enhance later
            input_embedding = torch.randn(
                [batch_size, seq_len, embedding_dim], dtype=torch.float32
            )
            input_hidden = torch.randn(
                [batch_size, lstm_num_layers, lstm_dim], dtype=torch.float32
            )
            input_cell = torch.randn(
                [batch_size, lstm_num_layers, lstm_dim], dtype=torch.float32
            )
            input_specs = torch_glow.input_specs_from_tensors(
                [input_embedding, input_hidden, input_cell]
            )
            input_examples.append(input_specs)

    return input_examples
Example #10
def accelerator_transformerLayers_inputs(model: nn.Module,
                                         export_options: ExportConfig,
                                         dataset_iterable: Iterable, module_path):
    import torch_glow

    # we use the padding control from the Export Config:
    if export_options is None:
        export_options = ExportConfig()

    seq_padding_control = export_options.seq_padding_control
    batch_padding_control = export_options.batch_padding_control
    if seq_padding_control is None:
        raise RuntimeError("seq padding control not specified")
    if batch_padding_control is None:
        raise RuntimeError("batch padding control not specified")

    max_seq_len = model.get_max_seq_len()
    seq_padding_control = [
        pad if pad <= max_seq_len else max_seq_len
        for pad in seq_padding_control
    ] + [max_seq_len]

    # this should use a method, or module_path, instead of being hardcoded
    embedding_dim = model.encoder.encoder.transformer.token_embedding.embedding_dim

    input_examples = []
    for seq_len in seq_padding_control:
        if seq_len <= 0:
            continue
        for batch_size in batch_padding_control:
            if batch_size <= 0:
                continue
            # TODO: generate input data directly instead of using dataset_iterable; enhance later
            input1 = torch.randn([seq_len, batch_size, embedding_dim],
                                 dtype=torch.float32)
            input2 = torch.randn([batch_size, seq_len]).bool()
            input_specs = torch_glow.input_specs_from_tensors([input1, input2])
            input_examples.append(input_specs)

    return input_examples
Example #11
def get_seq_and_batch_padding_control(
    trace: torch.jit.ScriptFunction, export_options: ExportConfig
):
    # we use the padding control from the Export Config:
    if export_options is None:
        export_options = ExportConfig()

    if export_options.seq_padding_control is None:
        raise RuntimeError("seq padding control not specified")
    if export_options.batch_padding_control is None:
        raise RuntimeError("batch padding control not specified")

    batch_padding_control = export_options.batch_padding_control

    # Restrict seq_padding_control to valid ranges
    seq_padding_control = []
    max_seq_len = trace.get_max_seq_len()
    for pad in export_options.seq_padding_control:
        if pad < max_seq_len:
            seq_padding_control.append(pad)
    seq_padding_control.append(max_seq_len)

    return seq_padding_control, batch_padding_control
Example #12
    def torchscript_export(
        self,
        model,
        export_path=None,
        sort_input=False,
        sort_key=1,
        export_config=None,
    ):
        # TODO(T88310041) Remove These
        torch._C._jit_set_profiling_executor(True)
        torch._C._jit_set_profiling_mode(False)
        # unpack export config
        if export_config is None:
            export_config = ExportConfig()

        quantize = export_config.torchscript_quantize
        seq_padding_control = export_config.seq_padding_control
        batch_padding_control = export_config.batch_padding_control
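        # parse the accelerate string options (e.g. "nnpi", "cuda:half") into structured flags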
        accel = AccelerateOptions(export_config.accelerate)
        print(f"Using accelerate options: {accel.__dict__}")

        # what hosts can this model run on
        # by default, pytext works on CPU and CUDA (because it implements set_device)
        model_host = ["cpu", "cuda"]
        if accel.use_cuda:
            # CUDA FP16 models only work on CUDA
            model_host = ["cuda"]
        if accel.use_nnpi:
            model_host = ["nnpi"]
        if hasattr(model, "set_host"):
            model.set_host(model_host)

        # what is the type of this model
        # pytext models are nlp models
        model_type = ["nlp"]

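        # record which encoder submodules are present (RoBERTaEncoder, BiLSTM) to refine the model type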
        instance_paths_p = any(
            True for _ in find_module_instances(model, RoBERTaEncoder, []))
        if instance_paths_p:
            model_type.append("transformer")

        instance_paths_p = any(
            True for _ in find_module_instances(model, BiLSTM, []))
        if instance_paths_p:
            model_type.append("BiLSTM")

        if hasattr(model, "set_model"):
            model.set_type(model_type)

        # Make sure to put the model on CPU and disable CUDA before exporting to
        # ONNX to disable any data_parallel pieces
        cuda.CUDA_ENABLED = False
        model.cpu()
        optimizer = self.trainer.optimizer
        optimizer.pre_export(model)

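        # rewrite modules that need NNPI-specific implementations, based on the accelerate options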
        model = rewrite_nnpi_modules(model, accel)

        # Trace needs eval mode, to disable dropout etc
        model.eval()
        model.prepare_for_onnx_export_()

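        # grab one training batch to build example inputs for tracing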
        unused_raw_batch, batch = next(
            iter(self.data.batches(Stage.TRAIN, load_early=True)))
        inputs = model.onnx_trace_input(batch)
        # call model forward to set correct device types
        if sort_input:
            _, sorted_indices = sort(inputs[sort_key], descending=True)
            inputs = [i.index_select(0, sorted_indices) for i in inputs]
        model(*inputs)

        # Default to dynamic
        if accel.use_fx_quantize:
            data_loader = self.data.batches(Stage.TRAIN, load_early=False)
            trace = quantize_fx(
                model,
                inputs,
                data_loader,
                accel.use_nnpi_fx_dynamic_quantize
                or accel.use_cpu_fx_dynamic_quantize,
                accel.use_nnpi_fx_static_selectively_quantize
                or accel.use_cpu_fx_static_selectively_quantize,
            )
        elif (quantize or accel.use_nnpi_quantize) and hasattr(
                model, "graph_mode_quantize"):
            data_loader = self.data.batches(Stage.TRAIN, load_early=False)
            print("Quantizing the model ...")
            # recognize legacy nnpi_q or $platform:$option syntax
            quantize_linear_only = accel.use_nnpi_quantize
            module_swap = accel.use_nnpi
            trace = quantize_statically(model, inputs, data_loader,
                                        quantize_linear_only, module_swap)
        else:
            if quantize:
                log_feature_usage("quantize.dynamically.CPU")
                model.quantize()
            if accel.use_cuda and (accel.use_cuda_half_ft
                                   or accel.use_cuda_dq):
                log_accelerator_feature_usage(
                    "build.CUDA.half.faster_transformers")
                # We need a separate path for GPU-only tracing, as we can't just trace a CPU model
                # and invoke .cuda().half(),
                # as we don't have equivalent CPU implementations of these operators.
                precision.FP16_ENABLED = True
                cuda.CUDA_ENABLED = True
                if accel.use_cuda_dq:
                    model = swap_modules(model, MODULE_TO_REWRITER["cuda-dq"])
                else:
                    model = swap_modules(model, MODULE_TO_REWRITER["cuda"])
                model.eval()
                model.half().cuda()
                # obtain new inputs with cuda/fp16 enabled.
                unused_raw_batch, batch = next(
                    iter(self.data.batches(Stage.TRAIN, load_early=True)))
                inputs = model.onnx_trace_input(batch)
                trace = model.trace(inputs)
                print("Traced (faster_transformers)!")
                # should be unnecessary.
                trace.cuda().half()
                unused_raw_batch, batch = next(
                    iter(self.data.batches(Stage.TRAIN, load_early=True)))
                inputs = model.onnx_trace_input(batch)
                results = trace(*inputs)
                assert results
            else:
                trace = model.trace(inputs)
                print("Traced!")
                if accel.use_cuda and accel.use_cuda_half:
                    log_accelerator_feature_usage("build.CUDA.half")
                    # convert trace to half precision
                    trace.cuda().half()

                    #### trace test: demonstrate that it is usable
                    precision.FP16_ENABLED = True
                    cuda.CUDA_ENABLED = True
                    unused_raw_batch, batch = next(
                        iter(self.data.batches(Stage.TRAIN, load_early=True)))
                    inputs = model.onnx_trace_input(batch)
                    assert trace(*inputs)
                    #### end of trace test
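        # wrap the trace with the model's TorchScript pre/post-processing, when the model provides it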
        if hasattr(model, "torchscriptify"):
            trace = model.torchscriptify(self.data.tensorizers, trace)
        if hasattr(trace, "validate"):
            trace.validate(export_config)
        if seq_padding_control is not None:
            if hasattr(trace, "set_padding_control"):
                trace.set_padding_control("sequence_length",
                                          seq_padding_control)
            else:
                print(
                    "Padding_control not supported by model. Ignoring seq_padding_control"
                )
        if batch_padding_control is not None:
            if hasattr(trace, "set_padding_control"):
                trace.set_padding_control("batch_length",
                                          batch_padding_control)
            else:
                print(
                    "Padding_control not supported by model. Ignoring batch_padding_control"
                )
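        # pack weights on submodules that expose a _pack method before lowering/saving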
        trace.apply(lambda s: s._pack() if s._c._has_method("_pack") else None)
        if accel.use_nnpi and not accel.use_nnpi_split:
            print("Lowering using to_glow")
            trace = lower_modules_to_accelerator(
                model,
                trace,
                export_config,
                accel.use_nnpi_throughput_optimized,
                accel.use_nnpi_throughput_maximized,
                accel.use_nnpi_gelu_clip,
            )
        if accel.use_nnpi_split:
            print("Lowering split model to Glow")
            trace = lower_split_model_to_accelerator(model, trace,
                                                     export_config)

        if export_path is not None:
            print(f"Saving torchscript model to: {export_path}")
            with PathManager.open(export_path, "wb") as f:
                torch.jit.save(trace, f)
        return trace
Example #13
    def torchscript_export(
        self,
        model,
        export_path=None,
        sort_input=False,
        sort_key=1,
        export_config=None,
    ):
        # unpack export config
        if export_config is None:
            export_config = ExportConfig()

        quantize = export_config.torchscript_quantize

        accelerate = export_config.accelerate
        seq_padding_control = export_config.seq_padding_control
        batch_padding_control = export_config.batch_padding_control
        inference_interface = export_config.inference_interface

        # Make sure to put the model on CPU and disable CUDA before exporting to
        # ONNX to disable any data_parallel pieces
        cuda.CUDA_ENABLED = False
        model.cpu()
        optimizer = self.trainer.optimizer
        optimizer.pre_export(model)

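        # swap in accelerator-compatible module implementations before tracing for NNPI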
        if "nnpi" in accelerate:
            model = swap_modules_for_accelerator(model)

        # Trace needs eval mode, to disable dropout etc
        model.eval()
        model.prepare_for_onnx_export_()

        unused_raw_batch, batch = next(
            iter(self.data.batches(Stage.TRAIN, load_early=True))
        )
        inputs = model.onnx_trace_input(batch)
        # call model forward to set correct device types
        if sort_input:
            _, sorted_indices = sort(inputs[sort_key], descending=True)
            inputs = [i.index_select(0, sorted_indices) for i in inputs]
        model(*inputs)

        use_cuda_half = "cuda:half" in accelerate

        if quantize and hasattr(model, "graph_mode_quantize"):
            data_loader = self.data.batches(Stage.TRAIN, load_early=False)
            print("Quantizing the model ...")
            quantize_linear_only = "nnpi_quantize" in accelerate
            module_swap = "nnpi" in accelerate
            trace = quantize_statically(
                model, inputs, data_loader, quantize_linear_only, module_swap
            )
        else:
            if quantize:
                log_accelerator_feature_usage("quantize.dynamically.CPU")
                model.quantize()

            trace = model.trace(inputs)
            print("traced!")

            if use_cuda_half:
                log_accelerator_feature_usage("build.CUDA.half")
                # convert trace to half precision
                trace.cuda().half()

                #### trace test: demonstrate that it is usable
                precision.FP16_ENABLED = True
                cuda.CUDA_ENABLED = True
                unused_raw_batch, batch = next(
                    iter(self.data.batches(Stage.TRAIN, load_early=True))
                )
                inputs = model.onnx_trace_input(batch)
                assert trace(*inputs)
                #### end of trace test

        if hasattr(model, "torchscriptify"):
            trace = model.torchscriptify(self.data.tensorizers, trace)
        if hasattr(trace, "validate"):
            trace.validate(export_config)
        if seq_padding_control is not None:
            if hasattr(trace, "set_padding_control"):
                trace.set_padding_control("sequence_length", seq_padding_control)
            else:
                print(
                    "Padding_control not supported by model. Ignoring seq_padding_control"
                )
        if batch_padding_control is not None:
            if hasattr(trace, "set_padding_control"):
                trace.set_padding_control("batch_length", batch_padding_control)
            else:
                print(
                    "Padding_control not supported by model. Ignoring batch_padding_control"
                )
        if inference_interface is not None:
            if hasattr(trace, "inference_interface"):
                trace.inference_interface(inference_interface)
            else:
                print(
                    "inference_interface not supported by model. Ignoring inference_interface"
                )
        trace.apply(lambda s: s._pack() if s._c._has_method("_pack") else None)
        if "nnpi" in accelerate:
            print("lowering using to_glow")
            trace = lower_modules_to_accelerator(model, trace, export_config)

        if export_path is not None:
            print(f"Saving torchscript model to: {export_path}")
            with PathManager.open(export_path, "wb") as f:
                torch.jit.save(trace, f)
        return trace
Example #14
def torchscript_export(context, export_json, model, output_path, quantize):
    """Convert a pytext model snapshot to a torchscript model."""
    export_config = ExportConfig()
    # only populate from export_json if no export option is configured from the command line.
    if export_json:
        export_json_config = _load_and_validate_export_json_config(export_json)

        read_chunk_size = export_json_config.pop("read_chunk_size", None)
        if read_chunk_size is not None:
            print("Warning: Ignoring read_chunk_size.")

        if "export_list" not in export_json_config.keys():
            export_section_config_list = [export_json_config["export"]]
        else:
            export_section_config_list = export_json_config["export_list"]

        for export_section_config in export_section_config_list:
            if not quantize and not output_path:
                export_config.export_caffe2_path = export_section_config.get(
                    "export_caffe2_path", None)
                export_config.export_onnx_path = export_section_config.get(
                    "export_onnx_path", "/tmp/model.onnx")
                export_config.torchscript_quantize = export_section_config.get(
                    "torchscript_quantize", False)

            else:
                print(
                    "the export-json config is ignored because export options are found the command line"
                )
                export_config.torchscript_quantize = quantize

            export_config.export_torchscript_path = export_section_config.get(
                "export_torchscript_path", None)
            # if config has export_torchscript_path, use export_torchscript_path from config, otherwise keep the default from CLI
            if export_config.export_torchscript_path is not None:
                output_path = export_config.export_torchscript_path

            export_config.export_lite_path = export_section_config.get(
                "export_lite_path", None)
            export_config.inference_interface = export_section_config.get(
                "inference_interface", None)
            export_config.accelerate = export_section_config.get(
                "accelerate", [])
            export_config.seq_padding_control = export_section_config.get(
                "seq_padding_control", None)
            export_config.batch_padding_control = export_section_config.get(
                "batch_padding_control", None)
            if not model or not output_path:
                config = context.obj.load_config()
                model = model or config.save_snapshot_path
                output_path = output_path or f"{config.save_snapshot_path}.torchscript"

            print(f"Exporting {model} to torchscript file: {output_path}")
            export_saved_model_to_torchscript(model, output_path,
                                              export_config)
Example #15
    def torchscript_export(
        self,
        model,
        export_path=None,
        sort_input=False,
        sort_key=1,
        export_config=None,
    ):
        # unpack export config
        if export_config is None:
            export_config = ExportConfig()

        quantize = export_config.torchscript_quantize

        accelerate = export_config.accelerate
        seq_padding_control = export_config.seq_padding_control
        batch_padding_control = export_config.batch_padding_control

        # introduce a single nnpi:quantize that obviates need for torchscript quantize on NNPI
        use_nnpi = ("nnpi" in accelerate) or ("nnpi:quantize" in accelerate)
        use_nnpi_throughput_optimized = "nnpi:throughput_optimized" in accelerate
        use_cuda_half = "cuda:half" in accelerate
        use_cuda_half_faster_transformers = "cuda:half:ft" in accelerate

        use_nnpi_quantize = "nnpi:quantize" in accelerate
        use_nnpi_fx_static_quantize = "nnpi:fx_static_quantize" in accelerate
        use_nnpi_fx_dynamic_quantize = "nnpi:fx_dynamic_quantize" in accelerate
        use_cpu_fx_static_quantize = "cpu:fx_static_quantize" in accelerate
        use_cpu_fx_dynamic_quantize = "cpu:fx_dynamic_quantize" in accelerate
        use_fx_quantize = (use_nnpi_fx_static_quantize
                           or use_nnpi_fx_dynamic_quantize
                           or use_cpu_fx_static_quantize
                           or use_cpu_fx_dynamic_quantize)

        # Make sure to put the model on CPU and disable CUDA before exporting to
        # ONNX to disable any data_parallel pieces
        cuda.CUDA_ENABLED = False
        model.cpu()
        optimizer = self.trainer.optimizer
        optimizer.pre_export(model)

        if use_nnpi or use_fx_quantize:
            model = swap_modules_for_accelerator(model)

        # Trace needs eval mode, to disable dropout etc
        model.eval()
        model.prepare_for_onnx_export_()

        unused_raw_batch, batch = next(
            iter(self.data.batches(Stage.TRAIN, load_early=True)))
        inputs = model.onnx_trace_input(batch)
        # call model forward to set correct device types
        if sort_input:
            _, sorted_indices = sort(inputs[sort_key], descending=True)
            inputs = [i.index_select(0, sorted_indices) for i in inputs]
        model(*inputs)

        # Default to dynamic
        if use_fx_quantize:
            data_loader = self.data.batches(Stage.TRAIN, load_early=False)
            trace = quantize_fx(
                model,
                inputs,
                data_loader,
                use_nnpi_fx_dynamic_quantize or use_cpu_fx_dynamic_quantize,
            )
        elif (quantize or use_nnpi_quantize) and hasattr(
                model, "graph_mode_quantize"):
            data_loader = self.data.batches(Stage.TRAIN, load_early=False)
            print("Quantizing the model ...")
            # recognize legacy nnpi_q or $platform:$option syntax
            quantize_linear_only = use_nnpi_quantize or ("nnpi_quantize"
                                                         in accelerate)
            module_swap = use_nnpi
            trace = quantize_statically(model, inputs, data_loader,
                                        quantize_linear_only, module_swap)
        else:
            if quantize:
                log_feature_usage("quantize.dynamically.CPU")
                model.quantize()
            if use_cuda_half_faster_transformers:
                log_accelerator_feature_usage(
                    "build.CUDA.half.faster_transformers")
                # We need a separate path for GPU-only tracing, as we can't just trace a CPU model
                # and invoke .cuda().half(),
                # as we don't have equivalent CPU implementations of these operators.
                precision.FP16_ENABLED = True
                cuda.CUDA_ENABLED = True

                model = swap_modules_for_faster_transformer(model)
                model.eval()
                model.half().cuda()
                # obtain new inputs with cuda/fp16 enabled.
                unused_raw_batch, batch = next(
                    iter(self.data.batches(Stage.TRAIN, load_early=True)))
                inputs = model.onnx_trace_input(batch)
                trace = model.trace(inputs)
                print("traced (faster_transformers)!")
                # should be unnecessary.
                trace.cuda().half()
                unused_raw_batch, batch = next(
                    iter(self.data.batches(Stage.TRAIN, load_early=True)))
                inputs = model.onnx_trace_input(batch)
                assert trace(*inputs)
            else:
                trace = model.trace(inputs)
                print("traced!")
                if use_cuda_half:
                    log_accelerator_feature_usage("build.CUDA.half")
                    # convert trace to half precision
                    trace.cuda().half()

                    #### trace test: demonstrate that it is usable
                    precision.FP16_ENABLED = True
                    cuda.CUDA_ENABLED = True
                    unused_raw_batch, batch = next(
                        iter(self.data.batches(Stage.TRAIN, load_early=True)))
                    inputs = model.onnx_trace_input(batch)
                    assert trace(*inputs)
                    #### end of trace test
        if hasattr(model, "torchscriptify"):
            trace = model.torchscriptify(self.data.tensorizers, trace)
        if hasattr(trace, "validate"):
            trace.validate(export_config)
        if seq_padding_control is not None:
            if hasattr(trace, "set_padding_control"):
                trace.set_padding_control("sequence_length",
                                          seq_padding_control)
            else:
                print(
                    "Padding_control not supported by model. Ignoring seq_padding_control"
                )
        if batch_padding_control is not None:
            if hasattr(trace, "set_padding_control"):
                trace.set_padding_control("batch_length",
                                          batch_padding_control)
            else:
                print(
                    "Padding_control not supported by model. Ignoring batch_padding_control"
                )
        trace.apply(lambda s: s._pack() if s._c._has_method("_pack") else None)
        if use_nnpi:
            print("lowering using to_glow")
            trace = lower_modules_to_accelerator(
                model, trace, export_config, use_nnpi_throughput_optimized)

        if export_path is not None:
            print(f"Saving torchscript model to: {export_path}")
            with PathManager.open(export_path, "wb") as f:
                torch.jit.save(trace, f)
        return trace
Example #16
    def torchscript_export(self,
                           model,
                           export_path=None,
                           export_config=None):  # noqa
        # unpack export config
        if export_config is None:
            export_config = ExportConfig()

        quantize = export_config.torchscript_quantize
        accelerate = export_config.accelerate
        seq_padding_control = export_config.seq_padding_control
        batch_padding_control = export_config.batch_padding_control

        if (accelerate is not None) and (accelerate != []):
            raise RuntimeError(
                "old-style task.py does not support export for NNPI accelerators"
            )

        cuda.CUDA_ENABLED = False
        model.cpu()
        optimizer = self.trainer.optimizer
        optimizer.pre_export(model)

        model.eval()
        model.prepare_for_onnx_export_()

        unused_raw_batch, batch = next(
            iter(self.data.batches(Stage.TRAIN, load_early=True)))
        inputs = model.onnx_trace_input(batch)
        model(*inputs)
        if quantize:
            model.quantize()
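        # trace the full two-encoder model, or only encoder1, depending on self.trace_both_encoders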
        if self.trace_both_encoders:
            trace = jit.trace(model, inputs)
        else:
            trace = jit.trace(model.encoder1, (inputs[0], ))
        if hasattr(model, "torchscriptify"):
            trace = model.torchscriptify(self.data.tensorizers, trace,
                                         self.trace_both_encoders)
        if seq_padding_control is not None:
            if hasattr(trace, "set_padding_control"):
                trace.set_padding_control("sequence_length",
                                          seq_padding_control)
            else:
                print(
                    "Padding_control not supported by model. Ignoring padding_control"
                )
        if batch_padding_control is not None:
            if hasattr(trace, "set_padding_control"):
                trace.set_padding_control("batch_length",
                                          batch_padding_control)
            else:
                print(
                    "Padding_control not supported by model. Ignoring padding_control"
                )
        trace.apply(lambda s: s._pack() if s._c._has_method("_pack") else None)
        if export_path is not None:
            print(f"Saving torchscript model to: {export_path}")
            with PathManager.open(export_path, "wb") as f:
                torch.jit.save(trace, f)
        return trace
Example #17
    def torchscript_export(self,
                           model,
                           export_path=None,
                           export_config=None):  # noqa
        # unpack export config
        if export_config is None:
            export_config = ExportConfig()

        quantize = export_config.torchscript_quantize
        accelerate = export_config.accelerate
        seq_padding_control = export_config.seq_padding_control
        batch_padding_control = export_config.batch_padding_control
        inference_interface = export_config.inference_interface

        cuda.CUDA_ENABLED = False
        model.cpu()
        optimizer = self.trainer.optimizer
        optimizer.pre_export(model)

        model.eval()
        model.prepare_for_onnx_export_()

        unused_raw_batch, batch = next(
            iter(self.data.batches(Stage.TRAIN, load_early=True)))
        inputs = model.onnx_trace_input(batch)
        model(*inputs)
        if quantize:
            model.quantize()
        if accelerate is not None and "half" in accelerate:
            model.half()
        if self.trace_both_encoders:
            trace = jit.trace(model, inputs)
        else:
            trace = jit.trace(model.encoder1, (inputs[0], ))
        if hasattr(model, "torchscriptify"):
            trace = model.torchscriptify(self.data.tensorizers, trace,
                                         self.trace_both_encoders)
        if seq_padding_control is not None:
            if hasattr(trace, "set_padding_control"):
                trace.set_padding_control("sequence_length",
                                          seq_padding_control)
            else:
                print(
                    "Padding_control not supported by model. Ignoring padding_control"
                )
        if batch_padding_control is not None:
            if hasattr(trace, "set_padding_control"):
                trace.set_padding_control("batch_length",
                                          batch_padding_control)
            else:
                print(
                    "Padding_control not supported by model. Ignoring padding_control"
                )
        if inference_interface is not None:
            if hasattr(trace, "inference_interface"):
                trace.inference_interface(inference_interface)
            else:
                print(
                    "inference_interface not supported by model. Ignoring inference_interface"
                )
        trace.apply(lambda s: s._pack() if s._c._has_method("_pack") else None)
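        # for NNPI, freeze the scripted module while preserving the methods callers still need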
        if accelerate is not None and "nnpi" in accelerate:
            trace._c = torch._C._freeze_module(
                trace._c,
                preservedAttrs=[
                    "make_prediction", "make_batch", "set_padding_control"
                ],
            )
        if export_path is not None:
            print(f"Saving torchscript model to: {export_path}")
            with PathManager.open(export_path, "wb") as f:
                torch.jit.save(trace, f)
        return trace