def lower_modules_to_accelerator(model: nn.Module, trace,
                                 export_options: ExportConfig):
    import torch_glow

    if hasattr(model, "encoder") and isinstance(model.encoder, RoBERTaEncoder):
        backend = "NNPI"
        submod_modelpath, compilation_spec_dict = accelerator.get_modules(
            model, backend)[0]
        submod_tracepath = accelerator.model2trace_path(submod_modelpath)
        spec = torch_glow.CompilationSpec()
        spec.get_settings().set_glow_backend(backend)
        compilation_group = torch_glow.CompilationGroup()
        spec.compilation_groups_append(compilation_group)
        compilation_group_settings = compilation_group.get_settings()
        compilation_group_settings.set_convert_to_fp16(True)
        for k, v in compilation_spec_dict.items():
            compilation_group.get_settings().backend_specific_opts_insert(k, v)

        # TODO: @input decorator does not work properly; fix it later
        # input_sets = inputs.input_process(model, export_options, None, submod_tracepath)
        input_sets = accelerator_transformerLayers_inputs(
            model, trace, export_options, None, submod_tracepath)
        compilation_group.set_input_sets(input_sets)

        trace = torch_glow.to_glow_selective(
            trace,
            {submod_tracepath: spec},
            inplace=False,
        )

        return trace
    else:
        return trace
Example #2
def lower_modules_to_accelerator(model: nn.Module, trace,
                                 export_options: ExportConfig):
    import torch_glow

    if hasattr(model, "encoder") and isinstance(model.encoder, RoBERTaEncoder):
        backend = "NNPI"
        (
            submod_modelpath,
            compilation_spec_dict,
            inputs_function,
        ) = accelerator.get_modules(model, backend)[0]
        submod_tracepath = accelerator.model2trace_path(submod_modelpath)
        spec = torch_glow.CompilationSpec()
        spec.get_settings().set_glow_backend(backend)
        compilation_group = torch_glow.CompilationGroup()
        spec.compilation_groups_append(compilation_group)
        compilation_group_settings = compilation_group.get_settings()
        compilation_group_settings.set_convert_to_fp16(True)
        for k, v in compilation_spec_dict.items():
            compilation_group.get_settings().backend_specific_opts_insert(k, v)

        if inputs_function is not None:
            input_sets = inputs_function(model, trace, export_options, None,
                                         submod_modelpath)
        else:
            raise RuntimeError(
                "inputs_function needs to be specified in accelerator decorator"
            )
        compilation_group.set_input_sets(input_sets)

        trace = torch_glow.to_glow_selective(
            trace,
            {submod_tracepath: spec},
            inplace=False,
        )

        return trace
    else:
        return trace
    def test_to_glow_selective_already_scripted(self):
        a = torch.zeros(4) + 8
        b = torch.zeros(4) + 7
        torch_res = model(a, b)

        spec = torch_glow.CompilationSpec()
        spec.get_settings().set_glow_backend("Interpreter")

        compilation_group = torch_glow.CompilationGroup()
        spec.compilation_groups_append(compilation_group)

        a_spec = torch_glow.InputSpec()
        a_spec.set_same_as(a)
        b_spec = torch_glow.InputSpec()
        b_spec.set_same_as(b)

        compilation_group.input_sets_append([a_spec, b_spec])
        with torch.no_grad():
            traced_model = torch.jit.trace(model, (a, b))

        glow_mod = torch_glow.to_glow_selective(
            traced_model,
            {
                "foo.bar": spec,
                "qux": spec
            },
            inplace=False,
        )
        glow_res = glow_mod(a, b)
        assert torch.allclose(torch_res, glow_res)
    def test_save_preprocessed_module(self):
        with torch.no_grad():
            x = torch.randn([1, 4, 4, 4], dtype=torch.float32)
            model = Bar()
            model.eval()
            model = torch.jit.trace(model, x)

            spec = torch_glow.CompilationSpec()
            spec.get_settings().set_glow_backend("Interpreter")

            compilation_group = torch_glow.CompilationGroup()
            spec.compilation_groups_append(compilation_group)

            compilation_group.input_sets_append(
                torch_glow.input_specs_from_tensors([x]))

            torch_glow.disableFusionPass()
            torch_glow.enable_convert_to_fp16()
            glow_mod = torch_glow.to_glow(model, spec)

            reloaded = utils.save_and_reload_model(glow_mod)

            wrappername = "__loweredModule__"
            attrname = "__processed_module"
            wp = getattr(reloaded._c, wrappername)
            pp = getattr(wp, attrname)
            pt_model = torch.jit._recursive.wrap_cpp_module(pp)
            graph = pt_model.graph_for(x)
            found = False
            for node in graph.nodes():
                if node.kind() == "quantized::conv2d":
                    found = True

            assert found
Example #5
    def test_to_glow_tuple_output(self):
        input = torch.randn(4)

        model = Foo()

        spec = torch_glow.CompilationSpec()
        spec.get_settings().set_glow_backend("Interpreter")

        compilation_group = torch_glow.CompilationGroup()
        spec.compilation_groups_append(compilation_group)

        input_spec = torch_glow.InputSpec()
        input_spec.set_same_as(input)

        compilation_group.input_sets_append([input_spec])

        scripted_mod = torch.jit.script(model)
        lowered_model = torch_glow.to_glow(scripted_mod, {"forward": spec})

        # Run Glow model
        (gx, gy) = lowered_model(input)

        # Run reference model
        (tx, ty) = model(input)

        assert torch.allclose(tx, gx)
        assert torch.allclose(ty, gy)
Example #6
    def tuple_test_helper(self, ModType):
        input = torch.randn(4)

        model = ModType()

        spec = torch_glow.CompilationSpec()
        spec.get_settings().set_glow_backend("Interpreter")

        compilation_group = torch_glow.CompilationGroup()
        spec.compilation_groups_append(compilation_group)

        input_spec = torch_glow.InputSpec()
        input_spec.set_same_as(input)

        compilation_group.input_sets_append([input_spec])

        scripted_mod = torch.jit.script(model)
        lowered_model = torch_glow.to_glow(scripted_mod, {"forward": spec})

        # Run Glow model
        g = lowered_model(input)

        # Run reference model
        t = model(input)

        self.assertEqual(type(g), type(t))
        self.assertEqual(len(g), len(t))

        for (gi, ti) in zip(g, t):
            self.assertTrue(torch.allclose(gi, ti))
Example #7
    def devices_to_use_test_helper(self, input, num_replications):
        model = SimpleModule()

        spec = torch_glow.CompilationSpec()
        spec.get_settings().set_glow_backend("Interpreter")
        # Init with total number of devices.
        torch_glow.setGlowBackendNumDevices(6)

        compilation_group = torch_glow.CompilationGroup()
        spec.compilation_groups_append(compilation_group)

        input_spec = torch_glow.InputSpec()
        input_spec.set_same_as(input)
        compilation_group.input_sets_append([input_spec])
        compilation_group_settings = compilation_group.get_settings()
        compilation_group_settings.set_num_devices_to_use(3)
        compilation_group_settings.set_replication_count(num_replications)

        traced_mod = torch.jit.trace(model, input)
        lowered_model = torch_glow.to_glow(traced_mod, {"forward": spec})

        g = lowered_model(input)
        t = model(input)

        self.assertEqual(type(g), type(t))
        self.assertEqual(len(g), len(t))
        for (gi, ti) in zip(g, t):
            self.assertTrue(torch.allclose(gi, ti))
Example #8
    def test_to_glow_selective(self):
        a = torch.zeros(4) + 8
        b = torch.zeros(4) + 7
        torch_res = model(a, b)

        spec = torch_glow.CompilationSpec()
        spec.get_settings().set_glow_backend("Interpreter")

        compilation_group = torch_glow.CompilationGroup()
        spec.compilation_groups_append(compilation_group)

        a_spec = torch_glow.InputSpec()
        a_spec.set_same_as(a)
        b_spec = torch_glow.InputSpec()
        b_spec.set_same_as(b)

        compilation_group.input_sets_append([a_spec, b_spec])

        glow_mod = torch_glow.to_glow_selective(model, {
            "foo.bar": (spec, (a, b)),
            "qux": (spec, (a, b))
        })

        glow_mod = torch.jit.trace(glow_mod, (a, b))
        glow_res = glow_mod(a, b)

        assert torch.allclose(torch_res, glow_res)
Example #9
def lower_modules_to_accelerator(model: nn.Module,
                                 trace,
                                 export_options: ExportConfig,
                                 throughput_optimize=False):
    # Raise error if accelerator could not be imported
    if not accelerator_lowering_supported:
        raise RuntimeError("Accelerator Lowering not supported!")

    import torch_glow

    log_accelerator_feature_usage("build.NNPI")
    if ((hasattr(model, "encoder")
         and isinstance(model.encoder, RoBERTaEncoder))
            or (hasattr(model, "representation")
                and isinstance(model.representation, AcceleratorBiLSTM)) or
        (hasattr(model, "lower_module")
         # Internal CNN LM module to add accelerator support.
         and type(model.lower_module).__qualname__ == "CNNLowerModule")):
        backend = "NNPI"
        (
            submod_modelpath,
            compilation_spec_dict,
            inputs_function,
        ) = accelerator.get_modules(model, backend)[0]
        submod_tracepath = accelerator.model2trace_path(submod_modelpath)
        spec = torch_glow.CompilationSpec()
        spec.get_settings().set_glow_backend(backend)
        compilation_group = torch_glow.CompilationGroup()
        spec.compilation_groups_append(compilation_group)
        compilation_group_settings = compilation_group.get_settings()
        compilation_group_settings.set_convert_to_fp16(True)

        # Override the options for the throughput-optimized case
        if throughput_optimize:
            compilation_spec_dict["NNPI_IceCores"] = "4"
            compilation_spec_dict["NNPINumParallelChunks"] = "4"
            compilation_group_settings.set_replication_count(3)

        for k, v in compilation_spec_dict.items():
            compilation_group.get_settings().backend_specific_opts_insert(k, v)

        if inputs_function is not None:
            input_sets = inputs_function(model, trace, export_options, None,
                                         submod_modelpath)
        else:
            raise RuntimeError(
                "inputs_function needs to be specified in accelerator decorator"
            )
        compilation_group.set_input_sets(input_sets)

        trace = torch_glow.to_glow_selective(
            trace,
            {submod_tracepath: spec},
            inplace=False,
        )

        return trace
    else:
        return trace
Example #10
def get_compilation_spec(inputs):
    """helper function to get the compilation spec of the submodule"""
    spec = torch_glow.CompilationSpec()
    spec.get_settings().set_glow_backend("Interpreter")

    compilation_group = torch_glow.CompilationGroup()
    spec.compilation_groups_append(compilation_group)

    compilation_group.input_sets_append(torch_glow.input_specs_from_tensors(inputs))
    return spec
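
For context, here is a minimal usage sketch of the helper above (the ToyModel module and the "forward" method key are illustrative assumptions, not part of the original example):

import torch
import torch.nn as nn
import torch_glow

class ToyModel(nn.Module):
    def forward(self, x):
        return torch.relu(x)

x = torch.randn(4)
traced = torch.jit.trace(ToyModel(), x)
# Build a spec from the example inputs, then lower the traced forward method.
spec = get_compilation_spec([x])
lowered = torch_glow.to_glow(traced, {"forward": spec})
out = lowered(x)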
Example #11
def generate_glow_spec(module, backend, *inputs):
    spec = torch_glow.CompilationSpec()
    spec.get_settings().set_glow_backend(backend)
    compilation_group = torch_glow.CompilationGroup()
    spec.compilation_groups_append(compilation_group)

    input_specs = []
    for input in inputs:
        input_spec = torch_glow.InputSpec()
        input_spec.set_same_as(input)
        input_specs.append(input_spec)
    compilation_group.input_sets_append(input_specs)
    return spec
Example #12
    def build_compiliation_spec(self):
        compilation_spec = torch_glow.CompilationSpec()

        compilation_spec_settings = compilation_spec.get_settings()
        compilation_spec_settings.set_glow_backend("CPU")
        compilation_spec_settings.set_enable_fuser(True)

        fuser_settings = compilation_spec.get_fuser_settings()
        fuser_settings.set_min_fusion_group_size(3)
        fuser_settings.set_max_fusion_merge_size(4)
        fuser_settings.set_fusion_start_index(5)
        fuser_settings.set_fusion_end_index(6)
        fuser_settings.op_blacklist_append("aten::mean")
        fuser_settings.op_blacklist_append("aten::dropout")

        compilation_group = torch_glow.CompilationGroup()

        input1_spec = torch_glow.input_spec_from_tensor(torch.randn(2, 3, 224, 224))
        input2_spec = torch_glow.input_spec_from_tensor(
            torch.randn(3, 2).to(torch.float16)
        )
        compilation_group.input_sets_append([input1_spec, input2_spec])
        compilation_group.input_sets_append(
            torch_glow.input_specs_from_tensors(
                [torch.randn(1, 3, 224, 224), torch.randn(4, 1)]
            )
        )

        compilation_group_settings = compilation_group.get_settings()
        compilation_group_settings.set_convert_to_fp16(True)
        compilation_group_settings.set_num_devices_to_use(50)
        compilation_group_settings.set_replication_count(52)
        compilation_group_settings.backend_specific_opts_insert("apple", "orange")

        compilation_spec.compilation_groups_append(compilation_group)

        default_compilation_group_settings = (
            compilation_spec.get_default_compilation_group_settings()
        )
        default_compilation_group_settings.set_convert_to_fp16(False)
        default_compilation_group_settings.set_num_devices_to_use(89)
        default_compilation_group_settings.set_replication_count(90)
        default_compilation_group_settings.backend_specific_opts_insert(
            "hello", "goodbye"
        )

        return compilation_spec
def run_to_glow(m, x):
    """Trace the model m with input x and call to_glow"""
    traced_m = torch.jit.trace(m, (x))

    spec = torch_glow.CompilationSpec()
    spec.get_settings().set_glow_backend("Interpreter")

    compilation_group = torch_glow.CompilationGroup()
    spec.compilation_groups_append(compilation_group)

    input_spec = torch_glow.InputSpec()
    input_spec.set_same_as(x)

    compilation_group.input_sets_append([input_spec])

    lowered_module = torch_glow.to_glow(traced_m, spec)

    return lowered_module
    def test_to_glow_multiple_groups_and_input_sets(self):
        x1 = torch.randn(1, 4)
        y1 = torch.randn(2, 4)

        x2 = torch.randn(1, 2)
        y2 = torch.randn(5, 2)

        x3 = torch.randn(7)
        y3 = torch.randn(3, 7)

        mod = Foo()
        scripted_mod = torch.jit.script(mod)

        x1_y1_set = torch_glow.input_specs_from_tensors([x1, y1])
        x2_y2_set = torch_glow.input_specs_from_tensors([x2, y2])
        x3_y3_set = torch_glow.input_specs_from_tensors([x3, y3])

        # Create two CompilationGroups; the first contains two input sets
        # and the second contains the third input set
        spec = torch_glow.CompilationSpec()
        spec.get_settings().set_glow_backend("Interpreter")

        compilation_group_1 = torch_glow.CompilationGroup()
        compilation_group_2 = torch_glow.CompilationGroup()
        spec.compilation_groups_append(compilation_group_1)
        spec.compilation_groups_append(compilation_group_2)

        compilation_group_1.input_sets_append(x1_y1_set)
        compilation_group_1.input_sets_append(x2_y2_set)
        compilation_group_2.input_sets_append(x3_y3_set)

        lowered_module = torch_glow.to_glow(scripted_mod, spec)

        torch_res1 = mod(x1, y1)
        torch_res2 = mod(x2, y2)
        torch_res3 = mod(x3, y3)

        glow_res1 = lowered_module(x1, y1)
        glow_res2 = lowered_module(x2, y2)
        glow_res3 = lowered_module(x3, y3)

        assert torch.allclose(torch_res1, glow_res1)
        assert torch.allclose(torch_res2, glow_res2)
        assert torch.allclose(torch_res3, glow_res3)
Example #15
def lower_modules_to_accelerator(model, trace, seq_padding_control,
                                 batch_padding_control):
    import torch_glow

    if hasattr(model, "encoder") and isinstance(model.encoder, RoBERTaEncoder):
        backend = "NNPI"
        submod_modelpath, compilation_spec_dict = accelerator.get_modules(
            model, backend)[0]
        submod_tracepath = accelerator.model2trace_path(submod_modelpath)
        embedding_dim = model.encoder.encoder.transformer.token_embedding.embedding_dim
        spec = torch_glow.CompilationSpec()
        spec.get_settings().set_glow_backend(backend)
        compilation_group = torch_glow.CompilationGroup()
        spec.compilation_groups_append(compilation_group)
        compilation_group_settings = compilation_group.get_settings()
        compilation_group_settings.set_convert_to_fp16(True)
        for k, v in compilation_spec_dict.items():
            compilation_group.get_settings().backend_specific_opts_insert(k, v)

        for seq_len in seq_padding_control:
            if seq_len <= 0:
                continue
            for batch_size in batch_padding_control:
                if batch_size <= 0:
                    continue
                input1 = torch.randn([seq_len, batch_size, embedding_dim],
                                     dtype=torch.float32)
                input2 = torch.randn([batch_size, seq_len]).bool()
                input_specs = torch_glow.input_specs_from_tensors(
                    [input1, input2])
                compilation_group.input_sets_append(input_specs)

        trace = torch_glow.to_glow_selective(
            trace,
            {submod_tracepath: spec},
            inplace=False,
        )

        return trace
    else:
        return trace
    def test_serialization(self):
        with torch.no_grad():
            x = torch.randn([1, 4, 4, 4], dtype=torch.float32)
            y = torch.randn([1, 4, 4, 4], dtype=torch.float32)
            model = Bar()
            model = torch.jit.trace(model, (x, y))

            spec = torch_glow.CompilationSpec()
            spec_settings = spec.get_settings()
            spec_settings.set_glow_backend("NNPI")
            # Enable serialization in this spec
            spec_settings.set_enable_serialize(True)

            compilation_group = torch_glow.CompilationGroup()
            compilation_group_settings = compilation_group.get_settings()
            compilation_group_settings.set_replication_count(1)
            compilation_group_settings.backend_specific_opts_insert(
                "NNPI_IceCores", "1")

            compilation_group.input_sets_append(
                torch_glow.input_specs_from_tensors([x, y]))

            spec.compilation_groups_append(compilation_group)
            torch_glow.disableFusionPass()
            torch_glow.enable_convert_to_fp16()

            # Enable global serialization,
            # then compile (serialize) the model and save it
            torch_glow.enable_dump_serialized_model()
            glow_mod = torch_glow.to_glow(model, spec)
            res1 = glow_mod(x, y)
            torch.jit.save(glow_mod, "/tmp/serialize_to_glow.pt")

            # Enable global deserialization and disable serialization,
            # then load (deserialize) the model into loaded_glow_mod
            torch_glow.enable_deserialize()
            torch_glow.disable_dump_serialized_model()
            loaded_glow_mod = torch.jit.load("/tmp/serialize_to_glow.pt")
            res2 = loaded_glow_mod(x, y)
            assert torch.allclose(res1, res2, 1e-5, 1e-5)
def run_model(m, input, randomize):
    torch_glow.disableFusionPass()
    traced_m = torch.jit.trace(m, input)

    if randomize:
        torch_glow.enable_randomize_constants()
    else:
        torch_glow.disable_randomize_constants()

    spec = torch_glow.CompilationSpec()
    spec.get_settings().set_glow_backend("Interpreter")

    compilation_group = torch_glow.CompilationGroup()
    spec.compilation_groups_append(compilation_group)

    input_spec = torch_glow.InputSpec()
    input_spec.set_same_as(input)

    compilation_group.input_sets_append([input_spec])

    glow_m = torch_glow.to_glow(traced_m, {"forward": spec})
    return glow_m(input)
def lower_modules_to_accelerator(model, trace, seq_padding_control,
                                 batch_padding_control):
    import torch_glow

    if hasattr(model, "encoder") and isinstance(model.encoder, RoBERTaEncoder):
        embedding_dim = model.encoder.encoder.transformer.token_embedding.embedding_dim
        spec = torch_glow.CompilationSpec()
        spec.get_settings().set_glow_backend("NNPI")
        compilation_group = torch_glow.CompilationGroup()
        spec.compilation_groups_append(compilation_group)
        compilation_group_settings = compilation_group.get_settings()
        compilation_group_settings.set_convert_to_fp16(True)
        compilation_group.get_settings().backend_specific_opts_insert(
            "NNPI_IceCores", "12")
        compilation_group.get_settings().backend_specific_opts_insert(
            "NNPINumParallelChunks", "12")

        for seq_len in seq_padding_control:
            if seq_len <= 0:
                continue
            for batch_size in batch_padding_control:
                if batch_size <= 0:
                    continue
                input1 = torch.randn([seq_len, batch_size, embedding_dim],
                                     dtype=torch.float32)
                input2 = torch.randn([batch_size, seq_len]).bool()
                input_specs = torch_glow.input_specs_from_tensors(
                    [input1, input2])
                compilation_group.input_sets_append(input_specs)

        trace = torch_glow.to_glow_selective(
            trace,
            {"model.encoder.encoder.transformer.layers": spec},
            inplace=False,
        )
        return trace
    else:
        return trace
Example #19
    def lower_and_write_to_onnx_helper(self, ModType, onnx_prefix):
        x = torch.randn(1, 3, 8, 8)
        model = create_model(x, ModType)

        spec = torch_glow.CompilationSpec()
        spec.get_settings().set_glow_backend("Interpreter")

        compilation_group = torch_glow.CompilationGroup()
        spec.compilation_groups_append(compilation_group)

        input_spec = torch_glow.InputSpec()
        input_spec.set_same_as(x)

        compilation_group.input_sets_append([input_spec])

        scripted_mod = torch.jit.trace(model, x)
        torch_glow.enable_write_to_onnx()
        torch_glow.set_onnx_file_name_prefix(onnx_prefix)
        torch_glow.enable_write_without_randomize()
        lowered_model = torch_glow.to_glow(scripted_mod, {"forward": spec})

        # Run Glow model
        g = lowered_model(x)

        # Run reference model
        t = model(x)

        self.assertEqual(type(g), type(t))
        self.assertEqual(len(g), len(t))

        for (gi, ti) in zip(g, t):
            self.assertTrue(torch.allclose(gi, ti))

        assert os.path.exists(onnx_prefix + ".onnxtxt")
        onnx_files = glob.glob(onnx_prefix + "*.onnx*")
        for f in onnx_files:
            os.remove(f)
Example #20
def infer_nnpi(model, device, data_type, input_size, output_size, batch_size,
               args):
    import torch_glow
    # Detailed structure for spec can be found at https://fburl.com/diffusion/79q4efud
    # Create compilation spec
    spec = torch_glow.CompilationSpec()
    spec.get_settings().set_glow_backend("NNPI")
    # Create a compilation group and update its settings.
    # A compilation group contains compilation-specific information such as
    # fp16 settings, enableRemoveMutation, and anything else that changes
    # the compiled Glow graph.
    compilation_group = torch_glow.CompilationGroup()
    compilation_group_settings = compilation_group.get_settings()
    compilation_group_settings.set_convert_to_fp16(True)
    compilation_group_settings.set_replication_count(1)
    compilation_group_settings.backend_specific_opts_insert(
        "NNPI_IceCores", "1")

    data = torch.randn(batch_size, input_size)
    # Create input spec and add it into compilation group.
    # This is used for shape inference when lowering the model to Glow.
    data_spec = torch_glow.InputSpec()
    data_spec.set_same_as(data)
    compilation_group.input_sets_append([data_spec])

    spec.compilation_groups_append(compilation_group)

    traced_model = torch.jit.trace(model, (data))
    lowered_model = torch_glow.to_glow(traced_model, spec)

    start_time = time.time()
    for i in range(args.steps + args.warmups):
        lowered_model(data)
        if i < args.warmups:
            start_time = time.time()
    return time.time() - start_time
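
A hedged invocation sketch for the benchmark above (the argparse.Namespace and the small linear model are illustrative assumptions; an NNPI-enabled torch_glow build is required for this to actually run):

import argparse
import torch.nn as nn

args = argparse.Namespace(steps=100, warmups=10)
model = nn.Linear(64, 8).eval()
# infer_nnpi returns the wall-clock time spent in the post-warmup iterations.
elapsed = infer_nnpi(model, device=None, data_type=None, input_size=64,
                     output_size=8, batch_size=32, args=args)
print(f"time for {args.steps} steady-state iterations: {elapsed:.3f}s")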
Example #21
def lower_modules_to_accelerator(
    model: nn.Module, trace, export_options: ExportConfig, throughput_optimize=False
):
    # Raise error if accelerator could not be imported
    if not accelerator_lowering_supported:
        raise RuntimeError("Accelerator Lowering not supported!")

    import torch_glow

    log_accelerator_feature_usage("build.NNPI")
    if (
        (hasattr(model, "encoder") and isinstance(model.encoder, RoBERTaEncoder))
        or (
            hasattr(model, "representation")
            and isinstance(model.representation, AcceleratorBiLSTM)
        )
        or (
            hasattr(model, "lower_module")
            # Internal CNN LM module to add accelerator support.
            and type(model.lower_module).__qualname__ == "CNNLowerModule"
        )
    ):
        backend = "NNPI"
        backend_qualifier = ""

        if throughput_optimize:
            backend_qualifier = ":throughput_optimized"

        modules_to_lower = accelerator.get_modules(model, backend + backend_qualifier)

        if len(modules_to_lower) < 1:
            raise RuntimeError("Need at least one module to lower to accelerator")
        elif len(modules_to_lower) > 1:
            print(f"Warning. Received {len(modules_to_lower)} modules to lower.")
            print("Warning. Only lowering first module.")

        (
            submod_modelpath,
            compilation_spec_dict,
            inputs_function,
        ) = modules_to_lower[0]
        submod_tracepath = accelerator.model2trace_path(submod_modelpath)
        spec = torch_glow.CompilationSpec()
        spec.get_settings().set_glow_backend(backend)
        compilation_group = torch_glow.CompilationGroup()
        spec.compilation_groups_append(compilation_group)
        compilation_group_settings = compilation_group.get_settings()

        # Set values from dict that are not set via backend-specific opts
        compilation_group_settings.set_convert_to_fp16(
            compilation_spec_dict.pop("glow:ConvertToFP16", "true") in ["true", "True"]
        )
        compilation_group_settings.set_replication_count(
            int(compilation_spec_dict.pop("glow:ReplicationCount", "1"))
        )

        for k, v in compilation_spec_dict.items():
            compilation_group.get_settings().backend_specific_opts_insert(k, v)

        if inputs_function is not None:
            input_sets = inputs_function(
                model, trace, export_options, None, submod_modelpath
            )
        else:
            raise RuntimeError(
                "inputs_function needs to be specified in accelerator decorator"
            )
        compilation_group.set_input_sets(input_sets)

        trace = torch_glow.to_glow_selective(
            trace,
            {submod_tracepath: spec},
            inplace=False,
        )

        return trace
    else:
        return trace