Example #1
def accelerator_split_module_inputs(
    trace: torch.jit.ScriptFunction, export_options: ExportConfig
):
    import torch_glow

    seq_padding_control, batch_padding_control = get_seq_and_batch_padding_control(
        trace, export_options
    )

    input_examples = []
    for seq_len in seq_padding_control:
        if seq_len <= 0:
            continue

        for batch_size in batch_padding_control:
            if batch_size <= 0:
                continue

            input1 = torch.randint(3, (batch_size, seq_len))
            input2 = torch.randint(3, (batch_size, seq_len))
            input3 = torch.rand(batch_size, seq_len).bool()

            input_specs = torch_glow.input_specs_from_tensors([input1, input2, input3])
            input_examples.append(input_specs)

    return input_examples
Example #2
    def test_save_preprocessed_module(self):
        with torch.no_grad():
            x = torch.randn([1, 4, 4, 4], dtype=torch.float32)
            model = Bar()
            model.eval()
            model = torch.jit.trace(model, x)

            spec = torch_glow.CompilationSpec()
            spec.get_settings().set_glow_backend("Interpreter")

            compilation_group = torch_glow.CompilationGroup()
            spec.compilation_groups_append(compilation_group)

            compilation_group.input_sets_append(
                torch_glow.input_specs_from_tensors([x]))

            torch_glow.disableFusionPass()
            torch_glow.enable_convert_to_fp16()
            glow_mod = torch_glow.to_glow(model, spec)

            reloaded = utils.save_and_reload_model(glow_mod)

            # unwrap the lowered wrapper to reach the saved preprocessed
            # (pre-lowering) TorchScript module and inspect its graph
            wrappername = "__loweredModule__"
            attrname = "__processed_module"
            wp = getattr(reloaded._c, wrappername)
            pp = getattr(wp, attrname)
            pt_model = torch.jit._recursive.wrap_cpp_module(pp)
            graph = pt_model.graph_for(x)
            found = False
            for node in graph.nodes():
                if node.kind() == "quantized::conv2d":
                    found = True

            assert found
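The Bar module referenced above is not part of this excerpt. Since the test checks the graph for a quantized::conv2d node, a minimal placeholder might look like the sketch below; the channel count, kernel size, and quantization parameters are assumptions.

class Bar(torch.nn.Module):  # hypothetical stand-in for the module under test
    def __init__(self):
        super(Bar, self).__init__()
        # quantize -> quantized conv -> dequantize, so the traced graph
        # contains a quantized::conv2d node (parameters are assumptions)
        self.quant = torch.nn.quantized.Quantize(
            scale=0.05, zero_point=1, dtype=torch.quint8
        )
        self.conv = torch.nn.quantized.Conv2d(4, 4, kernel_size=3)
        self.dequant = torch.nn.quantized.DeQuantize()

    def forward(self, x):
        return self.dequant(self.conv(self.quant(x)))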
Example #3
def accelerator_transformerLayers_inputs(
    model: nn.Module,
    trace: torch.jit.ScriptFunction,
    export_options: ExportConfig,
    dataset_iterable: Iterable,
    module_path,
):
    import torch_glow

    seq_padding_control, batch_padding_control = get_seq_and_batch_padding_control(
        trace, export_options
    )

    # this should use a method, or module_path, instead of being hardcoded
    # embedding_dim = model.encoder.encoder.transformer.token_embedding.embedding_dim
    embedding_dim = accelerator.get_embedding_module_from_path(model, module_path)

    input_examples = []
    for seq_len in seq_padding_control:
        if seq_len <= 0:
            continue
        for batch_size in batch_padding_control:
            if batch_size <= 0:
                continue
            # TODO: generate input data directly for now instead of using dataset_iterable; improve later
            input1 = torch.randn(
                [seq_len, batch_size, embedding_dim], dtype=torch.float32
            )
            input2 = torch.randn([batch_size, seq_len]).bool()
            input_specs = torch_glow.input_specs_from_tensors([input1, input2])
            input_examples.append(input_specs)

    return input_examples
Example #4
    def test_to_glow_multiple_groups_and_input_sets(self):
        x1 = torch.randn(1, 4)
        y1 = torch.randn(2, 4)

        x2 = torch.randn(1, 2)
        y2 = torch.randn(5, 2)

        x3 = torch.randn(7)
        y3 = torch.randn(3, 7)

        mod = Foo()
        scripted_mod = torch.jit.script(mod)

        x1_y1_set = torch_glow.input_specs_from_tensors([x1, y1])
        x2_y2_set = torch_glow.input_specs_from_tensors([x2, y2])
        x3_y3_set = torch_glow.input_specs_from_tensors([x3, y3])

        # Create two CompilationGroups; the first contains two input sets
        # and the second contains the third input set
        spec = torch_glow.CompilationSpec()
        spec.get_settings().set_glow_backend("Interpreter")

        compilation_group_1 = torch_glow.CompilationGroup()
        compilation_group_2 = torch_glow.CompilationGroup()
        spec.compilation_groups_append(compilation_group_1)
        spec.compilation_groups_append(compilation_group_2)

        compilation_group_1.input_sets_append(x1_y1_set)
        compilation_group_1.input_sets_append(x2_y2_set)
        compilation_group_2.input_sets_append(x3_y3_set)

        lowered_module = torch_glow.to_glow(scripted_mod, spec)

        torch_res1 = mod(x1, y1)
        torch_res2 = mod(x2, y2)
        torch_res3 = mod(x3, y3)

        glow_res1 = lowered_module(x1, y1)
        glow_res2 = lowered_module(x2, y2)
        glow_res3 = lowered_module(x3, y3)

        assert torch.allclose(torch_res1, glow_res1)
        assert torch.allclose(torch_res2, glow_res2)
        assert torch.allclose(torch_res3, glow_res3)
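The Foo module is not included in this excerpt; any scriptable module that accepts two tensors would work. A minimal assumed placeholder whose inputs broadcast against each of the shape pairs above:

class Foo(torch.nn.Module):  # hypothetical stand-in
    def forward(self, x, y):
        # element-wise add; broadcasts (1, 4)/(2, 4), (1, 2)/(5, 2), (7,)/(3, 7)
        return x + y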
Example #5
def get_compilation_spec(inputs):
    """helper function to get the compilation spec of the submodule"""
    spec = torch_glow.CompilationSpec()
    spec.get_settings().set_glow_backend("Interpreter")

    compilation_group = torch_glow.CompilationGroup()
    spec.compilation_groups_append(compilation_group)

    compilation_group.input_sets_append(torch_glow.input_specs_from_tensors(inputs))
    return spec
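A possible way to use this helper, mirroring the to_glow calls in the other examples; the AddOne module is a hypothetical placeholder.

import torch
import torch_glow

class AddOne(torch.nn.Module):  # hypothetical example submodule
    def forward(self, x):
        return x + 1

x = torch.randn(1, 4)
scripted = torch.jit.script(AddOne())
spec = get_compilation_spec([x])  # one input set built from the example tensor
lowered = torch_glow.to_glow(scripted, spec)
out = lowered(x)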
Example #6
def accelerator_transformerLayers_inputs(
    model: nn.Module,
    trace: torch.jit.ScriptFunction,
    export_options: ExportConfig,
    dataset_iterable: Iterable,
    module_path,
):
    import torch_glow

    # we use the padding control from the Export Config:
    if export_options is None:
        export_options = ExportConfig()

    if export_options.seq_padding_control is None:
        raise RuntimeError("seq padding control not specified")
    if export_options.batch_padding_control is None:
        raise RuntimeError("batch padding control not specified")

    batch_padding_control = export_options.batch_padding_control

    # Restrict seq_padding_control to valid ranges
    seq_padding_control = []
    max_seq_len = trace.get_max_seq_len()
    for pad in export_options.seq_padding_control:
        if pad < max_seq_len:
            seq_padding_control.append(pad)
    seq_padding_control.append(max_seq_len)

    # this should use a method, or module_path, instead of being hardcoded
    # embedding_dim = model.encoder.encoder.transformer.token_embedding.embedding_dim
    embedding_dim = accelerator.get_embedding_module_from_path(
        model, module_path)

    input_examples = []
    for seq_len in seq_padding_control:
        if seq_len <= 0:
            continue
        for batch_size in batch_padding_control:
            if batch_size <= 0:
                continue
            # TODO: generate input data directly for now instead of using dataset_iterable; improve later
            input1 = torch.randn([seq_len, batch_size, embedding_dim],
                                 dtype=torch.float32)
            input2 = torch.randn([batch_size, seq_len]).bool()
            input_specs = torch_glow.input_specs_from_tensors([input1, input2])
            input_examples.append(input_specs)

    return input_examples
Example #7
    def build_compiliation_spec(self):
        compilation_spec = torch_glow.CompilationSpec()

        compilation_spec_settings = compilation_spec.get_settings()
        compilation_spec_settings.set_glow_backend("CPU")
        compilation_spec_settings.set_enable_fuser(True)

        fuser_settings = compilation_spec.get_fuser_settings()
        fuser_settings.set_min_fusion_group_size(3)
        fuser_settings.set_max_fusion_merge_size(4)
        fuser_settings.set_fusion_start_index(5)
        fuser_settings.set_fusion_end_index(6)
        fuser_settings.op_blacklist_append("aten::mean")
        fuser_settings.op_blacklist_append("aten::dropout")

        compilation_group = torch_glow.CompilationGroup()

        input1_spec = torch_glow.input_spec_from_tensor(torch.randn(2, 3, 224, 224))
        input2_spec = torch_glow.input_spec_from_tensor(
            torch.randn(3, 2).to(torch.float16)
        )
        compilation_group.input_sets_append([input1_spec, input2_spec])
        compilation_group.input_sets_append(
            torch_glow.input_specs_from_tensors(
                [torch.randn(1, 3, 224, 224), torch.randn(4, 1)]
            )
        )

        compilation_group_settings = compilation_group.get_settings()
        compilation_group_settings.set_convert_to_fp16(True)
        compilation_group_settings.set_num_devices_to_use(50)
        compilation_group_settings.set_replication_count(52)
        compilation_group_settings.backend_specific_opts_insert("apple", "orange")

        compilation_spec.compilation_groups_append(compilation_group)

        default_compilation_group_settings = (
            compilation_spec.get_default_compilation_group_settings()
        )
        default_compilation_group_settings.set_convert_to_fp16(False)
        default_compilation_group_settings.set_num_devices_to_use(89)
        default_compilation_group_settings.set_replication_count(90)
        default_compilation_group_settings.backend_specific_opts_insert(
            "hello", "goodbye"
        )

        return compilation_spec
Example #8
def accelerator_lstm_inputs(
    model: nn.Module,
    trace: torch.jit.ScriptFunction,
    export_options: ExportConfig,
    dataset_iterable: Iterable,
    module_path,
):
    import torch_glow

    # we use the padding control from the Export Config:
    if export_options is None:
        export_options = ExportConfig()

    if export_options.seq_padding_control is None:
        raise RuntimeError("seq padding control not specified")
    if export_options.batch_padding_control is None:
        raise RuntimeError("batch padding control not specified")

    batch_padding_control = export_options.batch_padding_control
    seq_padding_control = export_options.seq_padding_control
    embedding_dim = trace.embedding.word_embedding.embedding_dim * 2
    lstm_num_layers = trace.lstm_num_layers
    lstm_dim = trace.lstm_dim

    input_examples = []
    for seq_len in seq_padding_control:
        if seq_len <= 0:
            continue
        for batch_size in batch_padding_control:
            if batch_size <= 0:
                continue
            # TODO: generate input data directly for now instead of using dataset_iterable; improve later
            input_embedding = torch.randn(
                [batch_size, seq_len, embedding_dim], dtype=torch.float32
            )
            input_hidden = torch.randn(
                [batch_size, lstm_num_layers, lstm_dim], dtype=torch.float32
            )
            input_cell = torch.randn(
                [batch_size, lstm_num_layers, lstm_dim], dtype=torch.float32
            )
            input_specs = torch_glow.input_specs_from_tensors(
                [input_embedding, input_hidden, input_cell]
            )
            input_examples.append(input_specs)

    return input_examples
Example #9
def lower_modules_to_accelerator(model, trace, seq_padding_control,
                                 batch_padding_control):
    import torch_glow

    if hasattr(model, "encoder") and isinstance(model.encoder, RoBERTaEncoder):
        backend = "NNPI"
        submod_modelpath, compilation_spec_dict = accelerator.get_modules(
            model, backend)[0]
        submod_tracepath = accelerator.model2trace_path(submod_modelpath)
        embedding_dim = model.encoder.encoder.transformer.token_embedding.embedding_dim
        spec = torch_glow.CompilationSpec()
        spec.get_settings().set_glow_backend(backend)
        compilation_group = torch_glow.CompilationGroup()
        spec.compilation_groups_append(compilation_group)
        compilation_group_settings = compilation_group.get_settings()
        compilation_group_settings.set_convert_to_fp16(True)
        for k, v in compilation_spec_dict.items():
            compilation_group.get_settings().backend_specific_opts_insert(k, v)

        for seq_len in seq_padding_control:
            if seq_len <= 0:
                continue
            for batch_size in batch_padding_control:
                if batch_size <= 0:
                    continue
                input1 = torch.randn([seq_len, batch_size, embedding_dim],
                                     dtype=torch.float32)
                input2 = torch.randn([batch_size, seq_len]).bool()
                input_specs = torch_glow.input_specs_from_tensors(
                    [input1, input2])
                compilation_group.input_sets_append(input_specs)

        # lower only the selected submodule to Glow; the rest of the traced
        # model keeps running through the regular TorchScript interpreter
        trace = torch_glow.to_glow_selective(
            trace,
            {submod_tracepath: spec},
            inplace=False,
        )

        return trace
    else:
        return trace
Example #10
    def test_serialization(self):
        with torch.no_grad():
            x = torch.randn([1, 4, 4, 4], dtype=torch.float32)
            y = torch.randn([1, 4, 4, 4], dtype=torch.float32)
            model = Bar()
            model = torch.jit.trace(model, (x, y))

            spec = torch_glow.CompilationSpec()
            spec_settings = spec.get_settings()
            spec_settings.set_glow_backend("NNPI")
            # Enable serialization in this spec
            spec_settings.set_enable_serialize(True)

            compilation_group = torch_glow.CompilationGroup()
            compilation_group_settings = compilation_group.get_settings()
            compilation_group_settings.set_replication_count(1)
            compilation_group_settings.backend_specific_opts_insert(
                "NNPI_IceCores", "1")

            compilation_group.input_sets_append(
                torch_glow.input_specs_from_tensors([x, y]))

            spec.compilation_groups_append(compilation_group)
            torch_glow.disableFusionPass()
            torch_glow.enable_convert_to_fp16()

            # Enable global serialization, then compile (serialize)
            # the model and save it
            torch_glow.enable_dump_serialized_model()
            glow_mod = torch_glow.to_glow(model, spec)
            res1 = glow_mod(x, y)
            torch.jit.save(glow_mod, "/tmp/serialize_to_glow.pt")

            # Enable global deserialization and disable serialization,
            # then load (deserialize) the model into loaded_glow_mod
            torch_glow.enable_deserialize()
            torch_glow.disable_dump_serialized_model()
            loaded_glow_mod = torch.jit.load("/tmp/serialize_to_glow.pt")
            res2 = loaded_glow_mod(x, y)
            assert torch.allclose(res1, res2, 1e-5, 1e-5)
Example #11
def accelerator_transformerLayers_inputs(model: nn.Module,
                                         export_options: ExportConfig,
                                         dataset_iterable: iter, module_path):
    import torch_glow

    # we use the padding control from the Export Config:
    if export_options is None:
        export_options = ExportConfig()

    seq_padding_control = export_options.seq_padding_control
    batch_padding_control = export_options.batch_padding_control
    if seq_padding_control is None:
        raise RuntimeError("seq padding control not specified")
    if batch_padding_control is None:
        raise RuntimeError("batch padding control not specified")

    max_seq_len = model.get_max_seq_len()
    seq_padding_control = [
        pad if pad <= max_seq_len else max_seq_len
        for pad in seq_padding_control
    ] + [max_seq_len]

    # this should use a method, or module_path, instead of being hardcoded
    embedding_dim = model.encoder.encoder.transformer.token_embedding.embedding_dim

    input_examples = []
    for seq_len in seq_padding_control:
        if seq_len <= 0:
            continue
        for batch_size in batch_padding_control:
            if batch_size <= 0:
                continue
            # TODO: generate input data directly for now instead of using dataset_iterable; improve later
            input1 = torch.randn([seq_len, batch_size, embedding_dim],
                                 dtype=torch.float32)
            input2 = torch.randn([batch_size, seq_len]).bool()
            input_specs = torch_glow.input_specs_from_tensors([input1, input2])
            input_examples.append(input_specs)

    return input_examples
Example #12
def lower_modules_to_accelerator(model, trace, seq_padding_control,
                                 batch_padding_control):
    import torch_glow

    if hasattr(model, "encoder") and isinstance(model.encoder, RoBERTaEncoder):
        embedding_dim = model.encoder.encoder.transformer.token_embedding.embedding_dim
        spec = torch_glow.CompilationSpec()
        spec.get_settings().set_glow_backend("NNPI")
        compilation_group = torch_glow.CompilationGroup()
        spec.compilation_groups_append(compilation_group)
        compilation_group_settings = compilation_group.get_settings()
        compilation_group_settings.set_convert_to_fp16(True)
        compilation_group.get_settings().backend_specific_opts_insert(
            "NNPI_IceCores", "12")
        compilation_group.get_settings().backend_specific_opts_insert(
            "NNPINumParallelChunks", "12")

        for seq_len in seq_padding_control:
            if seq_len <= 0:
                continue
            for batch_size in batch_padding_control:
                if batch_size <= 0:
                    continue
                input1 = torch.randn([seq_len, batch_size, embedding_dim],
                                     dtype=torch.float32)
                input2 = torch.randn([batch_size, seq_len]).bool()
                input_specs = torch_glow.input_specs_from_tensors(
                    [input1, input2])
                compilation_group.input_sets_append(input_specs)

        trace = torch_glow.to_glow_selective(
            trace,
            {"model.encoder.encoder.transformer.layers": spec},
            inplace=False,
        )
        return trace
    else:
        return trace
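A sketch of a possible call site, assuming model and trace were produced earlier; the padding lists are illustrative values, and entries that are not positive are skipped by the loops above.

seq_padding_control = [0, 32, 64]    # sequence lengths to compile for
batch_padding_control = [0, 1, 8]    # batch sizes to compile for
trace = lower_modules_to_accelerator(
    model, trace, seq_padding_control, batch_padding_control
)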