Beispiel #1
0
    def generate(cls, graph: Graph, **kwargs):
        """Build the execution data for ``graph`` after WebGL optimization.

        The graph is rewritten by ``WebGLOptimizeRule``, memory is allocated for
        it, every ``ConstantVariable`` node is indexed by name, the constants
        are encoded and the kernels are generated from the optimized graph.

        Args:
            graph: Graph to generate a descriptor for.
            **kwargs: Only ``constant_encoder_name`` is read here; it is handed
                to ``ConstantEncoder.get_encoder`` (``None`` when absent).

        Returns:
            ``GraphExecutionData`` built from the optimized graph, its
            ``GraphDescriptor`` and the encoded constants.
        """
        optimized, _ = WebGLOptimizeRule().optimize(graph)

        if flags.DEBUG:
            traverse.dump(optimized)
            # Debug aid: the DOT dump is written to "cg.dot" relative to the
            # current working directory.
            with open("cg.dot", "w") as dot_file:
                dot_file.write(traverse.dump_dot(optimized))

        layout = allocate(optimized)

        # One entry per constant node: its layout offset multiplied by 4
        # (stored as "byte_offset") and its size.
        constant_nodes = traverse.filter_nodes(traverse.listup_nodes(optimized), ConstantVariable)
        constants_map = {
            node.name: {
                "byte_offset": layout[node].offset * 4,
                "size": node.size,
            }
            for node in constant_nodes
        }

        encoder = ConstantEncoder.get_encoder(kwargs.get("constant_encoder_name"))
        encoded_constants = encoder.encode(layout)

        descriptor = GraphDescriptor(
            kernels=cls.generate_kernels(optimized),
            memory_layout=layout,
            inputs=optimized.inputs,
            outputs=optimized.outputs,
            constants_encoding=encoder.name,
            constants_map=constants_map,
            licenses=optimized.licenses,
        )

        return GraphExecutionData(optimized, descriptor, encoded_constants)
Beispiel #2
0
    def generate(cls, graph: Graph, **kwargs):
        """Build the execution data for ``graph`` without an optimization pass.

        Memory is allocated for the graph as given, the constants are encoded
        and the kernels are generated from the graph and its memory layout.
        Layout and encoded sizes are reported through ``console.debug``.

        Args:
            graph: Graph to generate a descriptor for.
            **kwargs: Only ``constant_encoder_name`` is read here; it is handed
                to ``ConstantEncoder.get_encoder`` (``None`` when absent).

        Returns:
            ``GraphExecutionData`` built from the graph, its
            ``GraphDescriptor`` and the encoded constants.
        """
        if flags.DEBUG:
            traverse.dump(graph)

        memory_layout = allocate(graph)

        # Sizes are logged multiplied by 4 (presumably 4-byte elements, so the
        # numbers are bytes -- TODO confirm).
        console.debug(f"[FallbackDescriptorGenerator] memory_layout total size: {memory_layout.total_size * 4}")
        console.debug(f"[FallbackDescriptorGenerator] memory_layout static size: {memory_layout.static_size * 4}")
        console.debug(f"[FallbackDescriptorGenerator] memory_layout dynamic size: {memory_layout.dynamic_size * 4}")

        encoder_name = kwargs.get("constant_encoder_name")
        encoder = ConstantEncoder.get_encoder(encoder_name)
        constants_bytes = encoder.encode(memory_layout)

        console.debug(f"[FallbackDescriptorGenerator] constants encoded size: {len(constants_bytes)}")

        kernels = cls.generate_kernels(graph, memory_layout)
        descriptor = GraphDescriptor(
            kernels=kernels,
            memory_layout=memory_layout,
            inputs=graph.inputs,
            outputs=graph.outputs,
            constants_encoding=encoder.name,
            licenses=graph.licenses,
        )

        return GraphExecutionData(graph, descriptor, constants_bytes)
Beispiel #3
0
    def generate(cls, graph: Graph, **kwargs):
        """Build one descriptor per supported WebGL maximum texture size.

        For each of the texture sizes 4096, 8192 and 16384 the graph is
        optimized, laid out in memory, its constants are encoded and its
        kernels are generated. The results are keyed by texture size.

        Args:
            graph: Graph to generate descriptors for.
            **kwargs: Only ``constant_encoder_name`` is read here; it is handed
                to ``ConstantEncoder.get_encoder`` (``None`` when absent).

        Returns:
            ``GraphExecutionData`` built from the graph produced by the last
            optimization pass and a dict mapping each texture size to a
            ``(GraphDescriptor, encoded constants)`` tuple.
        """
        results = {}  # type: Dict[int, Tuple[GraphDescriptor, bytes]]

        for texture_size in (4096, 8192, 16384):
            # NOTE(review): this overwrites a module-level setting and does not
            # restore it, so it stays at the last value after the call --
            # confirm that callers expect this.
            config.WEBGL_MAX_TEXTURE_SIZE = texture_size

            # NOTE(review): ``graph`` is rebound on every pass, so each
            # iteration optimizes the result of the previous one rather than
            # the graph originally passed in -- confirm this is intended.
            graph, _ = WebGLOptimizeRule().optimize(graph)

            layout = allocate(graph)

            # One entry per constant node: its layout offset multiplied by 4
            # (stored as "byte_offset") and its size.
            constant_nodes = traverse.filter_nodes(traverse.listup_nodes(graph), ConstantVariable)
            constants_map = {
                node.name: {
                    "byte_offset": layout[node].offset * 4,
                    "size": node.size,
                }
                for node in constant_nodes
            }

            encoder = ConstantEncoder.get_encoder(kwargs.get("constant_encoder_name"))
            encoded_constants = encoder.encode(layout)

            descriptor = GraphDescriptor(
                kernels=cls.generate_kernels(graph),
                memory_layout=layout,
                inputs=graph.inputs,
                outputs=graph.outputs,
                constants_encoding=encoder.name,
                constants_map=constants_map,
                licenses=graph.licenses,
            )
            results[texture_size] = (descriptor, encoded_constants)

        return GraphExecutionData(graph, results)
Beispiel #4
0
    def generate(cls, graph: Graph, **kwargs):
        """Build the execution data for ``graph`` after WebGPU optimization.

        The graph is rewritten by ``WebGPUOptimizeRule``, memory is allocated
        for it, the constants are encoded and the kernels are generated from
        the optimized graph and its memory layout. When
        ``flags.optimize.VALIDATE_GENERATED_SOURCE`` is set, the descriptor is
        passed to ``validate_kernel_source`` before being returned.

        Args:
            graph: Graph to generate a descriptor for.
            **kwargs: Only ``constant_encoder_name`` is read here; it is handed
                to ``ConstantEncoder.get_encoder`` (``None`` when absent).

        Returns:
            ``GraphExecutionData`` built from the optimized graph, its
            ``GraphDescriptor`` and the encoded constants.
        """
        optimized, _ = WebGPUOptimizeRule().optimize(graph)
        if flags.DEBUG:
            traverse.dump(optimized)

        memory_layout = allocate(optimized)

        # Layout sizes are multiplied by 4 and reported in bytes ("[B]").
        console.debug(f"[WebGPUDescriptorGenerator] memory_layout total size: {memory_layout.total_size * 4}[B]")
        console.debug(f"[WebGPUDescriptorGenerator] memory_layout static size: {memory_layout.static_size * 4}[B]")
        console.debug(f"[WebGPUDescriptorGenerator] memory_layout dynamic size: {memory_layout.dynamic_size * 4}[B]")

        encoder_name = kwargs.get("constant_encoder_name")
        encoder = ConstantEncoder.get_encoder(encoder_name)
        constants_bytes = encoder.encode(memory_layout)

        console.debug(f"[WebGPUDescriptorGenerator] constants encoded size: {len(constants_bytes)}[B]")

        descriptor = GraphDescriptor(
            kernels=cls.generate_kernels(optimized, memory_layout),
            memory_layout=memory_layout,
            inputs=optimized.inputs,
            outputs=optimized.outputs,
            constants_encoding=encoder.name,
            licenses=optimized.licenses,
        )

        if flags.optimize.VALIDATE_GENERATED_SOURCE:
            validate_kernel_source(descriptor)

        return GraphExecutionData(optimized, descriptor, constants_bytes)
Beispiel #5
0
def generate(graph: Graph, constant_encoder_name: str = None) -> GraphExecutionData:
    """Build the execution data for ``graph`` without an optimization pass.

    ``Allocator.allocate`` supplies the variables layout, the constants layout
    and the constants data; the constants are then encoded and the kernels are
    generated from the graph and both layouts. Sizes are printed when
    ``flags.DEBUG`` is set.

    Args:
        graph: Graph to generate a descriptor for.
        constant_encoder_name: Name handed to ``ConstantEncoder.get_encoder``.

    Returns:
        ``GraphExecutionData`` holding the ``GraphDescriptor`` and the encoded
        constants.
    """
    variables_layout, constants_layout, constants_data = Allocator.allocate(graph)

    if flags.DEBUG:
        # NOTE(review): the "constants_layout" line reports constants_data.size,
        # not constants_layout.size -- confirm that is the intended figure.
        print(f"[GraphDescriptorGeneratorFallback] constants_layout total size: {constants_data.size} * sizeof(float)")
        print(f"[GraphDescriptorGeneratorFallback] variables_layout total size: {variables_layout.size} * sizeof(float)")

    encoder = ConstantEncoder.get_encoder(constant_encoder_name)
    constants_bytes = encoder.encode(constants_layout, constants_data)

    if flags.DEBUG:
        print(f"[GraphDescriptorGeneratorFallback] constants encoded size: {len(constants_bytes)}")

    descriptor = GraphDescriptor(
        kernels=generate_kernels(graph, constants_layout, variables_layout),
        constants_layout=constants_layout,
        variables_layout=variables_layout,
        inputs=graph.inputs,
        outputs=graph.outputs,
        constants_encoding=encoder.name,
        licenses=graph.licenses,
    )

    return GraphExecutionData(descriptor, constants_bytes)
Beispiel #6
0
    def generate(cls, graph: Graph, **kwargs):
        """Build the execution data for ``graph`` after Webassembly optimization.

        The graph is rewritten by ``WebassemblyOptimizeRule``, memory is
        allocated for it, the constants are encoded, the kernels are generated
        and the heap size to request is computed in 16 MiB blocks.

        Args:
            graph: Graph to generate a descriptor for.
            **kwargs: ``constant_encoder_name`` is handed to
                ``ConstantEncoder.get_encoder`` (``None`` when absent).
                ``dynamic_allocation_size`` is used as the dynamic byte size
                when the layout's ``dynamic_size`` is not an ``int``; it
                defaults to one heap block.

        Returns:
            ``GraphExecutionData`` built from the optimized graph, its
            ``GraphDescriptor`` and the encoded constants.
        """
        optimized, _ = WebassemblyOptimizeRule().optimize(graph)
        if flags.DEBUG:
            traverse.dump(optimized)

        memory_layout = Allocator.allocate(optimized)

        # Sizes are logged multiplied by 4, the same factor used below to
        # obtain byte counts.
        console.debug(f"[WebassemblyDescriptorGenerator] memory_layout total size: {memory_layout.total_size * 4}")
        console.debug(f"[WebassemblyDescriptorGenerator] memory_layout static size: {memory_layout.static_size * 4}")
        console.debug(f"[WebassemblyDescriptorGenerator] memory_layout dynamic size: {memory_layout.dynamic_size * 4}")

        encoder_name = kwargs.get("constant_encoder_name")
        encoder = ConstantEncoder.get_encoder(encoder_name)
        constants_bytes = encoder.encode(memory_layout)

        console.debug(f"[WebassemblyDescriptorGenerator] constants encoded size: {len(constants_bytes)}")

        kernels = cls.generate_kernels(optimized, memory_layout)

        block_bytes = 16 * 1024 * 1024
        # A non-int dynamic size cannot be turned into a byte count here, so
        # the caller-supplied size (or one block) is used instead.
        dynamic_bytes = (
            memory_layout.dynamic_size * 4
            if isinstance(memory_layout.dynamic_size, int)
            else kwargs.get("dynamic_allocation_size", block_bytes)
        )
        total_bytes = memory_layout.static_size * 4 + dynamic_bytes

        # Round the total up to whole blocks, then add one spare block.
        block_count = (total_bytes + block_bytes - 1) // block_bytes + 1
        required_heap = block_count * block_bytes

        descriptor = GraphDescriptor(
            kernels=kernels,
            memory_layout=memory_layout,
            inputs=optimized.inputs,
            outputs=optimized.outputs,
            constants_encoding=encoder.name,
            required_heap=required_heap,
            licenses=optimized.licenses,
        )

        return GraphExecutionData(optimized, descriptor, constants_bytes)
Beispiel #7
0
def generate(graph: Graph, constant_encoder_name: str = None) -> GraphExecutionData:
    """Build the execution data for ``graph`` after WebGPU optimization.

    The graph is rewritten by ``WebGPUOptimizeRule``; ``Allocator.allocate``
    then supplies the variables layout, the constants layout and the constants
    data. The constants are encoded, the kernels are generated and, when
    ``flags.optimize.VALIDATE_GENERATED_SOURCE`` is set, the descriptor is
    passed to ``validate_kernel_source``.

    Args:
        graph: Graph to generate a descriptor for.
        constant_encoder_name: Name handed to ``ConstantEncoder.get_encoder``.

    Returns:
        ``GraphExecutionData`` holding the ``GraphDescriptor`` and the encoded
        constants.
    """
    optimized, _ = WebGPUOptimizeRule().optimize(graph)
    if flags.DEBUG:
        traverse.dump(optimized)

    variables_layout, constants_layout, constants_data = Allocator.allocate(optimized)

    encoder = ConstantEncoder.get_encoder(constant_encoder_name)
    constants_bytes = encoder.encode(constants_layout, constants_data)

    if flags.DEBUG:
        # Layout sizes are multiplied by 4 and reported in bytes.
        print(f"[GraphDescriptorGeneratorWebGPU] allocated constant-buffer size: {constants_layout.size * 4} [Byte]")
        print(f"[GraphDescriptorGeneratorWebGPU]   encoded constant-buffer size: {len(constants_bytes)} [Byte]")
        print(f"[GraphDescriptorGeneratorWebGPU] allocated variable-buffer size: {variables_layout.size * 4} [Byte]")

    descriptor = GraphDescriptor(
        kernels=generate_kernels(optimized, constants_layout, variables_layout),
        constants_layout=constants_layout,
        variables_layout=variables_layout,
        inputs=optimized.inputs,
        outputs=optimized.outputs,
        constants_encoding=encoder.name,
        licenses=optimized.licenses,
    )

    if flags.optimize.VALIDATE_GENERATED_SOURCE:
        if flags.DEBUG:
            print("[GraphDescriptorGeneratorWebGPU] validate generated kernel source")

        validate_kernel_source(descriptor)

    return GraphExecutionData(descriptor, constants_bytes)
Beispiel #8
0
def generate(graph: Graph,
             constant_encoder_name: str = None) -> GraphExecutionData:
    """Build the execution data for ``graph`` after Webassembly optimization.

    The graph is rewritten by ``WebassemblyOptimizeRule``;
    ``Allocator.allocate`` then supplies the variables layout, the constants
    layout and the constants data. The constants are encoded, the kernels are
    generated and the heap size to request is computed in 16 MiB blocks.

    Args:
        graph: Graph to generate a descriptor for.
        constant_encoder_name: Name handed to ``ConstantEncoder.get_encoder``.

    Returns:
        ``GraphExecutionData`` holding the ``GraphDescriptor`` and the encoded
        constants.
    """
    graph, _ = WebassemblyOptimizeRule().optimize(graph)
    if flags.DEBUG:
        traverse.dump(graph)

    variables_layout, constants_layout, constants_data = Allocator.allocate(
        graph)
    if flags.DEBUG:
        print(
            f"[GraphDescriptorGeneratorWebassembly] constants_layout total size: {constants_data.size} * sizeof(float)"
        )
        print(
            f"[GraphDescriptorGeneratorWebassembly] variables_layout total size: {variables_layout.size} * sizeof(float)"
        )
    constant_encoder = ConstantEncoder.get_encoder(constant_encoder_name)
    constants_bytes = constant_encoder.encode(constants_layout, constants_data)
    if flags.DEBUG:
        # The prefix previously read "GraphDescriptorGeneratorWebGPU", which
        # did not match the other messages printed by this function.
        print(
            f"[GraphDescriptorGeneratorWebassembly] constants encoded size: {len(constants_bytes)}"
        )

    kernels = generate_kernels(graph, constants_layout, variables_layout)

    heap_block_size = 16 * 1048576  # 16 MiB
    weight_data_size = (variables_layout.size +
                        constants_layout.size) * 4  # sizeof(float)
    # floor(size / block) + 2 blocks: always enough whole blocks to hold the
    # data plus at least one spare 16 MiB block.
    required_heap = (int(weight_data_size // heap_block_size) +
                     2) * heap_block_size

    descriptor = GraphDescriptor(kernels=kernels,
                                 constants_layout=constants_layout,
                                 variables_layout=variables_layout,
                                 inputs=graph.inputs,
                                 outputs=graph.outputs,
                                 constants_encoding=constant_encoder.name,
                                 required_heap=required_heap,
                                 licenses=graph.licenses)

    return GraphExecutionData(descriptor, constants_bytes)