def generate(cls, graph: Graph, **kwargs):
    """Build the execution data for ``graph`` after WebGL optimization.

    The graph is rewritten by ``WebGLOptimizeRule``, memory is allocated for
    it, every ``ConstantVariable`` is indexed by name, the constants are
    encoded and the kernels are generated.

    Args:
        graph: Graph to compile.
        **kwargs: Only ``constant_encoder_name`` is read here; it is passed to
            ``ConstantEncoder.get_encoder`` (``None`` when absent).

    Returns:
        ``GraphExecutionData`` built from the optimized graph, its descriptor
        and the encoded constants.
    """
    optimized, _ = WebGLOptimizeRule().optimize(graph)

    if flags.DEBUG:
        traverse.dump(optimized)
        # Debug runs also write the dot dump to "cg.dot" (relative path).
        with open("cg.dot", "w") as dot_file:
            dot_file.write(traverse.dump_dot(optimized))

    layout = allocate(optimized)

    # Name -> location of each constant; the allocation offset is multiplied
    # by 4 to obtain the value stored under "byte_offset".
    constant_nodes = traverse.filter_nodes(traverse.listup_nodes(optimized), ConstantVariable)
    constants_map = {
        node.name: {
            "byte_offset": layout[node].offset * 4,
            "size": node.size,
        }
        for node in constant_nodes
    }

    encoder = ConstantEncoder.get_encoder(kwargs.get("constant_encoder_name"))
    constants_bytes = encoder.encode(layout)

    descriptor = GraphDescriptor(
        kernels=cls.generate_kernels(optimized),
        memory_layout=layout,
        inputs=optimized.inputs,
        outputs=optimized.outputs,
        constants_encoding=encoder.name,
        constants_map=constants_map,
        licenses=optimized.licenses,
    )
    return GraphExecutionData(optimized, descriptor, constants_bytes)
def generate(cls, graph: Graph, **kwargs):
    """Build the execution data for ``graph`` without an optimization pass.

    Memory is allocated for the graph as given, the constants are encoded,
    kernels are generated against the resulting layout, and the sizes involved
    are reported through ``console.debug``.

    Args:
        graph: Graph to compile.
        **kwargs: Only ``constant_encoder_name`` is read here; it is passed to
            ``ConstantEncoder.get_encoder`` (``None`` when absent).

    Returns:
        ``GraphExecutionData`` built from the graph, its descriptor and the
        encoded constants.
    """
    if flags.DEBUG:
        traverse.dump(graph)

    memory_layout = allocate(graph)

    # Layout sizes are reported multiplied by 4.
    console.debug(
        f"[FallbackDescriptorGenerator] memory_layout total size: {memory_layout.total_size * 4}"
    )
    console.debug(
        f"[FallbackDescriptorGenerator] memory_layout static size: {memory_layout.static_size * 4}"
    )
    console.debug(
        f"[FallbackDescriptorGenerator] memory_layout dynamic size: {memory_layout.dynamic_size * 4}"
    )

    encoder = ConstantEncoder.get_encoder(kwargs.get("constant_encoder_name"))
    constants_bytes = encoder.encode(memory_layout)
    console.debug(
        f"[FallbackDescriptorGenerator] constants encoded size: {len(constants_bytes)}"
    )

    kernels = cls.generate_kernels(graph, memory_layout)
    descriptor = GraphDescriptor(
        kernels=kernels,
        memory_layout=memory_layout,
        inputs=graph.inputs,
        outputs=graph.outputs,
        constants_encoding=encoder.name,
        licenses=graph.licenses,
    )
    return GraphExecutionData(graph, descriptor, constants_bytes)
def generate(cls, graph: Graph, **kwargs):
    """Build one descriptor per supported maximum texture size.

    For each of 4096, 8192 and 16384 the value is stored in
    ``config.WEBGL_MAX_TEXTURE_SIZE``, the graph is optimized with
    ``WebGLOptimizeRule``, memory is allocated, constants are indexed and
    encoded, and kernels are generated.

    Args:
        graph: Graph to compile.
        **kwargs: Only ``constant_encoder_name`` is read here; it is passed to
            ``ConstantEncoder.get_encoder`` (``None`` when absent).

    Returns:
        ``GraphExecutionData`` built from the graph produced by the last
        iteration and a dict mapping each texture size to its
        ``(descriptor, constants_bytes)`` pair.
    """
    encoder_name = kwargs.get("constant_encoder_name")
    data_dict = {}  # type: Dict[int, Tuple[GraphDescriptor, bytes]]

    # NOTE(review): config.WEBGL_MAX_TEXTURE_SIZE is left at the last value
    # (16384) when this function returns, and each iteration optimizes the
    # graph produced by the previous one rather than the caller's original.
    # Confirm both are intended.
    for max_texture_size in (4096, 8192, 16384):
        config.WEBGL_MAX_TEXTURE_SIZE = max_texture_size
        graph, _ = WebGLOptimizeRule().optimize(graph)

        layout = allocate(graph)

        # Name -> location of each constant; the allocation offset is
        # multiplied by 4 to obtain the value stored under "byte_offset".
        constant_nodes = traverse.filter_nodes(traverse.listup_nodes(graph), ConstantVariable)
        constants_map = {
            node.name: {
                "byte_offset": layout[node].offset * 4,
                "size": node.size,
            }
            for node in constant_nodes
        }

        encoder = ConstantEncoder.get_encoder(encoder_name)
        constants_bytes = encoder.encode(layout)

        descriptor = GraphDescriptor(
            kernels=cls.generate_kernels(graph),
            memory_layout=layout,
            inputs=graph.inputs,
            outputs=graph.outputs,
            constants_encoding=encoder.name,
            constants_map=constants_map,
            licenses=graph.licenses,
        )
        data_dict[max_texture_size] = (descriptor, constants_bytes)

    return GraphExecutionData(graph, data_dict)
def generate(cls, graph: Graph, **kwargs):
    """Build the execution data for ``graph`` after WebGPU optimization.

    The graph is rewritten by ``WebGPUOptimizeRule``, memory is allocated, the
    constants are encoded and kernels are generated against the layout. When
    ``flags.optimize.VALIDATE_GENERATED_SOURCE`` is set, the descriptor is
    passed to ``validate_kernel_source`` before returning.

    Args:
        graph: Graph to compile.
        **kwargs: Only ``constant_encoder_name`` is read here; it is passed to
            ``ConstantEncoder.get_encoder`` (``None`` when absent).

    Returns:
        ``GraphExecutionData`` built from the optimized graph, its descriptor
        and the encoded constants.
    """
    graph, _ = WebGPUOptimizeRule().optimize(graph)

    if flags.DEBUG:
        traverse.dump(graph)

    memory_layout = allocate(graph)

    # Layout sizes are multiplied by 4 and reported with a "[B]" suffix.
    console.debug(
        f"[WebGPUDescriptorGenerator] memory_layout total size: {memory_layout.total_size * 4}[B]"
    )
    console.debug(
        f"[WebGPUDescriptorGenerator] memory_layout static size: {memory_layout.static_size * 4}[B]"
    )
    console.debug(
        f"[WebGPUDescriptorGenerator] memory_layout dynamic size: {memory_layout.dynamic_size * 4}[B]"
    )

    encoder = ConstantEncoder.get_encoder(kwargs.get("constant_encoder_name"))
    constants_bytes = encoder.encode(memory_layout)
    console.debug(
        f"[WebGPUDescriptorGenerator] constants encoded size: {len(constants_bytes)}[B]"
    )

    descriptor = GraphDescriptor(
        kernels=cls.generate_kernels(graph, memory_layout),
        memory_layout=memory_layout,
        inputs=graph.inputs,
        outputs=graph.outputs,
        constants_encoding=encoder.name,
        licenses=graph.licenses,
    )

    if flags.optimize.VALIDATE_GENERATED_SOURCE:
        validate_kernel_source(descriptor)

    return GraphExecutionData(graph, descriptor, constants_bytes)
def generate(graph: Graph, constant_encoder_name: str = None) -> GraphExecutionData:
    """Build the execution data for ``graph`` without an optimization pass.

    ``Allocator.allocate`` yields the variable layout, the constant layout and
    the constant data; the constants are then encoded and kernels are
    generated against both layouts.

    Args:
        graph: Graph to compile.
        constant_encoder_name: Passed to ``ConstantEncoder.get_encoder`` to
            select the encoder.

    Returns:
        ``GraphExecutionData`` built from the descriptor and the encoded
        constants.
    """
    variables_layout, constants_layout, constants_data = Allocator.allocate(graph)

    if flags.DEBUG:
        print(
            f"[GraphDescriptorGeneratorFallback] constants_layout total size: {constants_data.size} * sizeof(float)"
        )
        print(
            f"[GraphDescriptorGeneratorFallback] variables_layout total size: {variables_layout.size} * sizeof(float)"
        )

    encoder = ConstantEncoder.get_encoder(constant_encoder_name)
    constants_bytes = encoder.encode(constants_layout, constants_data)

    if flags.DEBUG:
        print(
            f"[GraphDescriptorGeneratorFallback] constants encoded size: {len(constants_bytes)}"
        )

    descriptor = GraphDescriptor(
        kernels=generate_kernels(graph, constants_layout, variables_layout),
        constants_layout=constants_layout,
        variables_layout=variables_layout,
        inputs=graph.inputs,
        outputs=graph.outputs,
        constants_encoding=encoder.name,
        licenses=graph.licenses,
    )
    return GraphExecutionData(descriptor, constants_bytes)
def generate(cls, graph: Graph, **kwargs):
    """Build the execution data for ``graph`` after Webassembly optimization.

    The graph is rewritten by ``WebassemblyOptimizeRule``, memory is allocated,
    the constants are encoded, kernels are generated and the heap size to
    request is derived from the layout.

    Args:
        graph: Graph to compile.
        **kwargs: ``constant_encoder_name`` is passed to
            ``ConstantEncoder.get_encoder`` (``None`` when absent).
            ``dynamic_allocation_size`` is used as the dynamic byte size when
            the layout's ``dynamic_size`` is not an ``int``; it defaults to one
            heap block (16 * 1024 * 1024).

    Returns:
        ``GraphExecutionData`` built from the optimized graph, its descriptor
        and the encoded constants.
    """
    graph, _ = WebassemblyOptimizeRule().optimize(graph)

    if flags.DEBUG:
        traverse.dump(graph)

    memory_layout = Allocator.allocate(graph)

    # Layout sizes are reported multiplied by 4.
    console.debug(
        f"[WebassemblyDescriptorGenerator] memory_layout total size: {memory_layout.total_size * 4}"
    )
    console.debug(
        f"[WebassemblyDescriptorGenerator] memory_layout static size: {memory_layout.static_size * 4}"
    )
    console.debug(
        f"[WebassemblyDescriptorGenerator] memory_layout dynamic size: {memory_layout.dynamic_size * 4}"
    )

    encoder = ConstantEncoder.get_encoder(kwargs.get("constant_encoder_name"))
    constants_bytes = encoder.encode(memory_layout)
    console.debug(
        f"[WebassemblyDescriptorGenerator] constants encoded size: {len(constants_bytes)}"
    )

    kernels = cls.generate_kernels(graph, memory_layout)

    heap_block_size = 16 * 1024 * 1024
    dynamic_size = memory_layout.dynamic_size
    # A non-int dynamic size cannot be multiplied out here, so the caller's
    # "dynamic_allocation_size" (or one heap block) is used instead.
    dynamic_size_byte_int = (
        dynamic_size * 4
        if isinstance(dynamic_size, int)
        else kwargs.get("dynamic_allocation_size", heap_block_size)
    )
    total_size_byte = memory_layout.static_size * 4 + dynamic_size_byte_int

    # Total size rounded up to whole blocks, plus one extra block.
    block_count = (total_size_byte + heap_block_size - 1) // heap_block_size + 1
    required_heap = block_count * heap_block_size

    descriptor = GraphDescriptor(
        kernels=kernels,
        memory_layout=memory_layout,
        inputs=graph.inputs,
        outputs=graph.outputs,
        constants_encoding=encoder.name,
        required_heap=required_heap,
        licenses=graph.licenses,
    )
    return GraphExecutionData(graph, descriptor, constants_bytes)
def generate(graph: Graph, constant_encoder_name: str = None) -> GraphExecutionData:
    """Build the execution data for ``graph`` after WebGPU optimization.

    The graph is rewritten by ``WebGPUOptimizeRule``; ``Allocator.allocate``
    then yields the variable layout, the constant layout and the constant
    data. The constants are encoded, kernels are generated against both
    layouts and, when ``flags.optimize.VALIDATE_GENERATED_SOURCE`` is set, the
    descriptor is passed to ``validate_kernel_source``.

    Args:
        graph: Graph to compile.
        constant_encoder_name: Passed to ``ConstantEncoder.get_encoder`` to
            select the encoder.

    Returns:
        ``GraphExecutionData`` built from the descriptor and the encoded
        constants.
    """
    graph, _ = WebGPUOptimizeRule().optimize(graph)

    if flags.DEBUG:
        traverse.dump(graph)

    variables_layout, constants_layout, constants_data = Allocator.allocate(graph)

    encoder = ConstantEncoder.get_encoder(constant_encoder_name)
    constants_bytes = encoder.encode(constants_layout, constants_data)

    if flags.DEBUG:
        # Layout sizes are multiplied by 4 and labelled "[Byte]".
        print(
            f"[GraphDescriptorGeneratorWebGPU] allocated constant-buffer size: {constants_layout.size * 4} [Byte]"
        )
        print(
            f"[GraphDescriptorGeneratorWebGPU] encoded constant-buffer size: {len(constants_bytes)} [Byte]"
        )
        print(
            f"[GraphDescriptorGeneratorWebGPU] allocated variable-buffer size: {variables_layout.size * 4} [Byte]"
        )

    descriptor = GraphDescriptor(
        kernels=generate_kernels(graph, constants_layout, variables_layout),
        constants_layout=constants_layout,
        variables_layout=variables_layout,
        inputs=graph.inputs,
        outputs=graph.outputs,
        constants_encoding=encoder.name,
        licenses=graph.licenses,
    )

    if flags.optimize.VALIDATE_GENERATED_SOURCE:
        if flags.DEBUG:
            print("[GraphDescriptorGeneratorWebGPU] validate generated kernel source")
        validate_kernel_source(descriptor)

    return GraphExecutionData(descriptor, constants_bytes)
def generate(graph: Graph, constant_encoder_name: str = None) -> GraphExecutionData:
    """Build the execution data for ``graph`` after Webassembly optimization.

    The graph is rewritten by ``WebassemblyOptimizeRule``;
    ``Allocator.allocate`` then yields the variable layout, the constant
    layout and the constant data. The constants are encoded, kernels are
    generated against both layouts and the heap size to request is derived
    from the combined layout sizes.

    Args:
        graph: Graph to compile.
        constant_encoder_name: Passed to ``ConstantEncoder.get_encoder`` to
            select the encoder.

    Returns:
        ``GraphExecutionData`` built from the descriptor and the encoded
        constants.
    """
    graph, _ = WebassemblyOptimizeRule().optimize(graph)

    if flags.DEBUG:
        traverse.dump(graph)

    variables_layout, constants_layout, constants_data = Allocator.allocate(graph)

    if flags.DEBUG:
        print(
            f"[GraphDescriptorGeneratorWebassembly] constants_layout total size: {constants_data.size} * sizeof(float)"
        )
        print(
            f"[GraphDescriptorGeneratorWebassembly] variables_layout total size: {variables_layout.size} * sizeof(float)"
        )

    constant_encoder = ConstantEncoder.get_encoder(constant_encoder_name)
    constants_bytes = constant_encoder.encode(constants_layout, constants_data)

    if flags.DEBUG:
        # The prefix previously read "[GraphDescriptorGeneratorWebGPU]", which
        # attributed this message to the wrong generator.
        print(
            f"[GraphDescriptorGeneratorWebassembly] constants encoded size: {len(constants_bytes)}"
        )

    kernels = generate_kernels(graph, constants_layout, variables_layout)

    heap_block_size = 16 * 1048576  # 16 MiB
    weight_data_size = (variables_layout.size + constants_layout.size) * 4  # sizeof(float)
    # Whole blocks contained in the data, plus two more blocks: this covers
    # the partial block and leaves at least one spare block (required + 16MB).
    required_heap = (int(weight_data_size // heap_block_size) + 2) * heap_block_size

    descriptor = GraphDescriptor(
        kernels=kernels,
        constants_layout=constants_layout,
        variables_layout=variables_layout,
        inputs=graph.inputs,
        outputs=graph.outputs,
        constants_encoding=constant_encoder.name,
        required_heap=required_heap,
        licenses=graph.licenses,
    )
    return GraphExecutionData(descriptor, constants_bytes)