コード例 #1
0
ファイル: generator.py プロジェクト: wathela/webdnn
    def generate(cls, graph: Graph, **kwargs):
        """Build the fallback-backend execution data for *graph*.

        Allocates a memory layout, encodes the constant weights with the
        encoder named by ``kwargs["constant_encoder_name"]`` (if any), and
        packs the generated kernels into a :class:`GraphDescriptor`.

        Returns a :class:`GraphExecutionData` bundling graph, descriptor,
        and encoded constants.
        """
        if flags.DEBUG:
            traverse.dump(graph)

        memory_layout = allocate(graph)

        # Layout sizes are element counts; *4 converts to bytes (float32).
        for label, count in (("total", memory_layout.total_size),
                             ("static", memory_layout.static_size),
                             ("dynamic", memory_layout.dynamic_size)):
            console.debug(f"[FallbackDescriptorGenerator] memory_layout {label} size: {count * 4}")

        encoder = ConstantEncoder.get_encoder(kwargs.get("constant_encoder_name"))
        constants_bytes = encoder.encode(memory_layout)

        console.debug(f"[FallbackDescriptorGenerator] constants encoded size: {len(constants_bytes)}")

        descriptor = GraphDescriptor(
            kernels=cls.generate_kernels(graph, memory_layout),
            memory_layout=memory_layout,
            inputs=graph.inputs,
            outputs=graph.outputs,
            constants_encoding=encoder.name,
            licenses=graph.licenses,
        )

        return GraphExecutionData(graph, descriptor, constants_bytes)
コード例 #2
0
ファイル: generator.py プロジェクト: wathela/webdnn
    def generate(cls, graph: Graph, **kwargs):
        """Build the WebGL-backend execution data for *graph*.

        Runs the WebGL optimization rule, allocates a memory layout,
        records each constant's byte offset/size, encodes the constants,
        and assembles the :class:`GraphDescriptor`.
        """
        graph, _ = WebGLOptimizeRule().optimize(graph)
        if flags.DEBUG:
            traverse.dump(graph)
            with open("cg.dot", "w") as f:
                f.write(traverse.dump_dot(graph))

        memory_layout = allocate(graph)

        # Map constant name -> its location in the weight buffer.
        # Offsets are in elements; *4 converts to bytes (float32).
        constants_map = {
            constant.name: {
                "byte_offset": memory_layout[constant].offset * 4,
                "size": constant.size,
            }
            for constant in traverse.filter_nodes(traverse.listup_nodes(graph), ConstantVariable)
        }

        encoder = ConstantEncoder.get_encoder(kwargs.get("constant_encoder_name"))
        constants_bytes = encoder.encode(memory_layout)

        descriptor = GraphDescriptor(
            kernels=cls.generate_kernels(graph),
            memory_layout=memory_layout,
            inputs=graph.inputs,
            outputs=graph.outputs,
            constants_encoding=encoder.name,
            constants_map=constants_map,
            licenses=graph.licenses,
        )

        return GraphExecutionData(graph, descriptor, constants_bytes)
コード例 #3
0
    def generate(cls, graph: Graph, **kwargs):
        """Build the WebGPU-backend execution data for *graph*.

        Optimizes the graph, allocates memory, encodes constants, generates
        kernels, and optionally validates the generated kernel source when
        ``flags.optimize.VALIDATE_GENERATED_SOURCE`` is set.
        """
        graph, _ = WebGPUOptimizeRule().optimize(graph)
        if flags.DEBUG:
            traverse.dump(graph)

        memory_layout = allocate(graph)
        # Layout sizes are element counts; *4 converts to bytes (float32).
        for label, count in (("total", memory_layout.total_size),
                             ("static", memory_layout.static_size),
                             ("dynamic", memory_layout.dynamic_size)):
            console.debug(f"[WebGPUDescriptorGenerator] memory_layout {label} size: {count * 4}[B]")

        encoder = ConstantEncoder.get_encoder(kwargs.get("constant_encoder_name"))
        constants_bytes = encoder.encode(memory_layout)

        console.debug(f"[WebGPUDescriptorGenerator] constants encoded size: {len(constants_bytes)}[B]")

        descriptor = GraphDescriptor(
            kernels=cls.generate_kernels(graph, memory_layout),
            memory_layout=memory_layout,
            inputs=graph.inputs,
            outputs=graph.outputs,
            constants_encoding=encoder.name,
            licenses=graph.licenses,
        )

        # Optional sanity pass over the generated kernel sources.
        if flags.optimize.VALIDATE_GENERATED_SOURCE:
            validate_kernel_source(descriptor)

        return GraphExecutionData(graph, descriptor, constants_bytes)
コード例 #4
0
def main():
    """CLI entry point: convert a TF-slim ResNet-50 checkpoint to WebDNN.

    Fetches the TF-slim sources and pretrained checkpoint into ``--out``,
    rebuilds the network, restores its weights, converts the TensorFlow
    graph, and saves one graph descriptor per backend in ``--backend``.
    """
    # Graph conversion recurses deeply over the network; raise the limit first.
    sys.setrecursionlimit(10000)

    parser = argparse.ArgumentParser()
    parser.add_argument("--model", default="resnet50", choices=["resnet50"])
    parser.add_argument('--out',
                        '-o',
                        default='output_tensorflow',
                        help='Directory to output the graph descriptor')
    parser.add_argument("--encoding", help="name of weight encoder")
    parser.add_argument("--backend",
                        default="webgpu,webgl,webassembly,fallback",
                        help="backend")
    args = parser.parse_args()

    os.makedirs(args.out, exist_ok=True)
    # Clone the TF-slim model zoo once; reuse the checkout on later runs.
    slim_dir = os.path.join(args.out, "models/slim")
    if not os.path.exists(slim_dir):
        clone_slim(args.out)

    model_path = download_model(args.out)

    # The slim checkout must be on sys.path before `nets` can be imported,
    # so this import is deliberately deferred to this point.
    sys.path.append(slim_dir)
    from nets import resnet_v1
    image_size = resnet_v1.resnet_v1.default_image_size

    # NOTE(review): checkpoints_dir is assigned but never used below.
    checkpoints_dir = args.out
    sess = tf.Session()
    # Single-image float32 input placeholder; layout looks like NHWC
    # (batch=1, height, width, 3 channels).
    processed_images = tf.placeholder(tf.float32,
                                      [1, image_size, image_size, 3])

    # Create the model, use the default arg scope to configure the batch norm parameters.
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        logits, _ = resnet_v1.resnet_v1_50(processed_images,
                                           num_classes=1000,
                                           is_training=False)
    probabilities = tf.nn.softmax(logits)

    # Restore the pretrained weights from the downloaded checkpoint.
    init_fn = slim.assign_from_checkpoint_fn(model_path,
                                             slim.get_model_variables())

    init_fn(sess)

    # Convert the TF graph (inputs -> softmax output) into a WebDNN graph.
    graph = TensorFlowConverter(sess, batch_size=1).convert([processed_images],
                                                            [probabilities])

    from webdnn.graph import traverse
    traverse.dump(graph)

    # Emit one graph descriptor per requested backend.
    for backend in args.backend.split(","):
        graph_exec_data = generate_descriptor(
            backend, graph, constant_encoder_name=args.encoding)
        graph_exec_data.save(args.out)

    console.stderr("Done.")
コード例 #5
0
    def optimize(self, graph: Graph):
        """Assert every variable's texture fits within the WebGL limit.

        Never rewrites the graph; always returns ``(graph, False)``.
        Raises :class:`AssertionError` for any oversized texture.
        """
        traverse.dump(graph)
        limit = config.WEBGL_MAX_TEXTURE_SIZE

        for variable in traverse.listup_variables(graph):
            height, width = TextureShape.get(variable)
            detail = (f"Texture size is invalid: {variable.name} \n"
                      f"  (variable shape)={variable.shape}, \n"
                      f"  (channel mode)={ChannelMode.get(variable).name}, \n"
                      f"  (texture shape)=(width={width}, height={height}), \n"
                      f"  (WEBGL_MAX_TEXTURE_SIZE)={config.WEBGL_MAX_TEXTURE_SIZE}")
            assert height <= limit and width <= limit, detail
        return graph, False
コード例 #6
0
ファイル: generator.py プロジェクト: cys4/webdnn
    def generate(cls, graph: Graph, **kwargs):
        """Build the Webassembly-backend execution data for *graph*.

        Besides the usual layout/encoding/kernel steps, computes
        ``required_heap``: the byte size the runtime must allocate, sized
        from the static layout plus either the resolved dynamic size or the
        caller-supplied ``dynamic_allocation_size``.
        """
        graph, _ = WebassemblyOptimizeRule().optimize(graph)
        if flags.DEBUG:
            traverse.dump(graph)

        memory_layout = Allocator.allocate(graph)

        # Layout sizes are element counts; *4 converts to bytes (float32).
        for label, count in (("total", memory_layout.total_size),
                             ("static", memory_layout.static_size),
                             ("dynamic", memory_layout.dynamic_size)):
            console.debug(f"[WebassemblyDescriptorGenerator] memory_layout {label} size: {count * 4}")

        encoder = ConstantEncoder.get_encoder(kwargs.get("constant_encoder_name"))
        constants_bytes = encoder.encode(memory_layout)

        console.debug(f"[WebassemblyDescriptorGenerator] constants encoded size: {len(constants_bytes)}")

        kernels = cls.generate_kernels(graph, memory_layout)

        # Heap is sized in 16 MiB blocks.
        block = 16 * 1024 * 1024
        if isinstance(memory_layout.dynamic_size, int):
            dynamic_bytes = memory_layout.dynamic_size * 4
        else:
            # Dynamic size is unresolved (placeholder); fall back to the
            # caller's override or one block.
            dynamic_bytes = kwargs.get("dynamic_allocation_size", block)
        total_bytes = memory_layout.static_size * 4 + dynamic_bytes

        # required for calculation (size ceiling to one block) + one block
        required_heap = ((total_bytes + block - 1) // block + 1) * block

        descriptor = GraphDescriptor(
            kernels=kernels,
            memory_layout=memory_layout,
            inputs=graph.inputs,
            outputs=graph.outputs,
            constants_encoding=encoder.name,
            required_heap=required_heap,
            licenses=graph.licenses,
        )

        return GraphExecutionData(graph, descriptor, constants_bytes)
コード例 #7
0
    def optimize(self, graph: Graph):
        traverse.dump(graph)
        MAX_SIZE = config.WEBGL_MAX_TEXTURE_SIZE

        for v in traverse.listup_variables(graph):
            if not Placeholder.check_resolved(v.size):
                continue

            height, width = TextureShape.get(v)
            assert height <= MAX_SIZE and width <= MAX_SIZE, f"""
[SplitTexture] Texture size is invalid: {v.name}
    (variable shape)={v.shape}
    (channel mode)={ChannelMode.get(v).name}
    (texture shape)=(width={width}, height={height})
    (WEBGL_MAX_TEXTURE_SIZE)={config.WEBGL_MAX_TEXTURE_SIZE}"""

        return graph, False
コード例 #8
0
def generate(graph: Graph,
             constant_encoder_name: str = None) -> GraphExecutionData:
    """Generate the WebGPU graph descriptor and encoded constant buffer.

    Optimizes *graph*, allocates separate constant/variable layouts,
    encodes constants with the named encoder, generates kernels, and
    (optionally) validates the generated kernel source.
    """
    graph, _ = WebGPUOptimizeRule().optimize(graph)
    if flags.DEBUG:
        traverse.dump(graph)

    variables_layout, constants_layout, constants_data = Allocator.allocate(graph)

    encoder = ConstantEncoder.get_encoder(constant_encoder_name)
    constants_bytes = encoder.encode(constants_layout, constants_data)

    if flags.DEBUG:
        # Sizes are element counts; *4 converts to bytes (float32).
        print(f"[GraphDescriptorGeneratorWebGPU] allocated constant-buffer size: {constants_layout.size * 4} [Byte]")
        print(f"[GraphDescriptorGeneratorWebGPU]   encoded constant-buffer size: {len(constants_bytes)} [Byte]")
        print(f"[GraphDescriptorGeneratorWebGPU] allocated variable-buffer size: {variables_layout.size * 4} [Byte]")

    kernels = generate_kernels(graph, constants_layout, variables_layout)

    descriptor = GraphDescriptor(
        kernels=kernels,
        constants_layout=constants_layout,
        variables_layout=variables_layout,
        inputs=graph.inputs,
        outputs=graph.outputs,
        constants_encoding=encoder.name,
        licenses=graph.licenses,
    )

    if flags.optimize.VALIDATE_GENERATED_SOURCE:
        if flags.DEBUG:
            print("[GraphDescriptorGeneratorWebGPU] validate generated kernel source")

        validate_kernel_source(descriptor)

    return GraphExecutionData(descriptor, constants_bytes)
コード例 #9
0
ファイル: generator.py プロジェクト: liuguoyou/webdnn
def generate(graph: Graph,
             constant_encoder_name: str = None) -> GraphExecutionData:
    """Generate the Webassembly graph descriptor and encoded constants.

    Optimizes *graph*, allocates constant/variable layouts, encodes the
    constants, generates kernels, and computes ``required_heap`` — the
    byte size the Webassembly runtime must reserve for all weights plus
    headroom, rounded up in 16 MB blocks.
    """
    graph, _ = WebassemblyOptimizeRule().optimize(graph)
    if flags.DEBUG:
        traverse.dump(graph)

    variables_layout, constants_layout, constants_data = Allocator.allocate(
        graph)
    if flags.DEBUG:
        # NOTE(review): the first line reports constants_data.size rather
        # than constants_layout.size — presumably equal; confirm upstream.
        print(
            f"[GraphDescriptorGeneratorWebassembly] constants_layout total size: {constants_data.size} * sizeof(float)"
        )
        print(
            f"[GraphDescriptorGeneratorWebassembly] variables_layout total size: {variables_layout.size} * sizeof(float)"
        )
    constant_encoder = ConstantEncoder.get_encoder(constant_encoder_name)
    constants_bytes = constant_encoder.encode(constants_layout, constants_data)
    if flags.DEBUG:
        # Bug fix: this message was mis-tagged "[GraphDescriptorGeneratorWebGPU]"
        # (copy-paste from the WebGPU generator).
        print(
            f"[GraphDescriptorGeneratorWebassembly] constants encoded size: {len(constants_bytes)}"
        )

    kernels = generate_kernels(graph, constants_layout, variables_layout)

    weight_data_size = (variables_layout.size +
                        constants_layout.size) * 4  # sizeof(float)
    required_heap = (int(weight_data_size //
                         (16 * 1048576)) + 2) * 16 * 1048576  # required + 16MB

    descriptor = GraphDescriptor(kernels=kernels,
                                 constants_layout=constants_layout,
                                 variables_layout=variables_layout,
                                 inputs=graph.inputs,
                                 outputs=graph.outputs,
                                 constants_encoding=constant_encoder.name,
                                 required_heap=required_heap,
                                 licenses=graph.licenses)

    return GraphExecutionData(descriptor, constants_bytes)
コード例 #10
0
    def generate(cls, graph: Graph, **kwargs):
        """Build WebGL execution data for several texture-size limits.

        Re-optimizes a deep copy of the input graph once per candidate
        ``WEBGL_MAX_TEXTURE_SIZE`` (4096/8192/16384) and collects one
        ``(descriptor, constants_bytes)`` pair per limit, so the runtime
        can pick the variant its device supports.
        """
        data_dict = {}  # type: Dict[int, Tuple[GraphDescriptor, bytes]]

        original_graph = graph
        for max_texture_size in (4096, 8192, 16384):
            # The optimize rule reads this global; set it before optimizing.
            config.WEBGL_MAX_TEXTURE_SIZE = max_texture_size
            graph, _ = WebGLOptimizeRule().optimize(copy.deepcopy(original_graph))
            if flags.DEBUG:
                traverse.dump(graph)

            memory_layout = allocate(graph)

            # Map constant name -> its location in the weight buffer.
            # Offsets are in elements; *4 converts to bytes (float32).
            constants_map = {
                constant.name: {
                    "byte_offset": memory_layout[constant].offset * 4,
                    "size": constant.size,
                }
                for constant in traverse.filter_nodes(traverse.listup_nodes(graph), ConstantVariable)
            }

            encoder = ConstantEncoder.get_encoder(kwargs.get("constant_encoder_name"))
            constants_bytes = encoder.encode(memory_layout)

            descriptor = GraphDescriptor(
                kernels=cls.generate_kernels(graph),
                memory_layout=memory_layout,
                inputs=graph.inputs,
                outputs=graph.outputs,
                constants_encoding=encoder.name,
                constants_map=constants_map,
                licenses=graph.licenses,
            )
            data_dict[max_texture_size] = (descriptor, constants_bytes)

        return GraphExecutionData(graph, data_dict)
コード例 #11
0
def main():
    """CLI entry point: convert a saved Keras model into WebDNN descriptors.

    Loads the model (with optional plugin-provided custom layers), converts
    it to a WebDNN graph, resolves placeholder input shapes against the
    required ``--input_shape`` arguments, then generates and saves a graph
    descriptor for each backend in ``--backend``. Exits with status 1 if
    any backend fails.
    """
    sys.setrecursionlimit(10000)  # workaround for deep copying large graph
    parser = argparse.ArgumentParser()
    parser.add_argument("kerasmodel")
    parser.add_argument("--backend",
                        default="webgpu,webgl,webassembly,fallback",
                        help="comma-separated list of backends")
    parser.add_argument(
        "--input_shape",
        required=True,
        action="append",
        help=
        "shape of blobs for inputs (example: '(1,3,224,224)'), can be specified multiple times"
    )
    # parser.add_argument("--input_data_format", choices=["channels_first", "channels_last"])
    parser.add_argument(
        "--out",
        help="output directory (default: <model>/webdnn_graph_descriptor)")
    parser.add_argument("--encoding", help="name of weight encoder")
    parser.add_argument("--visualize_ir", action="store_true")
    parser.add_argument(
        "--plugin",
        action="append",
        help="plugin python files which are imported before transpiling")
    args = parser.parse_args()

    console.stderr(f"[{path.basename(__file__)}] Generating feedforward graph")
    # Load user plugins first so their custom layer classes are available
    # to keras.models.load_model below.
    class_list = []
    if args.plugin:
        for plugin_path in args.plugin:
            class_list += _load_plugin(plugin_path)
    custom_objects = {}
    if len(class_list) > 0:
        # custom_objects is a dictionary for load_model to load user-defined custom layers
        for k, v in class_list:
            custom_objects[k] = v

    input_shapes = [
        Shape.parse(input_shape)[0] for input_shape in args.input_shape
    ]

    # compile=False: only the forward graph is needed, not training state.
    model = keras.models.load_model(args.kerasmodel,
                                    custom_objects=custom_objects,
                                    compile=False)
    model.build(input_shape=None)
    converter = KerasConverter(batch_size=Placeholder(label='N'))
    graph = converter.convert(model)
    traverse.dump(graph)

    # Reconcile each graph input's (possibly placeholder) shape with the
    # user-supplied --input_shape, dimension by dimension.
    for graph_input, input_shape in zip(graph.inputs, input_shapes):
        for p1, p2 in zip(graph_input.shape, input_shape):
            # Graph dim unresolved, user dim concrete: adopt the user's value.
            if not Placeholder.check_resolved(
                    p1) and Placeholder.check_resolved(p2):
                p1.value = Placeholder.force_int(p2)

            # Graph dim concrete, user dim unresolved: user input is invalid.
            elif Placeholder.check_resolved(
                    p1) and not Placeholder.check_resolved(p2):
                raise ValueError(
                    f'Shape mismatch: expected:{input_shape}, real:{graph_input.shape}, {p1} != {p2}'
                )

            # Both concrete: they must agree.
            elif Placeholder.check_resolved(p1) and Placeholder.check_resolved(
                    p2):
                assert p1 == p2, f'Shape mismatch: expected:{input_shape}, real:{graph_input.shape}, {p1} != {p2}'

    if args.out:
        output_dir = args.out
    else:
        output_dir = path.join(path.dirname(args.kerasmodel),
                               "webdnn_graph_descriptor")
    os.makedirs(output_dir, exist_ok=True)

    if args.visualize_ir:
        ir_dot_path = path.join(output_dir, "ir.dot")
        with open(ir_dot_path, "w") as f:
            f.write(dump_dot(graph))
        console.stderr(
            f"IR graph can be visualized with graphviz command: 'dot {ir_dot_path} -T png -o output.png'"
        )

    console.stderr(f"[{path.basename(__file__)}] Generating graph descriptor")

    # Generate per-backend descriptors; a failure in one backend is logged
    # and skipped (unless flags.DEBUG, which re-raises for a full traceback).
    any_backend_failed = False
    backends = args.backend.split(",")
    for i, backend in enumerate(backends):
        console.stderr(
            f"[{path.basename(__file__)}] BackendName: {console.colorize(backend, console.Color.Cyan)}"
        )
        try:
            graph_exec_data = generate_descriptor(
                backend, graph, constant_encoder_name=args.encoding)
            graph_exec_data.save(output_dir)
        except Exception as ex:
            if flags.DEBUG:
                raise ex

            any_backend_failed = True
            console.error(
                f"[{path.basename(__file__)}] Failed generating descriptor for {backend} backend"
            )
            console.stderr(traceback.format_exc())
            continue

    if any_backend_failed:
        exit(1)