def try_yolo_conv_cuda(batch_size=1, dev_id=3, number=10):
    """Build, verify, and time the YOLO conv layer 10 on a CUDA device.

    Lowers the scheduled compute to TIR, runs TVM's GPU-code verifier
    against the device's resource limits, then measures execution time.

    Args:
        batch_size: leading dimension of the input tensor.
        dev_id: CUDA device ordinal to verify against and run on
            (default 3 preserves the original hard-coded choice).
        number: number of timed runs passed to the evaluator.
    """
    # Get the compute definition for the layer.
    yolo_conv = YoloConvLayer10()
    # NOTE(review): "get_intput_shape" is the project class's actual
    # (typo'd) method name — do not "fix" the call without renaming it
    # at the definition site as well.
    input_shape = yolo_conv.get_intput_shape()
    inputs = tvm.te.placeholder((batch_size, *input_shape), dtype="float32")
    weight = yolo_conv.get_weight()
    outputs = yolo_conv(inputs)

    s = tvm.te.create_schedule(outputs.op)
    schedule_yolo_conv_cuda(s, outputs, inputs, weight)

    arg_bufs = [inputs, weight, outputs]
    stmt = tvm.lower(s, arg_bufs, simple_mode=True)
    print(stmt)

    # Query the target device's hardware limits so the verifier can
    # check the generated kernel fits within them.
    ctx = tvm.nd.context("cuda", dev_id)
    max_dims = ctx.max_thread_dimensions
    kwargs = {
        "max_shared_memory_per_block": ctx.max_shared_memory_per_block,
        "max_threads_per_block": ctx.max_threads_per_block,
        "max_thread_x": max_dims[0],
        "max_thread_y": max_dims[1],
        "max_thread_z": max_dims[2]
    }
    verify = tvm.tir.ir_pass.VerifyGPUCode(stmt, kwargs)
    print(verify)

    time_cost = _evaluate(s, arg_bufs, "cuda", dev_id, number)
    print("Yolo conv10 use", time_cost, "ms")

def try_yolo_conv_opencl(batch_size=1):
    """Build the YOLO conv layer 10 for OpenCL and dump the kernel source.

    Constructs the layer's compute, applies the OpenCL schedule, compiles
    the module, and prints the generated OpenCL kernel code.

    Args:
        batch_size: leading dimension of the input tensor.
    """
    layer = YoloConvLayer10()
    # NOTE(review): "get_intput_shape" is the project class's actual
    # (typo'd) method name — keep the call as-is.
    shape = (batch_size, *layer.get_intput_shape())
    data = tvm.te.placeholder(shape, dtype="float32")
    kernel = layer.get_weight()
    result = layer(data)

    sch = tvm.te.create_schedule(result.op)
    schedule_yolo_conv_opencl(sch, result, data, kernel)

    args = [data, kernel, result]
    # Lowered TIR kept around for debugging; uncomment to inspect.
    lowered = tvm.lower(sch, args, simple_mode=True)
    # print(lowered)

    module = tvm.build(sch, args, "opencl")
    # Show the OpenCL source of the first imported device module.
    print(module.imported_modules[0].get_source())