Example #1
def dense_strategy_cpu(attrs, inputs, out_type, target):
    """dense x86 strategy"""
    strategy = _op.OpStrategy()
    m, _ = inputs[0].shape
    same_type = inputs[0].dtype == inputs[1].dtype == out_type.dtype
    dtype = inputs[0].dtype
    u8s8s32 = dtype == "uint8" and inputs[1].dtype == "int8" and out_type.dtype == "int32"
    strategy.add_implementation(
        wrap_compute_dense(topi.x86.dense_nopack),
        wrap_topi_schedule(topi.x86.schedule_dense_nopack),
        name="dense_nopack.x86",
        plevel=10,
    )

    if is_auto_scheduler_enabled():
        strategy.add_implementation(
            wrap_compute_dense(topi.nn.dense, need_auto_scheduler_layout=True),
            naive_schedule,
            name="dense.generic",
            plevel=11,
        )

    if "cblas" in target.libs:
        with SpecializedCondition(same_type
                                  and dtype in ["float32", "float64"]):
            strategy.add_implementation(
                wrap_compute_dense(topi.x86.dense_cblas),
                wrap_topi_schedule(topi.x86.schedule_dense_cblas),
                name="dense_cblas.x86",
                plevel=13,
            )
    if "mkl" in target.libs:
        with SpecializedCondition(
                same_type and dtype in ["float32", "float64"] or u8s8s32):
            strategy.add_implementation(
                wrap_compute_dense(topi.x86.dense_mkl),
                wrap_topi_schedule(topi.x86.schedule_dense_mkl),
                name="dense_mkl.x86",
                plevel=14,
            )
    if "mkldnn" in target.libs:
        with SpecializedCondition(same_type and dtype == "float32"):
            strategy.add_implementation(
                wrap_compute_dense(topi.x86.dense_mkldnn),
                wrap_topi_schedule(topi.x86.schedule_dense_mkldnn),
                name="dense_mkldnn.x86",
                plevel=15,
            )
    with SpecializedCondition(m >= 16):
        # this implementation may not be well-optimized, so use plevel=5 for now.
        strategy.add_implementation(
            wrap_compute_dense(topi.x86.dense_pack),
            wrap_topi_schedule(topi.x86.schedule_dense_pack),
            name="dense_pack.x86",
            plevel=5,
        )
    return strategy
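None of the snippets on this page are called directly: in the upstream TVM tree each one is attached to the generic dense_strategy through the per-target strategy registry, and Relay then picks, among the registered implementations whose SpecializedCondition holds, the one with the highest plevel. Below is a minimal sketch of that wiring, assuming the decorator and import paths follow TVM's relay.op.strategy convention (the registration itself is not shown in the example above, so treat the exact names as assumptions).

# Sketch only: how a target-specific dense strategy is hooked into the registry,
# mirroring python/tvm/relay/op/strategy/x86.py. Import paths are assumed from
# TVM's relay.op.strategy layout; re-registering "cpu" in a live TVM session
# would clash with the built-in registration.
from tvm import topi
from tvm.relay.op import op as _op
from tvm.relay.op.strategy.generic import (
    dense_strategy,        # generic strategy being specialized
    wrap_compute_dense,
    wrap_topi_schedule,
)


@dense_strategy.register("cpu")  # dispatch key: the target kind
def my_dense_strategy_cpu(attrs, inputs, out_type, target):
    """Trimmed x86 dense strategy with a single default implementation."""
    strategy = _op.OpStrategy()
    strategy.add_implementation(
        wrap_compute_dense(topi.x86.dense_nopack),            # compute function
        wrap_topi_schedule(topi.x86.schedule_dense_nopack),   # schedule function
        name="dense_nopack.x86",
        plevel=10,  # among applicable implementations, the highest plevel wins
    )
    return strategy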
Example #2
def dense_strategy_cuda(attrs, inputs, out_type, target):
    """dense cuda strategy"""
    strategy = _op.OpStrategy()
    if out_type.dtype == "int8":
        strategy.add_implementation(
            wrap_compute_dense(topi.cuda.dense_int8),
            wrap_topi_schedule(topi.cuda.schedule_dense_int8),
            name="dense_int8.cuda")
    else:
        strategy.add_implementation(
            wrap_compute_dense(topi.cuda.dense_small_batch),
            wrap_topi_schedule(topi.cuda.schedule_dense_small_batch),
            name="dense_small_batch.cuda")
        b = inputs[0].shape[0]
        with SpecializedCondition(b >= 32):
            strategy.add_implementation(
                wrap_compute_dense(topi.cuda.dense_large_batch),
                wrap_topi_schedule(topi.cuda.schedule_dense_large_batch),
                name="dense_large_batch.cuda",
                plevel=15)
    if target.target_name == "cuda" and "cublas" in target.libs:
        strategy.add_implementation(
            wrap_compute_dense(topi.cuda.dense_cublas),
            wrap_topi_schedule(topi.cuda.schedule_dense_cublas),
            name="dense_cublas.cuda",
            plevel=20)
    return strategy
Example #3
def dense_strategy_cuda(attrs, inputs, out_type, target):
    """dense cuda strategy"""
    strategy = _op.OpStrategy()
    data, weights = inputs
    b, i = get_const_tuple(data.shape)
    o, _ = get_const_tuple(weights.shape)
    if (target.kind.name in ["cuda", "vulkan", "rocm"] and data.dtype == "int8"
            and weights.dtype == "int8" and out_type.dtype == "int32"):
        strategy.add_implementation(
            wrap_compute_dense(topi.cuda.dense_int8),
            wrap_topi_schedule(topi.cuda.schedule_dense_int8),
            name="dense_int8.cuda",
        )
    else:
        strategy.add_implementation(
            wrap_compute_dense(topi.gpu.dense_small_batch),
            wrap_topi_schedule(topi.gpu.schedule_dense_small_batch),
            name="dense_small_batch.gpu",
        )

        with SpecializedCondition(b >= 32):
            strategy.add_implementation(
                wrap_compute_dense(topi.gpu.dense_large_batch),
                wrap_topi_schedule(topi.gpu.schedule_dense_large_batch),
                name="dense_large_batch.gpu",
                plevel=5,
            )

    if target.kind.name == "cuda":
        if nvcc.have_tensorcore(target=target):
            if ((data.dtype in ["float16", "int8", "uint8"] and
                 ((i % 16 == 0 and b % 16 == 0 and o % 16 == 0) or
                  (i % 16 == 0 and b % 8 == 0 and o % 32 == 0) or
                  (i % 16 == 0 and b % 32 == 0 and o % 8 == 0)))
                    or (data.dtype in ["int4", "uint4"] and i % 32 == 0
                        and b % 8 == 0 and o % 8 == 0)
                    or (data.dtype in ["int1", "uint1"] and i % 128 == 0
                        and b % 8 == 0 and o % 8 == 0)):
                strategy.add_implementation(
                    wrap_compute_dense(topi.cuda.dense_tensorcore),
                    wrap_topi_schedule(topi.cuda.schedule_dense_tensorcore),
                    name="dense_tensorcore.cuda",
                    plevel=20,
                )

    if target.kind.name == "cuda" and "cublas" in target.libs:
        strategy.add_implementation(
            wrap_compute_dense(topi.cuda.dense_cublas),
            wrap_topi_schedule(topi.cuda.schedule_dense_cublas),
            name="dense_cublas.cuda",
            plevel=25,
        )
    return strategy
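The "cublas" in target.libs branch above (and likewise the cblas/mkl/mkldnn branches in the x86 examples) only fires when the library is listed in the target's -libs attribute. A quick illustration of building such targets; the target-string syntax is standard TVM, and the printed attributes are exactly what the strategy functions inspect.

import tvm

# A CUDA target that advertises cuBLAS: with this target the strategy above
# also registers dense_cublas.cuda at plevel=25, outranking the TOPI variants.
cublas_target = tvm.target.Target("cuda -libs=cublas")
print(cublas_target.kind.name)   # "cuda"
print(list(cublas_target.libs))  # ["cublas"]

# The x86 strategies check target.libs the same way, e.g. for MKL:
mkl_target = tvm.target.Target("llvm -libs=mkl")
print("mkl" in mkl_target.libs)  # True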
Example #4
def dense_strategy_cuda(attrs, inputs, out_type, target):
    """dense cuda strategy"""
    strategy = _op.OpStrategy()
    data, weights = inputs
    b, i = get_const_tuple(data.shape)
    o, _ = get_const_tuple(weights.shape)
    if out_type.dtype == "int8":
        strategy.add_implementation(
            wrap_compute_dense(topi.cuda.dense_int8),
            wrap_topi_schedule(topi.cuda.schedule_dense_int8),
            name="dense_int8.cuda",
        )
    else:
        strategy.add_implementation(
            wrap_compute_dense(topi.cuda.dense_small_batch),
            wrap_topi_schedule(topi.cuda.schedule_dense_small_batch),
            name="dense_small_batch.cuda",
        )

        strategy.add_auto_scheduler(
            wrap_compute_dense(topi.nn.dense),
            name="dense",
        )

        with SpecializedCondition(b >= 32):
            strategy.add_implementation(
                wrap_compute_dense(topi.cuda.dense_large_batch),
                wrap_topi_schedule(topi.cuda.schedule_dense_large_batch),
                name="dense_large_batch.cuda",
                plevel=5,
            )
        if target.kind.name == "cuda":
            if nvcc.have_tensorcore(tvm.gpu(0).compute_version):
                if (
                    (i % 16 == 0 and b % 16 == 0 and o % 16 == 0)
                    or (i % 16 == 0 and b % 8 == 0 and o % 32 == 0)
                    or (i % 16 == 0 and b % 32 == 0 and o % 8 == 0)
                ):
                    strategy.add_implementation(
                        wrap_compute_dense(topi.cuda.dense_tensorcore),
                        wrap_topi_schedule(topi.cuda.schedule_dense_tensorcore),
                        name="dense_tensorcore.cuda",
                        plevel=20,
                    )
    if target.kind.name == "cuda" and "cublas" in target.libs:
        strategy.add_implementation(
            wrap_compute_dense(topi.cuda.dense_cublas),
            wrap_topi_schedule(topi.cuda.schedule_dense_cublas),
            name="dense_cublas.cuda",
            plevel=25,
        )
    return strategy
Example #5
def dense_strategy_cpu(attrs, inputs, out_type, target):
    """dense x86 strategy"""
    strategy = _op.OpStrategy()
    m, _ = inputs[0].shape
    strategy.add_implementation(wrap_compute_dense(topi.x86.dense_nopack),
                                wrap_topi_schedule(topi.x86.schedule_dense_nopack),
                                name="dense_nopack.x86",
                                plevel=10)
    if "cblas" in target.libs:
        strategy.add_implementation(wrap_compute_dense(topi.x86.dense_cblas),
                                    wrap_topi_schedule(topi.x86.schedule_dense_cblas),
                                    name="dense_cblas.x86",
                                    plevel=15)
    with SpecializedCondition(m >= 16):
        # this implementation may not be well-optimized, so use plevel=5 for now.
        strategy.add_implementation(wrap_compute_dense(topi.x86.dense_pack),
                                    wrap_topi_schedule(topi.x86.schedule_dense_pack),
                                    name="dense_pack.x86",
                                    plevel=5)
    return strategy
Example #6
def scatter_cuda(attrs, inputs, out_type, target):
    """scatter cuda strategy"""
    strategy = _op.OpStrategy()
    strategy.add_implementation(
        wrap_compute_scatter(topi.cuda.scatter),
        wrap_topi_schedule(topi.cuda.schedule_scatter),
        name="scatter.cuda",
        plevel=10,
    )

    rank = len(inputs[0].shape)

    with SpecializedCondition(rank == 1):
        if can_use_thrust(target, "tvm.contrib.thrust.stable_sort_by_key"):
            strategy.add_implementation(
                wrap_compute_scatter(topi.cuda.scatter_via_sort),
                wrap_topi_schedule(topi.cuda.schedule_scatter_via_sort),
                name="scatter_via_sort.cuda",
                plevel=9,  # use the sequential version by default
            )
    return strategy
Example #7
File: x86.py  Project: saudet/tvm
def matmul_strategy_cpu(attrs, inputs, out_type, target):
    """matmul x86 strategy"""
    strategy = _op.OpStrategy()

    same_type = inputs[0].dtype == inputs[1].dtype == out_type.dtype
    dtype = inputs[0].dtype
    u8s8s32 = dtype == "uint8" and inputs[1].dtype == "int8" and out_type.dtype == "int32"
    if "cblas" in target.libs:
        length_before = len(strategy.specializations) if strategy.specializations else 0
        with SpecializedCondition(same_type and dtype in ["float32", "float64"]):
            strategy.add_implementation(
                wrap_compute_matmul(topi.x86.matmul_cblas),
                wrap_topi_schedule(topi.x86.schedule_matmul_cblas),
                name="matmul_cblas.x86",
                plevel=13,
            )
        length_after = len(strategy.specializations) if strategy.specializations else 0
        if length_before == length_after:
            logger.warning(
                "Currently cblas only supports float32 or float64 data types. Skip."
            )
    if "mkl" in target.libs:
        length_before = len(strategy.specializations) if strategy.specializations else 0
        with SpecializedCondition(same_type and dtype in ["float32", "float64"] or u8s8s32):
            strategy.add_implementation(
                wrap_compute_matmul(topi.x86.matmul_mkl),
                wrap_topi_schedule(topi.x86.schedule_matmul_mkl),
                name="matmul_mkl.x86",
                plevel=14,
            )
        length_after = len(strategy.specializations) if strategy.specializations else 0
        if length_before == length_after:
            logger.warning(
                "Currently mkl only supports float32, float64, or uint8 input with "
                "int8 weights and int32 output. Skip."
            )
    if "mkldnn" in target.libs:
        length_before = len(strategy.specializations) if strategy.specializations else 0
        with SpecializedCondition(same_type and dtype == "float32"):
            strategy.add_implementation(
                wrap_compute_matmul(topi.x86.matmul_mkldnn),
                wrap_topi_schedule(topi.x86.schedule_matmul_mkldnn),
                name="matmul_mkldnn.x86",
                plevel=15,
            )
        length_after = len(strategy.specializations) if strategy.specializations else 0
        if length_before == length_after:
            logger.warning("Currently mkldnn only support the data type to be float32. Skip.")

    if is_auto_scheduler_enabled():
        strategy.add_implementation(
            wrap_compute_matmul(topi.nn.matmul, need_auto_scheduler_layout=True),
            naive_schedule,
            name="matmul.generic",
            plevel=11,
        )
    else:
        # If no cblas/mkl/mkldnn strategy was chosen
        if not strategy.specializations:
            logger.warning(
                "Matmul is not optimized for x86. "
                "Recommend to use cblas/mkl/mkldnn for better performance."
            )
        strategy.add_implementation(
            wrap_compute_matmul(topi.nn.matmul),
            naive_schedule,
            name="matmul.generic",
        )
    return strategy
Example #8
def dense_strategy_cpu(attrs, inputs, out_type, target):
    """dense x86 strategy"""
    strategy = _op.OpStrategy()
    same_type = inputs[0].dtype == inputs[1].dtype == out_type.dtype
    dtype = inputs[0].dtype
    u8s8s32 = dtype == "uint8" and inputs[1].dtype == "int8" and out_type.dtype == "int32"
    strategy.add_implementation(
        wrap_compute_dense(topi.x86.dense_nopack),
        wrap_topi_schedule(topi.x86.schedule_dense_nopack),
        name="dense_nopack.x86",
        plevel=5,
    )

    strategy.add_implementation(
        wrap_compute_dense(topi.x86.dense_pack),
        wrap_topi_schedule(topi.x86.schedule_dense_pack),
        name="dense_pack.x86",
        plevel=10,
    )

    need_auto_scheduler_layout = is_auto_scheduler_enabled()
    need_meta_schedule_layout = is_meta_schedule_enabled()

    if need_auto_scheduler_layout or need_meta_schedule_layout:
        strategy.add_implementation(
            wrap_compute_dense(
                topi.nn.dense,
                need_auto_scheduler_layout=need_auto_scheduler_layout,
                need_meta_schedule_layout=need_meta_schedule_layout,
            ),
            naive_schedule,
            name="dense.generic",
            plevel=11,
        )

    if "cblas" in target.libs:
        with SpecializedCondition(same_type
                                  and dtype in ["float32", "float64"]):
            strategy.add_implementation(
                wrap_compute_dense(topi.x86.dense_cblas),
                wrap_topi_schedule(topi.x86.schedule_dense_cblas),
                name="dense_cblas.x86",
                plevel=13,
            )
    if "mkl" in target.libs:
        with SpecializedCondition(
                same_type and dtype in ["float32", "float64"] or u8s8s32):
            strategy.add_implementation(
                wrap_compute_dense(topi.x86.dense_mkl),
                wrap_topi_schedule(topi.x86.schedule_dense_mkl),
                name="dense_mkl.x86",
                plevel=14,
            )
    if "dnnl" in target.libs:
        with SpecializedCondition(same_type and dtype == "float32"):
            strategy.add_implementation(
                wrap_compute_dense(topi.x86.dense_dnnl),
                wrap_topi_schedule(topi.x86.schedule_dense_dnnl),
                name="dense_dnnl.x86",
                plevel=15,
            )
    return strategy