Esempio n. 1
0
def shfl_down_i32(mask, val, offset):
    """Insert a call to the ``cuda_shfl_down_sync_i32`` internal function.

    Args:
        mask: Forwarded as the first argument of the internal call.
        val: Forwarded as the second argument of the internal call.
        offset: Forwarded as the third argument of the internal call.

    Returns:
        An ``expr.Expr`` wrapping the inserted internal function call.
    """
    # The trailing argument is deliberately 31 rather than 32 (the warp
    # size): 32 was observed not to work, whereas 31 yields the desired
    # behavior.
    shuffle_args = expr.make_expr_group(mask, val, offset, 31)
    internal_call = _ti_core.insert_internal_func_call(
        "cuda_shfl_down_sync_i32", shuffle_args, False)
    return expr.Expr(internal_call)
Esempio n. 2
0
def shfl_up_f32(mask, val, offset):
    """Insert a call to the ``cuda_shfl_up_sync_f32`` internal function.

    Args:
        mask: Forwarded as the first argument of the internal call.
        val: Forwarded as the second argument of the internal call.
        offset: Forwarded as the third argument of the internal call.

    Returns:
        An ``expr.Expr`` wrapping the inserted internal function call.
    """
    # The trailing argument (the lane offset) is 0 for a warp size of 32.
    shuffle_args = expr.make_expr_group(mask, val, offset, 0)
    internal_call = _ti_core.insert_internal_func_call(
        "cuda_shfl_up_sync_f32", shuffle_args, False)
    return expr.Expr(internal_call)
Esempio n. 3
0
def shfl_sync_i32(mask, val, offset):
    """Insert a call to the ``cuda_shfl_sync_i32`` internal function.

    Args:
        mask: Forwarded as the first argument of the internal call.
        val: Forwarded as the second argument of the internal call.
        offset: Forwarded as the third argument of the internal call.

    Returns:
        An ``expr.Expr`` wrapping the inserted internal function call.
    """
    # The trailing argument (the lane offset) is 31 for a warp size of 32.
    shuffle_args = expr.make_expr_group(mask, val, offset, 31)
    internal_call = _ti_core.insert_internal_func_call(
        "cuda_shfl_sync_i32", shuffle_args, False)
    return expr.Expr(internal_call)
Esempio n. 4
0
def call_internal(name, *args):
    """Insert a call to the internal function ``name``.

    Args:
        name: Name of the internal function to call.
        *args: Call arguments; the collected tuple is handed to
            ``make_expr_group`` as a single argument.

    Returns:
        The result of ``expr_init`` applied to the inserted call.
    """
    arg_group = make_expr_group(args)
    inserted_call = _ti_core.insert_internal_func_call(name, arg_group)
    return expr_init(inserted_call)
Esempio n. 5
0
def shfl_up_f32(mask, val, offset):
    """Insert a call to the ``cuda_shfl_up_sync_f32`` internal function.

    Args:
        mask: Forwarded as the first argument of the internal call.
        val: Forwarded as the second argument of the internal call.
        offset: Forwarded as the third argument of the internal call.

    Returns:
        An ``expr.Expr`` wrapping the inserted internal function call.
    """
    # The trailing argument is the packed clamp operand of the shuffle
    # instruction, not the warp size. For an "up" shuffle across a full
    # 32-lane warp that operand is 0; the previously passed value 32 does
    # not fit the operand's 5-bit clamp field (see PTX ``shfl.sync``).
    shuffle_args = expr.make_expr_group(mask, val, offset, 0)
    internal_call = _ti_core.insert_internal_func_call(
        "cuda_shfl_up_sync_f32", shuffle_args, False)
    return expr.Expr(internal_call)
Esempio n. 6
0
def ballot(predicate):
    """Insert a call to the ``cuda_ballot_i32`` internal function.

    Args:
        predicate: The single argument forwarded to the internal call.

    Returns:
        An ``expr.Expr`` wrapping the inserted internal function call.
    """
    ballot_args = expr.make_expr_group(predicate)
    internal_call = _ti_core.insert_internal_func_call(
        "cuda_ballot_i32", ballot_args, False)
    return expr.Expr(internal_call)
Esempio n. 7
0
def reduce_xor(value):
    """Insert a call to the ``subgroupXor`` internal function.

    Args:
        value: The single argument forwarded to the internal call; its
            ``ptr.get_ret_type()`` is used as the dtype of the result.

    Returns:
        An ``expr.Expr`` wrapping the inserted call, typed like ``value``.
    """
    xor_args = expr.make_expr_group(value)
    internal_call = _ti_core.insert_internal_func_call(
        "subgroupXor", xor_args, False)
    # The result carries the same return type as the input expression.
    result_dtype = value.ptr.get_ret_type()
    return expr.Expr(internal_call, dtype=result_dtype)
Esempio n. 8
0
def invocation_id():
    """Insert a call to the ``subgroupInvocationId`` internal function.

    Returns:
        An ``expr.Expr`` with dtype ``i32`` wrapping the inserted call.
    """
    no_args = expr.make_expr_group()
    internal_call = _ti_core.insert_internal_func_call(
        "subgroupInvocationId", no_args, False)
    return expr.Expr(internal_call, dtype=i32)
Esempio n. 9
0
def group_size():
    """Insert a call to the ``subgroupSize`` internal function.

    Returns:
        An ``expr.Expr`` with dtype ``i32`` wrapping the inserted call.
    """
    no_args = expr.make_expr_group()
    internal_call = _ti_core.insert_internal_func_call(
        "subgroupSize", no_args, False)
    return expr.Expr(internal_call, dtype=i32)
Esempio n. 10
0
def barrier():
    """Insert a call to the ``subgroupBarrier`` internal function.

    Returns:
        An ``expr.Expr`` wrapping the inserted internal function call.
    """
    no_args = expr.make_expr_group()
    internal_call = _ti_core.insert_internal_func_call(
        "subgroupBarrier", no_args, False)
    return expr.Expr(internal_call)
Esempio n. 11
0
def broadcast(value, index: i32):
    """Insert a call to the ``subgroupBroadcast`` internal function.

    Args:
        value: Forwarded as the first argument of the internal call.
        index: Forwarded as the second argument of the internal call.

    Returns:
        An ``expr.Expr`` wrapping the inserted internal function call.
    """
    broadcast_args = expr.make_expr_group(value, index)
    internal_call = _ti_core.insert_internal_func_call(
        "subgroupBroadcast", broadcast_args, False)
    return expr.Expr(internal_call)
Esempio n. 12
0
def elect():
    """Insert a call to the ``subgroupElect`` internal function.

    Returns:
        An ``expr.Expr`` wrapping the inserted internal function call.
    """
    no_args = expr.make_expr_group()
    internal_call = _ti_core.insert_internal_func_call(
        "subgroupElect", no_args, False)
    return expr.Expr(internal_call)
Esempio n. 13
0
def inclusive_or(value):
    """Insert a call to the ``subgroupInclusiveOr`` internal function.

    Args:
        value: The single argument forwarded to the internal call; its
            ``ptr.get_ret_type()`` is used as the dtype of the result.

    Returns:
        An ``expr.Expr`` wrapping the inserted call, typed like ``value``.
    """
    or_args = expr.make_expr_group(value)
    internal_call = _ti_core.insert_internal_func_call(
        "subgroupInclusiveOr", or_args, False)
    # The result carries the same return type as the input expression.
    result_dtype = value.ptr.get_ret_type()
    return expr.Expr(internal_call, dtype=result_dtype)
Esempio n. 14
0
def call_internal(name, *args, with_runtime_context=True):
    """Insert a call to the internal function ``name``.

    Args:
        name: Name of the internal function to call.
        *args: Call arguments; the collected tuple is handed to
            ``make_expr_group`` as a single argument.
        with_runtime_context: Forwarded unchanged as the third argument of
            ``insert_internal_func_call``. Defaults to ``True``.

    Returns:
        The result of ``expr_init`` applied to the inserted call.
    """
    arg_group = make_expr_group(args)
    inserted_call = _ti_core.insert_internal_func_call(
        name, arg_group, with_runtime_context)
    return expr_init(inserted_call)