Python Scalar Examples

Programming Language: Python

Namespace/Package Name: isaac

Method/Function: Scalar

Examples at hotexamples.com: 4

Python Scalar - 4 examples found. These are the top rated real world Python examples of isaac.Scalar extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

def isaacGemm(ctx, stream, shapes, layouts):
    # Shapes
    offa, offb, offc = 0, 0, 0
    dtype, AT, BT, M, N, K = shapes
    dtype = sc.dtype(dtype)
    AT, BT = sc.templates.op(AT), sc.templates.op(BT)
    ldc = M
    lda = M if AT == sc.templates.OP_N else K
    ldb = K if BT == sc.templates.OP_N else N
    # Kernel
    generator = sc.templates.GEMM(dtype, AT, BT, M, N, K, offa, lda, offb, ldb,
                                  offc, ldc, *layouts)
    src = generator.dump(ctx.device, "gemm")
    module = sc.driver.Module(ctx, src)
    kernel = sc.driver.Kernel(module, "gemm")
    with lock:
        # BuffeNfilt
        C = sc.driver.Buffer(ctx, M * N * sc.size_of(dtype))
        A = sc.driver.Buffer(ctx, M * K * sc.size_of(dtype))
        B = sc.driver.Buffer(ctx, K * N * sc.size_of(dtype))
        alpha, beta = sc.Scalar(1., dtype), sc.Scalar(0., dtype)
        # Result
        ts = benchmark(
            lambda: (generator.enqueue(kernel, stream, alpha, A, B, beta, C),
                     stream.synchronize()), ctx.device, 1e-2)
    tflops = 2 * M * N * K / ts * 1e-12
    return tflops

Example #2

Show file

def isaacConv(ctx, stream, shapes, layouts):
    # Shapes
    dtype, Npix, K, C, Nfilt = shapes
    N, M, P, Q = 1, 1, 1, Npix
    T, R, S = 1, 1, Nfilt
    dtype = sc.dtype(dtype)
    pad_d, pad_h, pad_w, stride_d, stride_h, stride_w = 0, 0, 0, 1, 1, 1
    D = M * stride_d + T - 1 - 2 * pad_d - stride_d + 1
    H = P * stride_h + R - 1 - 2 * pad_h - stride_h + 1
    W = Q * stride_w + S - 1 - 2 * pad_w - stride_w + 1
    # Kernel
    generator = sc.templates.Conv(sc.dtype(dtype), C, D, H, W, N, K, M, P, Q,
                                  T, R, S, pad_d, pad_h, pad_w, stride_d,
                                  stride_h, stride_w, *layouts)
    src = generator.dump(ctx.device, "conv_fprop")
    module = sc.driver.Module(ctx, src)
    kernel = sc.driver.Kernel(module, "conv_fprop")
    with lock:
        # Buffers
        O = sc.driver.Buffer(ctx, K * M * P * Q * N * sc.size_of(dtype))
        I = sc.driver.Buffer(ctx, C * D * H * W * N * sc.size_of(dtype))
        F = sc.driver.Buffer(ctx, C * T * R * S * K * sc.size_of(dtype))
        alpha, beta = sc.Scalar(1., dtype), sc.Scalar(0., dtype)
        # Result
        time = benchmark(
            lambda: (generator.enqueue(kernel, stream, alpha, I, F, beta, O),
                     stream.synchronize()), ctx.device, 1e-2)
    tflops = 2 * M * P * Q * N * K * C * T * R * S / time * 1e-12
    return tflops

Example #3

Show file

File: operations.py Project: wade1990/isaac-2

def cudaGemm(ctx, stream, dtype, AT, BT, M, N, K):
    ldc = M
    lda = M if AT==1 else K
    ldb = K if BT==1 else N
    dtype = sc.dtype(dtype)
    C = sc.driver.Buffer(ctx, M*N*sc.size_of(dtype))
    A = sc.driver.Buffer(ctx, M*K*sc.size_of(dtype))
    B = sc.driver.Buffer(ctx, K*N*sc.size_of(dtype))
    alpha, beta = sc.Scalar(1., dtype), sc.Scalar(0., dtype)
    time = benchmark(lambda: (sc.driver.cublasGemm(dtype, ctx, stream, 'N' if AT==1 else 'T', 'N' if BT==1 else 'T', M, N, K, alpha, A, lda, B,  ldb, beta, C, ldc), stream.synchronize()), ctx.device, 1e-2)
    tflops = 2*M*N*K/time*1e-12
    return tflops

Example #4

Show file

File: operations.py Project: wade1990/isaac-2

def cudaConv(ctx, stream, dtype, N, K, P, Q, C, R, S):
    pad_h, pad_w, stride_h, stride_w = 0, 0, 1, 1
    H = P*stride_h + R - 1 - 2*pad_h
    W = Q*stride_w + S - 1 - 2*pad_w
    dtype = sc.dtype(dtype)
    O = sc.driver.Buffer(ctx, K*P*Q*N*sc.size_of(dtype))
    I = sc.driver.Buffer(ctx, C*H*W*N*sc.size_of(dtype))
    F = sc.driver.Buffer(ctx, C*R*S*K*sc.size_of(dtype))
    alpha, beta = sc.Scalar(1., dtype), sc.Scalar(0., dtype)
    time = benchmark(lambda: (sc.driver.cudnnConv(dtype, ctx, stream, H, W, N, K, P, Q, C, R, S, pad_h, pad_w, stride_h, stride_w, alpha, I, F, beta, O), stream.synchronize()), ctx.device, 1e-2)
    tflops = 2*P*Q*K*N*C*R*S/time*1e-12
    return tflops