Python UpdateCuda Examples

Programming Language: Python

Namespace/Package Name: neon.backends.convolution

Method/Function: UpdateCuda

Examples at hotexamples.com: 3

Python UpdateCuda - 3 examples found. These are the top-rated real-world Python examples of neon.backends.convolution.UpdateCuda, extracted from open source projects. You can rate examples to help us improve their quality.

Example #1

Show file

    def __init__(self,
                 lib,
                 dtype,
                 N,
                 C,
                 K,
                 D=1,
                 H=1,
                 W=1,
                 T=1,
                 R=1,
                 S=1,
                 pad_d=0,
                 pad_h=0,
                 pad_w=0,
                 str_d=1,
                 str_h=1,
                 str_w=1):
        """Store the convolution geometry and pick the kernel objects to use.

        After construction ``fprop_kernels``, ``bprop_kernels`` and
        ``updat_kernels`` are always defined.  ``updat_kernels`` is ``None``
        when the direct path is taken with ``N < 4``, because no update kernel
        is selected for that case.

        Args:
            lib: Backend handle.  Its ``output_dim`` method and its
                ``use_cudac_kernels`` / ``enable_winograd`` attributes are
                consulted here, and it is forwarded to every kernel object.
            dtype: Forwarded to the parent constructor; also compared with
                ``np.float32`` when choosing the Winograd tile size.
            N, C, K: Sizes used to build the input ``(C, D, H, W, N)``,
                filter ``(C, T, R, S, K)`` and output ``(K, M, P, Q, N)``
                dimension tuples.
            D, H, W: Input extents along the three spatial axes.
            T, R, S: Filter extents along the same three axes.
            pad_d, pad_h, pad_w: Padding per axis, passed to
                ``lib.output_dim``.
            str_d, str_h, str_w: Stride per axis, passed to
                ``lib.output_dim``.

        Raises:
            ValueError: If the CUDA C kernels are selected and ``T > 1`` or
                ``D > 1``.
        """
        super(ConvLayer, self).__init__(lib, dtype, N, np.float32)

        # Compute the output spatial dimensions
        M = lib.output_dim(D, T, pad_d, str_d)
        P = lib.output_dim(H, R, pad_h, str_h)
        Q = lib.output_dim(W, S, pad_w, str_w)

        self.C = C
        self.K = K
        self.M = M
        self.P = P
        self.Q = Q
        self.NCK = (N, C, K)
        self.TRS = (T, R, S)
        self.DHW = (D, H, W)
        self.MPQ = (M, P, Q)
        self.padding = (pad_d, pad_h, pad_w)
        self.strides = (str_d, str_h, str_w)

        self.all_params = (N, C, K, D, H, W, T, R, S, pad_d, pad_h, pad_w,
                           str_d, str_h, str_w)

        self.dimI = (C, D, H, W, N)
        self.dimF = (C, T, R, S, K)
        self.dimFb = (K, T, R, S, C)
        self.dimO = (K, M, P, Q, N)
        self.dimI2 = (C * D * H * W, N)
        self.dimF2 = (C * T * R * S, K)
        self.dimF2t = (K, C * T * R * S)
        self.dimO2 = (K * M * P * Q, N)
        self.dimS = (K, 1)
        self.sizeI = reduce(mul, self.dimI, 1)
        self.sizeF = reduce(mul, self.dimF, 1)
        self.sizeO = reduce(mul, self.dimO, 1)
        self.nOut = reduce(mul, self.MPQ, 1) * K

        # flop count for benchmarking
        self.flops = P * Q * M * K * N * C * R * S * T * 2.0

        # Positional arguments shared by every kernel constructor below.
        args = (lib, self.dtype, N, C, K, D, H, W, T, R, S, M, P, Q, pad_d,
                pad_h, pad_w, str_d, str_h, str_w)

        # lib.enable_winograd = 0

        ####### Cuda C ###########
        if lib.use_cudac_kernels:

            # 3D conv not supported yet
            if T > 1 or D > 1:
                raise ValueError(
                    "3D Convolution not supported by CUDA C kernels and pre-Maxwell GPUs"
                )

            # TODO small C bprop?
            self.fprop_kernels = convolution.FpropCuda(*args)
            self.bprop_kernels = convolution.BpropCuda(*args)
            self.updat_kernels = convolution.UpdateCuda(*args)

        ####### Winograd ###########
        elif lib.enable_winograd and R == 3 and S == 3 and all(
                x == 1 for x in (D, M, T, str_w, str_h, str_d)):
            from .winograd_conv import (FpropWinograd_2x2_3x3,
                                        BpropWinograd_2x2_3x3,
                                        UpdateWinograd_3x3_2x2,
                                        FpropWinograd_4x4_3x3,
                                        BpropWinograd_4x4_3x3,
                                        UpdateWinograd_3x3_4x4)

            # Temp for now till we can autotune
            # 2 is safer for fp16 without batchnorm
            if dtype == np.float32 and lib.enable_winograd == 4:
                winograd = 4
            else:
                winograd = 2

            # The 4x4 fprop/bprop tiles are only chosen below this image size.
            use_4x4 = winograd == 4 and H * W < 112 * 112

            if C < 8:
                self.fprop_kernels = convolution.FpropDirect(*args)
            elif use_4x4:
                self.fprop_kernels = FpropWinograd_4x4_3x3(*args)
            else:
                self.fprop_kernels = FpropWinograd_2x2_3x3(*args)

            if use_4x4:
                self.bprop_kernels = BpropWinograd_4x4_3x3(*args)
            else:
                self.bprop_kernels = BpropWinograd_2x2_3x3(*args)

            if N >= 4 and (C < 8 or H * W > 112 * 112):
                self.updat_kernels = convolution.UpdateDirect(*args)
            elif winograd == 4:
                self.updat_kernels = UpdateWinograd_3x3_4x4(*args)
            else:
                self.updat_kernels = UpdateWinograd_3x3_2x2(*args)

        # Disabled 5x5 Winograd path, kept for reference:
        # elif lib.enable_winograd and not lib.deterministic and N > 1 and \
        #     R == 5 and S == 5 and all(x == 1 for x in (D,M,T,str_w,str_h,str_d)):
        #
        #     from .winograd_conv import (FpropWinograd_2x2_5x5, BpropWinograd_2x2_5x5)
        #
        #     self.fprop_kernels = FpropWinograd_2x2_5x5(*args)
        #     self.bprop_kernels = BpropWinograd_2x2_5x5(*args)
        #     if N >= 4:
        #         self.updat_kernels = convolution.UpdateDirect(*args)

        ####### Direct ###########
        else:

            self.fprop_kernels = convolution.FpropDirect(*args)
            self.bprop_kernels = convolution.BpropDirect(*args)
            if N >= 4:
                self.updat_kernels = convolution.UpdateDirect(*args)
            else:
                # No update kernel is selected for N < 4.  Define the
                # attribute anyway so the layer exposes the same three
                # attributes as in every other branch.
                self.updat_kernels = None

Example #2

Show file

    def __init__(self, lib, dtype,
                 N, C, K,
                 D=1, H=1, W=1,
                 T=1, R=1, S=1,
                 pad_d=0, pad_h=0, pad_w=0,
                 str_d=1, str_h=1, str_w=1,
                 relu=False, bsum=False):
        """Store the convolution geometry and pick the kernel objects to use.

        Args:
            lib: Backend handle.  Its ``output_dim`` method and its
                ``use_cudac_kernels`` / ``have_winograd`` attributes are
                consulted here, and it is forwarded to every kernel object.
            dtype: Forwarded to the parent constructor.
            N, C, K: Sizes used to build the input ``(C, D, H, W, N)``,
                filter ``(C, T, R, S, K)`` and output ``(K, M, P, Q, N)``
                dimension tuples.
            D, H, W: Input extents along the three spatial axes.
            T, R, S: Filter extents along the same three axes.
            pad_d, pad_h, pad_w: Padding per axis, passed to
                ``lib.output_dim``.
            str_d, str_h, str_w: Stride per axis, passed to
                ``lib.output_dim``.
            relu: Stored on the layer and forwarded to the direct and
                Winograd fprop/bprop kernels.
            bsum: Stored on the layer and forwarded to the CUDA C, direct and
                Winograd fprop/bprop kernels.

        Raises:
            ValueError: If the CUDA C kernels are selected and ``T > 1`` or
                ``D > 1``.
        """
        super(ConvLayer, self).__init__(lib, dtype, N, np.float32)

        # Compute the output spatial dimensions
        M = lib.output_dim(D, T, pad_d, str_d)
        P = lib.output_dim(H, R, pad_h, str_h)
        Q = lib.output_dim(W, S, pad_w, str_w)

        self.C = C
        self.K = K
        self.M = M
        self.P = P
        self.Q = Q
        self.NCK = (N, C, K)
        self.TRS = (T, R, S)
        self.DHW = (D, H, W)
        self.MPQ = (M, P, Q)
        self.padding = (pad_d, pad_h, pad_w)
        self.strides = (str_d, str_h, str_w)
        self.relu = relu
        self.bsum = bsum

        self.all_params = (N, C, K, D, H, W, T, R, S, pad_d, pad_h, pad_w, str_d, str_h, str_w)

        self.dimI   = (C, D, H, W, N)
        self.dimF   = (C, T, R, S, K)
        self.dimFb  = (K, T, R, S, C)
        self.dimO   = (K, M, P, Q, N)
        self.dimI2  = (C*D*H*W, N)
        self.dimF2  = (C*T*R*S, K)
        self.dimF2t = (K, C*T*R*S)
        self.dimO2  = (K*M*P*Q, N)
        self.dimS   = (K, 1)
        self.sizeI  = reduce(mul, self.dimI, 1)
        self.sizeF  = reduce(mul, self.dimF, 1)
        self.sizeO  = reduce(mul, self.dimO, 1)
        self.nOut   = reduce(mul, self.MPQ, 1) * K

        # flop count for benchmarking
        self.flops = P*Q*M*K*N*C*R*S*T * 2.0

        # Positional arguments shared by the CUDA C and direct kernels; built
        # once so the individual constructor calls cannot drift apart.
        conv_args = (lib, self.dtype, N, C, K, D, H, W, T, R, S, M, P, Q,
                     pad_d, pad_h, pad_w, str_d, str_h, str_w)

        ####### Cuda C ###########
        if lib.use_cudac_kernels:

            # 3D conv not supported yet
            if T > 1 or D > 1:
                raise ValueError("3D Convolution not supported by CUDA C kernels.")

            self.fprop_kernels = convolution.FpropCuda(*conv_args, bsum=bsum)
            # TODO small C bprop?
            self.bprop_kernels = convolution.BpropCuda(*conv_args, bsum=bsum)
            self.updat_kernels = convolution.UpdateCuda(*conv_args)

        ####### Winograd ###########
        elif lib.have_winograd and R == 3 and S == 3 and all(x == 1 for x in (D,M,T,str_w,str_h,str_d)):
            from winograd.convolution import FpropWinograd, BpropWinograd, UpdateWinograd

            # The Winograd kernels take a shorter argument list (no depth,
            # filter-size or stride entries).
            wino_args = (lib, self.dtype, N, C, K, H, W, P, Q, pad_h, pad_w)

            # lib.have_winograd is already known to be true in this branch, so
            # only the batch-size / channel-count test decides the fallback.
            if N >= 64 and C < 8:
                self.fprop_kernels = convolution.FpropDirect(
                    *(conv_args + (relu, bsum)))
            else:
                self.fprop_kernels = FpropWinograd(*(wino_args + (relu, bsum)))

            self.bprop_kernels = BpropWinograd(*(wino_args + (relu, bsum)))

            if N >= 32 and C < 8:
                self.updat_kernels = convolution.UpdateDirect(*conv_args)
            else:
                self.updat_kernels = UpdateWinograd(*wino_args)

        ####### Direct ###########
        else:
            vec_size = 4 if self.dtype.itemsize == 4 else 8

            self.fprop_kernels = convolution.FpropDirect(
                *(conv_args + (relu, bsum)))

            if C % vec_size == 0:
                self.bprop_kernels = convolution.BpropDirect(
                    *(conv_args + (relu, bsum)))
            else:
                # special kernel for deconv into first layer
                self.bprop_kernels = convolution.BpropDirectSmallC(*conv_args)

            self.updat_kernels = convolution.UpdateDirect(*conv_args)

        # Pass the objects themselves so they are only converted to strings
        # when a handler actually emits the record.
        logger.debug("%s: %s, %s, %s", self, self.fprop_kernels,
                     self.bprop_kernels, self.updat_kernels)

Example #3

Show file

File: layer_gpu.py Project: hughperkins/neon

    def __init__(self,
                 lib,
                 dtype,
                 N,
                 C,
                 K,
                 D=1,
                 H=1,
                 W=1,
                 T=1,
                 R=1,
                 S=1,
                 pad_d=0,
                 pad_h=0,
                 pad_w=0,
                 str_d=1,
                 str_h=1,
                 str_w=1,
                 bsum=False):
        """Record the layer geometry and build the three CUDA C kernel objects.

        Args:
            lib: Backend handle; supplies ``output_dim`` and is forwarded to
                each kernel constructor.
            dtype: Forwarded to the parent constructor.
            N, C, K: Sizes used in the input ``(C, D, H, W, N)``, filter
                ``(C, T, R, S, K)`` and output ``(K, M, P, Q, N)`` tuples.
            D, H, W: Input extents along the three spatial axes.
            T, R, S: Filter extents along the same three axes.
            pad_d, pad_h, pad_w: Per-axis padding given to ``lib.output_dim``.
            str_d, str_h, str_w: Per-axis stride given to ``lib.output_dim``.
            bsum: Stored on the layer and passed by keyword to the fprop and
                bprop kernels.

        Raises:
            ValueError: If ``T > 1`` or ``D > 1``.
        """
        super(ConvLayer, self).__init__(lib, dtype, N, np.float32)

        # Output extent along each axis, queried in depth/height/width order.
        per_axis = ((D, T, pad_d, str_d),
                    (H, R, pad_h, str_h),
                    (W, S, pad_w, str_w))
        M, P, Q = [lib.output_dim(*axis) for axis in per_axis]

        # Scalars and grouped triples describing the problem.
        self.C, self.K = C, K
        self.M, self.P, self.Q = M, P, Q
        batch_chan = (N, C, K)
        filt = (T, R, S)
        inp = (D, H, W)
        out = (M, P, Q)
        pads = (pad_d, pad_h, pad_w)
        steps = (str_d, str_h, str_w)
        self.NCK = batch_chan
        self.TRS = filt
        self.DHW = inp
        self.MPQ = out
        self.padding = pads
        self.strides = steps
        self.bsum = bsum

        # Flat record of every constructor parameter except lib/dtype/bsum.
        self.all_params = batch_chan + inp + filt + pads + steps

        # Tensor shapes in their full and two-dimensional forms.
        self.dimI = (C,) + inp + (N,)
        self.dimF = (C,) + filt + (K,)
        self.dimFb = (K,) + filt + (C,)
        self.dimO = (K,) + out + (N,)
        filter_rows = C * T * R * S
        self.dimI2 = (C * D * H * W, N)
        self.dimF2 = (filter_rows, K)
        self.dimF2t = (K, filter_rows)
        self.dimO2 = (K * M * P * Q, N)
        self.dimS = (K, 1)

        # Element counts.
        self.sizeI = reduce(mul, self.dimI, 1)
        self.sizeF = reduce(mul, self.dimF, 1)
        self.sizeO = reduce(mul, self.dimO, 1)
        self.nOut = reduce(mul, self.MPQ, 1) * K

        # flop count for benchmarking
        self.flops = P * Q * M * K * N * C * R * S * T * 2.0

        # Guard: only the 2D case is handled by these kernels.
        if T > 1 or D > 1:
            raise ValueError("3D Convolution not supported by CUDA C kernels.")

        # All three kernel constructors share this positional prefix.
        kernel_args = (lib, self.dtype, N, C, K, D, H, W, T, R, S, M, P, Q,
                       pad_d, pad_h, pad_w, str_d, str_h, str_w)

        self.fprop_kernels = convolution.FpropCuda(*kernel_args, bsum=bsum)
        # TODO small C bprop?
        self.bprop_kernels = convolution.BpropCuda(*kernel_args, bsum=bsum)
        self.updat_kernels = convolution.UpdateCuda(*kernel_args)