コード例 #1
0
ファイル: cudnn.py プロジェクト: KayneWest/nervanagpu
                ( 64, 64, 64, 1, 224,224, 1, 3, 3, 0,1,1, 1,1,1),
                ( 64, 64,128, 1, 112,112, 1, 3, 3, 0,1,1, 1,1,1),
                ( 64,128,128, 1, 112,112, 1, 3, 3, 0,1,1, 1,1,1),
                ( 64,128,256, 1,  56, 56, 1, 3, 3, 0,1,1, 1,1,1),
                ( 64,256,256, 1,  56, 56, 1, 3, 3, 0,1,1, 1,1,1),
                ( 64,256,512, 1,  28, 28, 1, 3, 3, 0,1,1, 1,1,1),
                ( 64,512,512, 1,  28, 28, 1, 3, 3, 0,1,1, 1,1,1),
                ( 64,512,512, 1,  14, 14, 1, 3, 3, 0,1,1, 1,1,1),

                (128,  3, 64, 1, 224,224, 1,11,11, 0,3,3, 1,4,4),  #Alexnet
                (128, 64,192, 1,  27, 27, 1, 5, 5, 0,2,2, 1,1,1),
                (128,192,384, 1,  13, 13, 1, 3, 3, 0,1,1, 1,1,1),
                (128,384,256, 1,  13, 13, 1, 3, 3, 0,1,1, 1,1,1),
                (128,256,256, 1,  13, 13, 1, 3, 3, 0,1,1, 1,1,1),):

    conv = ng.conv_layer(dtype, *dims)

    N,C,K = conv.NCK
    D,H,W = conv.DHW
    T,R,S = conv.TRS
    M,P,Q = conv.MPQ
    pad_d, pad_h, pad_w = conv.padding
    str_d, str_h, str_w = conv.strides
    alpha, beta = (1.0, 0.0)

    dimI = conv.dimI2
    dimF = conv.dimF2
    dimO = conv.dimO2

    print "cudnn:"
コード例 #2
0
ファイル: testcudaconv.py プロジェクト: jcoreyes/nervanagpu
def run():
    ng = NervanaGPU(stochastic_round=False)

    dt = np.float32
    # N: Number of images in mini-batch
    # C: Number of input feature maps
    # K: Number of output feature maps

    # D: Depth  of input image
    # H: Height of input image
    # W: Width  of input image

    # T: Depth  of filter kernel
    # R: Height of filter kernel
    # S: Width  of filter kernel
    # 
    # * images:      (numColors, imgSizeY, imgSizeX, numImages) with stride given
    # * filters:     (numColors, filterPixels, numFilters) if conv
    # *              (numModules, numColors, filterPixels, numFilters) otherwise
    # *
    # * targets:     (numFilters, numModulesY, numModulesX, numImages)

    N = 128
    C = 3
    K = 64

    D = 1
    H = 64
    W = 64

    T = 1
    R = 8
    S = 8

    pad_h = pad_w = 0
    str_h = str_w = 4

    layer = ng.conv_layer(dt, N, C, K,
            D=D, H=H, W=W,
            T=T, R=R, S=S,
            pad_d=0, pad_h=pad_h, pad_w=pad_w,
            str_d=1, str_h=str_h, str_w=str_w,
            grid_P=0, grid_Q=0, update_size=None)

    numImages = N 
    numFilters = K

    numModulesY = int(math.ceil(float(H - R + 1 + 2*pad_h) / str_h))
    numModulesX = int(math.ceil(float(W - S + 1 + 2*pad_w) / str_w))

    print "Num Modules ", numModulesX, numModulesY


    # Set up images, filters, and outputs
    # imgd = np.loadtxt("im1.txt")
    # img = np.zeros((64, 64, 3))
    # print imgd.shape
    # for i in range(3):
    #     img[:, :, i] = imgd[i*64:(i+1)*64, :]
    # hostImages = np.tile(img)

    hostImages = np.random.rand(C, H, W, N)
    hostFilters = np.random.uniform(low=0.0, high=1.0, size=(C, S*R, numFilters)) #np.ones((C, S*R, numFilters)) #
    hostOutputs = np.zeros((numFilters, numModulesY, numModulesX, N))

    print "Input sum", np.sum(hostImages)

    # Run cc2 kernel    
    devI = ng.array(hostImages, dtype=dt)
    devF = ng.array(hostFilters, dtype=dt)
    devO = ng.array(hostOutputs, dtype=dt)

    ng.fprop_cuda_conv(layer, devI, devF, devO)

    print "CC2 input sum: ", np.sum(devI.asnumpyarray())
    print "CC2 output sum: ", np.sum(devO.asnumpyarray())

    # Run maxwel kernel
    # images: (C * H * W, N)
    # filters:  (C * S * R , numFilters)
    # outputs:  (numFilters * numModulesX * numModulesY, N)
    devI = ng.array(hostImages.reshape((C*H*W, N)), dtype=dt)
    devF = ng.array(hostFilters.reshape((C*S*R, numFilters)), dtype=dt)
    devO2 = ng.array(hostOutputs.reshape(numFilters*numModulesX*numModulesY, N), dtype=dt)

    ng.fprop_conv(layer, devI, devF, devO2)
    print "NG input sum: ", np.sum(devI.asnumpyarray())
    print "NG output sum: ", np.sum(devO2.asnumpyarray())

    hostOutputs1 = np.reshape(devO.asnumpyarray(), devO2.shape)
    hostOutputs2 = devO2.asnumpyarray()

    for i in xrange(hostOutputs1.shape[0]):
       for j in xrange(hostOutputs1.shape[1]):
           assert(abs(hostOutputs1[i, j] - hostOutputs2[i, j]) < 1e-4)
コード例 #3
0
    (64, 64, 64, 1, 224, 224, 1, 3, 3, 0, 1, 1, 1, 1, 1),
    (64, 64, 128, 1, 112, 112, 1, 3, 3, 0, 1, 1, 1, 1, 1),
    (64, 128, 128, 1, 112, 112, 1, 3, 3, 0, 1, 1, 1, 1, 1),
    (64, 128, 256, 1, 56, 56, 1, 3, 3, 0, 1, 1, 1, 1, 1),
    (64, 256, 256, 1, 56, 56, 1, 3, 3, 0, 1, 1, 1, 1, 1),
    (64, 256, 512, 1, 28, 28, 1, 3, 3, 0, 1, 1, 1, 1, 1),
    (64, 512, 512, 1, 28, 28, 1, 3, 3, 0, 1, 1, 1, 1, 1),
    (64, 512, 512, 1, 14, 14, 1, 3, 3, 0, 1, 1, 1, 1, 1),
    (128, 3, 64, 1, 224, 224, 1, 11, 11, 0, 3, 3, 1, 4, 4),  #Alexnet
    (128, 64, 192, 1, 27, 27, 1, 5, 5, 0, 2, 2, 1, 1, 1),
    (128, 192, 384, 1, 13, 13, 1, 3, 3, 0, 1, 1, 1, 1, 1),
    (128, 384, 256, 1, 13, 13, 1, 3, 3, 0, 1, 1, 1, 1, 1),
    (128, 256, 256, 1, 13, 13, 1, 3, 3, 0, 1, 1, 1, 1, 1),
):

    conv = ng.conv_layer(dtype, *dims)

    N, C, K = conv.NCK
    D, H, W = conv.DHW
    T, R, S = conv.TRS
    M, P, Q = conv.MPQ
    pad_d, pad_h, pad_w = conv.padding
    str_d, str_h, str_w = conv.strides
    alpha, beta = (1.0, 0.0)

    dimI = conv.dimI2
    dimF = conv.dimF2
    dimO = conv.dimO2

    print "cudnn:"
コード例 #4
0
ファイル: conv_test.py プロジェクト: KayneWest/nervanagpu
# Show the name of the device behind the current context.
print(context.get_device().name())

# Array printing: ints right-aligned in a 10-character field, floats with no
# decimals and a leading space reserved for the sign.
np.set_printoptions(
    threshold=8193,
    linewidth=600,
    formatter={
        'int': lambda v: "%10d" % v,
        'float': lambda v: "% .0f" % v,
    },
)

ops = {"update"}  # any of "fprop", "bprop", "update"
ones = 0
cpu = 0  # Set CPU to 1 to check against CPU
repeat = 1
dtype = np.float32

ng = NervanaGPU(stochastic_round=False, bench=True)

# Positional arguments are grouped three at a time, as labelled per line.
conv = ng.conv_layer(dtype,
                     16, 3, 8,    # N, C, K
                     1, 64, 64,   # D, H, W
                     1, 3, 3,     # T, R, S
                     0, 1, 1,     # padding
                     1, 1, 1)     # strides

# Keep the layer's dimension attributes under short module-level names.
dimI, dimF, dimO = conv.dimI, conv.dimF, conv.dimO

# Collapse the outer dimensions into one and preserve the inner dimension.
# This allows for easy CPU convolution in numpy.
def slicable(dim, pad=0):
    """Return ``dim`` flattened to a 2-tuple ``(outer, inner)``.

    ``outer`` is the product of every entry of ``dim`` except the last
    (1 when there are no such entries) plus ``pad``; ``inner`` is the last
    entry of ``dim`` unchanged.
    """
    outer_size = 1
    for extent in dim[:-1]:
        outer_size = outer_size * extent
    return (outer_size + pad, dim[-1])
コード例 #5
0
ファイル: conv_test.py プロジェクト: honorpeter/Caffe2_fpga
# Show the name of the device behind the current context.
print(context.get_device().name())

# Array printing: ints right-aligned in a 10-character field, floats with no
# decimals and a leading space reserved for the sign.
np.set_printoptions(
    threshold=8193,
    linewidth=600,
    formatter={
        'int': lambda v: "%10d" % v,
        'float': lambda v: "% .0f" % v,
    },
)

ops = {"update"}  # any of "fprop", "bprop", "update"
ones = 0
cpu = 0  # Set CPU to 1 to check against CPU
repeat = 1
dtype = np.float32

ng = NervanaGPU(stochastic_round=False, bench=True)

# Positional arguments are grouped three at a time, as labelled per line.
conv = ng.conv_layer(dtype,
                     16, 3, 8,    # N, C, K
                     1, 64, 64,   # D, H, W
                     1, 3, 3,     # T, R, S
                     0, 1, 1,     # padding
                     1, 1, 1)     # strides

# Keep the layer's dimension attributes under short module-level names.
dimI, dimF, dimO = conv.dimI, conv.dimF, conv.dimO

# Collapse the outer dimensions into one and preserve the inner dimension.
# This allows for easy CPU convolution in numpy.
def slicable(dim, pad=0):
    """Flatten a shape to two dimensions, keeping the last one intact.

    Multiplies together all entries of ``dim`` before the last (starting
    from 1), adds ``pad`` to that product, and returns it paired with the
    last entry of ``dim``.
    """
    leading = dim[:-1]
    trailing = dim[-1]
    product = 1
    for size in leading:
        product = product * size
    return (product + pad, trailing)