Example #1
for dims in (   # dims = (N,C,K,  D,H,W,  T,R,S,  pad_d,pad_h,pad_w,  str_d,str_h,str_w)
                ( 64, 64, 64, 1, 224,224, 1, 3, 3, 0,1,1, 1,1,1),  # VGG-style layers
                ( 64, 64,128, 1, 112,112, 1, 3, 3, 0,1,1, 1,1,1),
                ( 64,128,128, 1, 112,112, 1, 3, 3, 0,1,1, 1,1,1),
                ( 64,128,256, 1,  56, 56, 1, 3, 3, 0,1,1, 1,1,1),
                ( 64,256,256, 1,  56, 56, 1, 3, 3, 0,1,1, 1,1,1),
                ( 64,256,512, 1,  28, 28, 1, 3, 3, 0,1,1, 1,1,1),
                ( 64,512,512, 1,  28, 28, 1, 3, 3, 0,1,1, 1,1,1),
                ( 64,512,512, 1,  14, 14, 1, 3, 3, 0,1,1, 1,1,1),

                (128,  3, 64, 1, 224,224, 1,11,11, 0,3,3, 1,4,4),  # Alexnet
                (128, 64,192, 1,  27, 27, 1, 5, 5, 0,2,2, 1,1,1),
                (128,192,384, 1,  13, 13, 1, 3, 3, 0,1,1, 1,1,1),
                (128,384,256, 1,  13, 13, 1, 3, 3, 0,1,1, 1,1,1),
                (128,256,256, 1,  13, 13, 1, 3, 3, 0,1,1, 1,1,1),):

    conv = ng.conv_layer(dtype, *dims)

    N,C,K = conv.NCK
    D,H,W = conv.DHW
    T,R,S = conv.TRS
    M,P,Q = conv.MPQ
    pad_d, pad_h, pad_w = conv.padding
    str_d, str_h, str_w = conv.strides
    alpha, beta = (1.0, 0.0)

    dimI = conv.dimI2
    dimF = conv.dimF2
    dimO = conv.dimO2

    print "cudnn:"
Example #2
import math
import numpy as np
from nervanagpu import NervanaGPU

def run():
    ng = NervanaGPU(stochastic_round=False)

    dt = np.float32
    # N: Number of images in mini-batch
    # C: Number of input feature maps
    # K: Number of output feature maps

    # D: Depth  of input image
    # H: Height of input image
    # W: Width  of input image

    # T: Depth  of filter kernel
    # R: Height of filter kernel
    # S: Width  of filter kernel
    # 
    # * images:      (numColors, imgSizeY, imgSizeX, numImages) with stride given
    # * filters:     (numColors, filterPixels, numFilters) if conv
    # *              (numModules, numColors, filterPixels, numFilters) otherwise
    # *
    # * targets:     (numFilters, numModulesY, numModulesX, numImages)
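    #
    # Concretely, with the sizes chosen below, the cc2-style host tensors are:
    #   images:  (3, 64, 64, 128)   = (C, H, W, N)
    #   filters: (3, 64, 64)        = (C, R*S, K)      since R = S = 8
    #   targets: (64, 15, 15, 128)  = (K, numModulesY, numModulesX, N)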

    N = 128
    C = 3
    K = 64

    D = 1
    H = 64
    W = 64

    T = 1
    R = 8
    S = 8

    pad_h = pad_w = 0
    str_h = str_w = 4

    layer = ng.conv_layer(dt, N, C, K,
            D=D, H=H, W=W,
            T=T, R=R, S=S,
            pad_d=0, pad_h=pad_h, pad_w=pad_w,
            str_d=1, str_h=str_h, str_w=str_w,
            grid_P=0, grid_Q=0, update_size=None)

    numImages = N 
    numFilters = K

    numModulesY = int(math.ceil(float(H - R + 1 + 2*pad_h) / str_h))
    numModulesX = int(math.ceil(float(W - S + 1 + 2*pad_w) / str_w))
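    # e.g. with H = W = 64, R = S = 8, pad_h = pad_w = 0, str_h = str_w = 4:
    #   ceil((64 - 8 + 1) / 4) = ceil(14.25) = 15 modules along each spatial axis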

    print "Num Modules ", numModulesX, numModulesY


    # Set up images, filters, and outputs
    # imgd = np.loadtxt("im1.txt")
    # img = np.zeros((64, 64, 3))
    # print imgd.shape
    # for i in range(3):
    #     img[:, :, i] = imgd[i*64:(i+1)*64, :]
    # hostImages = np.tile(img)

    hostImages = np.random.rand(C, H, W, N)
    hostFilters = np.random.uniform(low=0.0, high=1.0, size=(C, S*R, numFilters))  # alternative: np.ones((C, S*R, numFilters))
    hostOutputs = np.zeros((numFilters, numModulesY, numModulesX, N))

    print "Input sum", np.sum(hostImages)

    # Run the cuda-convnet2 (cc2) kernel
    devI = ng.array(hostImages, dtype=dt)
    devF = ng.array(hostFilters, dtype=dt)
    devO = ng.array(hostOutputs, dtype=dt)

    ng.fprop_cuda_conv(layer, devI, devF, devO)

    print "CC2 input sum: ", np.sum(devI.asnumpyarray())
    print "CC2 output sum: ", np.sum(devO.asnumpyarray())

    # Run the maxwell kernel
    # images: (C * H * W, N)
    # filters:  (C * S * R , numFilters)
    # outputs:  (numFilters * numModulesX * numModulesY, N)
    devI = ng.array(hostImages.reshape((C*H*W, N)), dtype=dt)
    devF = ng.array(hostFilters.reshape((C*S*R, numFilters)), dtype=dt)
    devO2 = ng.array(hostOutputs.reshape(numFilters*numModulesX*numModulesY, N), dtype=dt)

    ng.fprop_conv(layer, devI, devF, devO2)
    print "NG input sum: ", np.sum(devI.asnumpyarray())
    print "NG output sum: ", np.sum(devO2.asnumpyarray())

    hostOutputs1 = np.reshape(devO.asnumpyarray(), devO2.shape)
    hostOutputs2 = devO2.asnumpyarray()

    for i in xrange(hostOutputs1.shape[0]):
        for j in xrange(hostOutputs1.shape[1]):
            assert abs(hostOutputs1[i, j] - hostOutputs2[i, j]) < 1e-4
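
The element-wise check above can also be written as a single vectorized call; a minimal equivalent sketch using numpy's allclose with the same absolute tolerance:

    # equivalent to the nested loops above (rtol=0 keeps it a pure absolute check)
    assert np.allclose(hostOutputs1, hostOutputs2, rtol=0.0, atol=1e-4)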
Example #3
import numpy as np
from operator import mul
from nervanagpu import NervanaGPU
from pycuda.autoinit import context   # pycuda.autoinit creates the CUDA context

print context.get_device().name()

np.set_printoptions(threshold=8193, linewidth=600, formatter={'int':lambda x: "%10d" % x,'float':lambda x: "% .0f" % x})

ops  = set(("update",)) # "fprop","bprop","update"
ones = 0
cpu  = 0  # Set CPU to 1 to check against CPU
repeat = 1
dtype = np.float32

ng = NervanaGPU(stochastic_round=False, bench=True)

conv = ng.conv_layer(
    dtype,
    16,3,8,    # N,C,K
    1,64,64,   # D,H,W
    1,3,3,     # T,R,S
    0,1,1,     # padding
    1,1,1)     # strides


dimI = conv.dimI
dimF = conv.dimF
dimO = conv.dimO

# collapse the outer dimensions into one and preserve the inner dimension;
# this makes it easy to run a reference convolution on the CPU with numpy
def slicable(dim, pad=0):
    dim0 = reduce(mul, dim[:-1], 1) + pad
    return (dim0, dim[-1])
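
As a small usage sketch (hypothetical shape, just to illustrate the helper): slicable collapses every axis except the last into a single leading dimension, optionally growing it by pad rows:

# hypothetical 5-d filter-like shape (C, T, R, S, K) = (3, 1, 3, 3, 8)
assert slicable((3, 1, 3, 3, 8)) == (27, 8)         # 3*1*3*3 = 27
assert slicable((3, 1, 3, 3, 8), pad=1) == (28, 8)  # pad adds extra leading rows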