Ejemplo n.º 1
0
def three():
    """Time and spot-check a 3x3 convolution forward pass.

    Builds random float32 weights and inputs (seeded with 123), runs the
    ``Convolution`` layer's ``fprop`` once untimed and then three timed
    iterations, copies the result back to the host, and verifies a handful
    of output coordinates with ``check``.

    Array layouts used here:
      * weights: (input_filters, 3, 3, output_filters)
      * inputs:  (input_filters, image_size, image_size, batch_size)
      * outputs: (image_size * image_size * output_filters, batch_size)
    """
    image_size = 32
    batch_size = 32
    input_filters = 512
    output_filters = 512

    weight_shape = (input_filters, 3, 3, output_filters)
    input_shape = (input_filters, image_size, image_size, batch_size)
    output_shape = (image_size * image_size * output_filters, batch_size)

    # (c, h, w, n) output coordinates verified after the forward pass.
    spot_checks = (
        (0, 0, 0, 0),
        (0, 0, 0, 1),
        (0, 0, 1, 0),
        (0, 1, 0, 0),
        (1, 0, 0, 0),
        (3, 2, 1, 27),
        (17, 25, 7, 27),
    )

    np.random.seed(123)

    with make_backend(batch_size=batch_size, datatype=np.float32, device_id=0) as be:
        # Weights are drawn first so the seeded random stream is consumed
        # in a fixed order (weights, then inputs).
        weights = np.random.randn(*weight_shape).astype(np.float32)
        weights_dev = MyTensor.from_np(weights)
        print('type(W_cuda)', type(weights_dev))

        inputs = np.random.randn(*input_shape).astype(np.float32)
        inputs_dev = MyTensor.from_np(inputs)
        print('type(inputs_cuda)', type(inputs_dev))

        conv = Convolution((3, 3, output_filters), strides=1, padding=1, be=be)
        print('created conv')

        # The weights are assigned both before and after configure();
        # NOTE(review): presumably guards against configure() touching W --
        # confirm against the layer implementation.
        conv.W = weights_dev
        conv.configure((input_filters, image_size, image_size))
        conv.W = weights_dev
        print('configure done')

        outputs = np.zeros(output_shape, dtype=np.float32)
        outputs_dev = MyTensor.from_np(outputs)
        conv.outputs = outputs_dev

        # One untimed forward pass before the measured iterations.
        conv.fprop(inputs_dev)
        cuda.Context.synchronize()

        for _ in range(3):
            started = time.time()
            conv.fprop(inputs_dev)
            # Synchronize so the measured interval includes the GPU work.
            cuda.Context.synchronize()
            print('time=', time.time() - started)

        # NOTE(review): to_host() is assumed to refresh the host array the
        # tensor was created from (`outputs`) -- confirm in MyTensor.
        outputs_dev.to_host()
        print(outputs[1:3, 1:3])
        print('outputs.shape', outputs.shape)
        printDims(W=weights, I=inputs)

        for c, h, w, n in spot_checks:
            check(W=weights, I=inputs, O=outputs, c=c, h=h, w=w, n=n, eps=1e-3)
Ejemplo n.º 2
0
    W_cuda = gpuarray.to_gpu(W)
    conv.W = W_cuda

    print('type(W_cuda)', type(W_cuda))

    inputs = np.zeros((input_filters,image_size, image_size,batch_size), dtype=np.float32)
    inputs[:] = np.random.randn(*inputs.shape)
    inputs_cuda = gpuarray.to_gpu(inputs)

    print('type(inputs_cuda)', type(inputs_cuda))

    conv.configure((input_filters,image_size, image_size))
    print('configure done')
    outputs = np.zeros((image_size * image_size * output_filters, batch_size), dtype=np.float32)
    outputs_cuda = gpuarray.to_gpu(outputs)
    conv.outputs = outputs_cuda
    conv.fprop(inputs_cuda)
    for it in range(3):
      start = time.time()
      for i in range(10):
        conv.fprop(inputs_cuda)
      cuda.Context.synchronize()
      print('time=', time.time() - start)


    outputs = outputs_cuda.get()
    print(outputs[1:3,1:3])

    assert abs(outputs[1,1] - 1.33960593) < 1e-4
    assert abs(outputs[1,2] + 6.06682396) < 1e-4
    assert abs(outputs[2,2] - 8.76905346) < 1e-4