Example #1
0
def minimum_cuda(a, b=None):
    """Return the minimum of one or two GPU arrays.

    Parameters
    ----------
    a : gpuarray
        First GPUArray.
    b : gpuarray, optional
        Second GPUArray. When omitted (``None``), the reduction
        ``cuda_array.min(a)`` over ``a`` alone is returned instead.

    Returns
    -------
    gpuarray
        ``cuda_array.minimum(a, b)`` when ``b`` is given, otherwise
        ``cuda_array.min(a)``.

    Examples
    --------
    >>> result = minimum_cuda(give_cuda([1, 2, 3]), give_cuda([3, 2, 1]))
    >>> type(result)
    <class 'pycuda.gpuarray.GPUArray'>
    """
    # Guard clause: with a single operand, reduce over that array alone.
    if b is None:
        return cuda_array.min(a)
    return cuda_array.minimum(a, b)
Example #2
0
    def test_minimum_maximum_scalar(self):
        """Check gpuarray.maximum/minimum against NumPy with a scalar operand."""
        from pycuda.curandom import rand as curand
        import pycuda.gpuarray as gpuarray

        n_elems = 20
        dev_vals = curand((n_elems,))
        host_vals = dev_vals.get()

        # The scalar is passed on the right for maximum and on the left for
        # minimum, so both argument orders are exercised.
        dev_max = gpuarray.maximum(dev_vals, 0)
        dev_min = gpuarray.minimum(0, dev_vals)

        max_err = la.norm(dev_max.get() - np.maximum(host_vals, 0))
        assert max_err == 0
        min_err = la.norm(dev_min.get() - np.minimum(0, host_vals))
        assert min_err == 0
Example #3
0
    def test_minimum_maximum_scalar(self):
        """Compare scalar-vs-array maximum/minimum on the GPU with NumPy."""
        from pycuda.curandom import rand as curand
        import pycuda.gpuarray as gpuarray

        length = 20
        gpu_values = curand((length, ))
        cpu_values = gpu_values.get()

        # Scalar on the right for maximum, on the left for minimum.
        gpu_max = gpuarray.maximum(gpu_values, 0)
        gpu_min = gpuarray.minimum(0, gpu_values)

        expected_max = np.maximum(cpu_values, 0)
        assert la.norm(gpu_max.get() - expected_max) == 0

        expected_min = np.minimum(0, cpu_values)
        assert la.norm(gpu_min.get() - expected_min) == 0
Example #4
0
    def test_if_positive(self):
        """Check element-wise maximum/minimum of two GPU arrays against NumPy.

        NOTE(review): despite its name, this test exercises
        ``gpuarray.maximum`` / ``gpuarray.minimum``, not ``if_positive``.
        """
        from pycuda.curandom import rand as curand
        import pycuda.gpuarray as gpuarray

        count = 20
        lhs_gpu = curand((count, ))
        rhs_gpu = curand((count, ))
        lhs = lhs_gpu.get()
        rhs = rhs_gpu.get()

        gpu_max = gpuarray.maximum(lhs_gpu, rhs_gpu)
        gpu_min = gpuarray.minimum(lhs_gpu, rhs_gpu)

        # Diagnostic output: device result followed by the NumPy reference.
        print(gpu_max)
        print(np.maximum(lhs, rhs))

        max_residual = la.norm(gpu_max.get() - np.maximum(lhs, rhs))
        assert max_residual == 0
        min_residual = la.norm(gpu_min.get() - np.minimum(lhs, rhs))
        assert min_residual == 0
Example #5
0
    def test_if_positive(self):
        """Verify two-array gpuarray.maximum/minimum match NumPy exactly.

        NOTE(review): the name says ``if_positive`` but only ``maximum`` and
        ``minimum`` are exercised here.
        """
        from pycuda.curandom import rand as curand
        import pycuda.gpuarray as gpuarray

        size = 20
        first_gpu = curand((size,))
        second_gpu = curand((size,))
        first = first_gpu.get()
        second = second_gpu.get()

        larger_gpu = gpuarray.maximum(first_gpu, second_gpu)
        smaller_gpu = gpuarray.minimum(first_gpu, second_gpu)

        # Show the device result next to the host reference for debugging.
        print(larger_gpu)
        print(np.maximum(first, second))

        expected_larger = np.maximum(first, second)
        assert la.norm(larger_gpu.get() - expected_larger) == 0

        expected_smaller = np.minimum(first, second)
        assert la.norm(smaller_gpu.get() - expected_smaller) == 0
Example #6
0
def demosaick_gpu(img):
    """Run the two-path demosaicking network on the GPU and return an image.

    The input is uploaded with ``gp.to_gpu``, processed by two branches
    ("Path 1" on the log of the image, "Path 2" on the raw image) whose
    outputs are multiplied element-wise, summed, clamped to [0, 1],
    downloaded with ``.get()`` and converted by ``p2im``.

    Parameters
    ----------
    img : array
        Host array accepted by ``gp.to_gpu``.
        NOTE(review): expected shape/dtype are not visible here -- confirm
        against ``im2col`` and the callers.

    Returns
    -------
    The value returned by ``p2im`` for the downloaded result.

    Notes
    -----
    Relies on module-level state defined elsewhere: ``_wts`` (weights),
    ``_i2c1`` / ``_i2c2`` (im2col arguments), ``_eps``, ``_ofac``,
    ``_numsel`` and the helpers ``im2col`` / ``p2im``.
    """
    img = gp.to_gpu(img)
    # Patches for path 2 are taken from the raw image, *before* the in-place
    # log below overwrites ``img``.
    p2x = im2col(img, _i2c2)
    # In-place log; ``_eps`` is added before taking the log.
    cm.log(img + _eps, out=img)
    # Patches for path 1 are taken from the log image.
    p1x = im2col(img, _i2c1)

    # Leading two dimensions of each patch tensor.
    wA = p1x.shape[0]
    wB = p2x.shape[0]
    hA = p1x.shape[1]
    hB = p2x.shape[1]

    # Path 1
    # Flatten to one row per patch position (576 values per row), multiply by
    # the first weight matrix, then exponentiate in place (undoing the log).
    p1x = p1x.reshape([wA * hA, 576])
    p1y = lg.dot(p1x, _wts.int1)
    cm.exp(p1y, out=p1y)

    # Second matrix multiply plus ``int2b`` (presumably a bias vector --
    # confirm against the weight loader), then reshape so each row holds
    # ``_ofac`` values.
    p1y = p1y.reshape([wA * hA * 64, 3 * _ofac])
    p1x = lg.dot(p1y, _wts.int2)
    msc.add_matvec(p1x, _wts.int2b, out=p1x)
    p1x = p1x.reshape([wA * hA * 64 * 3, _ofac])

    # Path 2
    # conv1
    # Matrix multiply + add_matvec, then clamp below at 0. The return value of
    # gp.maximum is discarded, so the third positional argument is the output
    # array (the clamp is in place).
    p2x = p2x.reshape([wB * hB, 64])
    p2y = lg.dot(p2x, _wts.c1)
    msc.add_matvec(p2y, _wts.c1b, out=p2y)
    gp.maximum(p2y, 0., p2y)
    p2y = p2y.reshape([wB, hB, _numsel])

    # conv2
    # Four shifted (0/1 offset in each of the first two axes) windows of p2y
    # are each multiplied by their own weight matrix and combined in p2x.
    # ``shI`` is the 3-D window shape, ``shM`` the same data as a 2-D matrix.
    shI = [wB - 1, hB - 1, _numsel]
    shM = [(wB - 1) * (hB - 1), _numsel]
    # NOTE(review): this allocation is rebound by ``lg.dot`` below before it
    # is read, so it appears to be unused.
    p2x = gp.empty(shM, dtype=np.float32)
    # Scratch buffer reused for each shifted window.
    pTT = gp.empty(shI, dtype=np.float32)

    # NOTE(review): pTT was just allocated with shape shI, so this first
    # reshape looks redundant.
    pTT = pTT.reshape(shI)
    pTT[...] = p2y[0:-1, 0:-1, :]
    pTT = pTT.reshape(shM)
    p2x = lg.dot(pTT, _wts.c200)
    pTT = pTT.reshape(shI)
    pTT[...] = p2y[0:-1, 1:, :]
    pTT = pTT.reshape(shM)
    # Presumably accumulates pTT . c201 into p2x (third argument) -- confirm
    # against the ``lg.add_dot`` documentation.
    lg.add_dot(pTT, _wts.c201, p2x)
    pTT = pTT.reshape(shI)
    pTT[...] = p2y[1:, 0:-1, :]
    pTT = pTT.reshape(shM)
    lg.add_dot(pTT, _wts.c210, p2x)
    pTT = pTT.reshape(shI)
    pTT[...] = p2y[1:, 1:, :]
    pTT = pTT.reshape(shM)
    lg.add_dot(pTT, _wts.c211, p2x)
    msc.add_matvec(p2x, _wts.c2b, out=p2x)
    gp.maximum(p2x, 0., p2x)
    p2x = p2x.reshape(shI)

    # conv 3
    # Same four-window scheme as conv2, reading from p2x and writing to p2y,
    # with each spatial dimension shrunk by one more element.
    shI = [wB - 2, hB - 2, _numsel]
    shM = [(wB - 2) * (hB - 2), _numsel]
    # NOTE(review): as above, this buffer is rebound by ``lg.dot`` before use.
    p2y = gp.empty(shM, dtype=np.float32)
    pTT = gp.empty(shI, dtype=np.float32)

    pTT = pTT.reshape(shI)
    pTT[...] = p2x[0:-1, 0:-1, :]
    pTT = pTT.reshape(shM)
    p2y = lg.dot(pTT, _wts.c300)
    pTT = pTT.reshape(shI)
    pTT[...] = p2x[0:-1, 1:, :]
    pTT = pTT.reshape(shM)
    lg.add_dot(pTT, _wts.c301, p2y)
    pTT = pTT.reshape(shI)
    pTT[...] = p2x[1:, 0:-1, :]
    pTT = pTT.reshape(shM)
    lg.add_dot(pTT, _wts.c310, p2y)
    pTT = pTT.reshape(shI)
    pTT[...] = p2x[1:, 1:, :]
    pTT = pTT.reshape(shM)
    lg.add_dot(pTT, _wts.c311, p2y)
    msc.add_matvec(p2y, _wts.c3b, out=p2y)
    gp.maximum(p2y, 0., p2y)

    # Output layer of path 2: matrix multiply, add_matvec, clamp below at 0.
    p2x = lg.dot(p2y, _wts.sout)

    msc.add_matvec(p2x, _wts.soutb, out=p2x)
    gp.maximum(p2x, 0., p2x)
    # Bring path 2 into the same layout as path 1 so they can be multiplied.
    p2x = p2x.reshape(p1x.shape)

    # Combine
    # Element-wise product of the two paths, summed along axis 1, then clamped
    # to the range [0, 1] in place.
    p1x *= p2x
    p1 = msc.sum(p1x, axis=1)
    gp.maximum(p1, 0., p1)
    gp.minimum(p1, 1., p1)
    p1 = p1.reshape([wA, hA, 64 * 3])

    # Download to the host and let p2im assemble the final image.
    im = p2im(p1.get())

    return im
Example #7
0
def roi_pool(feature_maps, input_rois):
    """Max-pool every region of interest of a feature map into a 6x6 grid.

    Parameters
    ----------
    feature_maps : numpy.ndarray
        Host array whose shape unpacks as (batch, height, width, channels).
        Only batch index 0 is read.
    input_rois : numpy.ndarray
        One row per ROI. Columns 1..4 are read as start_w, start_h, end_w,
        end_h (column 0 is not used) and scaled by 0.0625 to map them onto
        the feature map.

    Returns
    -------
    gpuarray
        float32 array of shape (num_rois, 9216); row ``i`` is the flattened
        (256, 36) pooled output for ROI ``i``.

    Notes
    -----
    The per-ROI buffer size (256 channels x 36 bins = 9216) is hard-coded, so
    inputs with more than 256 channels are not supported.

    The max buffer is re-created (zero-filled) for every ROI. It used to be
    allocated once before the ROI loop, which let maxima from earlier ROIs
    leak into later ones. Bins start at 0, so pooled values are never below
    0 and empty bins stay 0.
    """
    feature_maps_gpu = gpuarray.to_gpu(feature_maps)
    input_rois_gpu = gpuarray.to_gpu(input_rois)

    pooled_height = 6
    pooled_width = 6
    spatial_scale = 0.0625

    _, height, width, channels = feature_maps_gpu.shape
    num_rois = input_rois_gpu.shape[0]

    roi_pool_conv5s = gpuarray.to_gpu(
        np.zeros([num_rois, 9216], dtype=np.float32))
    for i in range(num_rois):
        # Fresh zeroed max buffer for this ROI, so results from the previous
        # ROI cannot influence the comparisons below.
        roi_pool_conv5 = gpuarray.to_gpu(
            np.zeros([256, 36], dtype=np.float32))

        # ROI corners on the feature-map grid.
        # NOTE: floor() is used here; the original author flagged that these
        # should be round().
        roi_start_w = cumath.floor(input_rois_gpu[i, 1] * spatial_scale)
        roi_start_h = cumath.floor(input_rois_gpu[i, 2] * spatial_scale)
        roi_end_w = cumath.floor(input_rois_gpu[i, 3] * spatial_scale)
        roi_end_h = cumath.floor(input_rois_gpu[i, 4] * spatial_scale)

        # Force ROIs to be at least 1x1.
        roi_height = gpuarray.maximum(roi_end_h - roi_start_h + 1, 1)
        roi_width = gpuarray.maximum(roi_end_w - roi_start_w + 1, 1)
        bin_size_h = roi_height / float(pooled_height)
        bin_size_w = roi_width / float(pooled_width)

        for ph in range(pooled_height):
            for pw in range(pooled_width):
                # Bin bounds do not depend on the channel, so they are
                # computed and downloaded once per bin, not once per channel.
                hstart = cumath.floor(ph * bin_size_h)
                wstart = cumath.floor(pw * bin_size_w)
                hend = cumath.ceil((ph + 1) * bin_size_h)
                wend = cumath.ceil((pw + 1) * bin_size_w)

                # Shift into feature-map coordinates and clip to its extent.
                hstart = gpuarray.minimum(
                    gpuarray.maximum(hstart + roi_start_h, 0), height)
                hend = gpuarray.minimum(
                    gpuarray.maximum(hend + roi_start_h, 0), height)
                wstart = gpuarray.minimum(
                    gpuarray.maximum(wstart + roi_start_w, 0), width)
                wend = gpuarray.minimum(
                    gpuarray.maximum(wend + roi_start_w, 0), width)

                h_lo, h_hi = int(hstart.get()), int(hend.get())
                w_lo, w_hi = int(wstart.get()), int(wend.get())

                pool_index = ph * pooled_width + pw

                # An empty bin (h_hi <= h_lo or w_hi <= w_lo) yields empty
                # ranges below, leaving the freshly zeroed entry at 0.
                for c in range(channels):
                    for h in range(h_lo, h_hi):
                        for w in range(w_lo, w_hi):
                            if ((feature_maps_gpu[0, h, w, c] >
                                 roi_pool_conv5[c, pool_index]).get()):
                                roi_pool_conv5[
                                    c, pool_index] = feature_maps_gpu[0, h, w,
                                                                      c]

        roi_pool_conv5s[i] = roi_pool_conv5.reshape([9216])

    return roi_pool_conv5s
# Demo of GPUArray constructors, element-wise selection and reductions.
# NOTE(review): ``dtype`` and ``a`` are not defined in this excerpt; they are
# presumably set earlier in the script -- confirm.

# Uninitialised 100x100 device array (contents are whatever is in memory).
c = gpuarray.empty((100, 100), dtype=dtype)
print('c:\n{0}\nshape={1}\n'.format(c, c.shape))

# Zero-filled 100x100 device array.
d = gpuarray.zeros((100, 100), dtype=dtype)
print('d:\n{0}\nshape={1}\n'.format(d, d.shape))

# Device-side range from 0.0 (inclusive) to 100.0 (exclusive) in steps of 1.0.
e = gpuarray.arange(0.0, 100.0, 1.0, dtype=dtype)
print('e:\n{0}\nshape={1}\n'.format(e, e.shape))

# Element-wise select: takes the second argument where the criterion
# (e < 50) is positive, otherwise the third.
f = gpuarray.if_positive(e < 50, e - 100, e + 100)
print('f:\n{0}\nshape={1}\n'.format(f, f.shape))

# Same selection with constant branches: ones where e < 50, zeros elsewhere.
g = gpuarray.if_positive(e < 50, gpuarray.ones_like(e), gpuarray.zeros_like(e))
print('g:\n{0}\nshape={1}\n'.format(g, g.shape))

# Element-wise maximum / minimum of two device arrays.
h = gpuarray.maximum(e, f)
print('h:\n{0}\nshape={1}\n'.format(h, h.shape))

i = gpuarray.minimum(e, f)
print('i:\n{0}\nshape={1}\n'.format(i, i.shape))

# Reductions over ``a``; the type is printed to show what each one returns.
# NOTE(review): this rebinds ``g`` (the mask computed above); the alphabetical
# sequence suggests ``j`` may have been intended -- confirm before renaming.
g = gpuarray.sum(a)
print(g, type(g))

k = gpuarray.max(a)
print(k, type(k))

l = gpuarray.min(a)
print(l, type(l))
Example #9
0
def MIN(inp, inout):
    """Store the element-wise minimum of ``inp`` and ``inout`` in ``inout``.

    ``inout`` is both an operand and the ``out=`` destination of
    ``gpu.minimum``. Nothing is returned.
    """
    destination = inout
    gpu.minimum(inp, destination, out=destination)