def minimum_cuda(a, b=None):
    """Minimum of one or two GPUArrays.

    Parameters
    ----------
    a : gpuarray
        First GPUArray.
    b : gpuarray, optional
        Second GPUArray. When omitted (``None``), the minimum is taken
        over `a` alone.

    Returns
    -------
    gpuarray
        ``cuda_array.minimum(a, b)`` when `b` is given, otherwise
        ``cuda_array.min(a)``.

    Examples
    --------
    >>> a = minimum_cuda(give_cuda([1, 2, 3]), give_cuda([3, 2, 1]))
    >>> type(a)
    <class 'pycuda.gpuarray.GPUArray'>

    """
    # Single-array form: reduce over `a` only.
    if b is None:
        return cuda_array.min(a)

    # Two-array form: pairwise minimum of `a` and `b`.
    return cuda_array.minimum(a, b)
def test_minimum_maximum_scalar(self):
    """GPU maximum/minimum against the scalar 0 must match NumPy exactly."""
    from pycuda.curandom import rand as curand
    import pycuda.gpuarray as gpuarray

    length = 20
    dev_values = curand((length,))
    host_values = dev_values.get()

    # The scalar is passed on the right for maximum and on the left for
    # minimum, so both argument orders are exercised.
    dev_max = gpuarray.maximum(dev_values, 0)
    dev_min = gpuarray.minimum(0, dev_values)

    max_error = la.norm(dev_max.get() - np.maximum(host_values, 0))
    assert max_error == 0
    min_error = la.norm(dev_min.get() - np.minimum(0, host_values))
    assert min_error == 0
def test_minimum_maximum_scalar(self):
    """Compare gpuarray.maximum/minimum with a scalar operand to NumPy."""
    from pycuda.curandom import rand as curand

    count = 20
    gpu_vec = curand((count,))
    cpu_vec = gpu_vec.get()

    import pycuda.gpuarray as gpuarray

    # Array-vs-scalar maximum, then scalar-vs-array minimum.
    gpu_max = gpuarray.maximum(gpu_vec, 0)
    gpu_min = gpuarray.minimum(0, gpu_vec)

    expected_max = np.maximum(cpu_vec, 0)
    assert la.norm(gpu_max.get() - expected_max) == 0

    expected_min = np.minimum(0, cpu_vec)
    assert la.norm(gpu_min.get() - expected_min) == 0
def test_if_positive(self):
    """Check elementwise gpuarray.maximum/minimum of two arrays against NumPy."""
    from pycuda.curandom import rand as curand

    count = 20
    lhs_gpu = curand((count,))
    rhs_gpu = curand((count,))
    lhs = lhs_gpu.get()
    rhs = rhs_gpu.get()

    import pycuda.gpuarray as gpuarray

    gpu_max = gpuarray.maximum(lhs_gpu, rhs_gpu)
    gpu_min = gpuarray.minimum(lhs_gpu, rhs_gpu)

    # Show the device result next to the NumPy reference.
    print(gpu_max)
    print(np.maximum(lhs, rhs))

    max_error = la.norm(gpu_max.get() - np.maximum(lhs, rhs))
    assert max_error == 0
    min_error = la.norm(gpu_min.get() - np.minimum(lhs, rhs))
    assert min_error == 0
def test_if_positive(self):
    """Two-array gpuarray.maximum/minimum must agree exactly with NumPy."""
    from pycuda.curandom import rand as curand
    import pycuda.gpuarray as gpuarray

    length = 20
    first_gpu = curand((length,))
    second_gpu = curand((length,))
    first = first_gpu.get()
    second = second_gpu.get()

    larger_gpu = gpuarray.maximum(first_gpu, second_gpu)
    smaller_gpu = gpuarray.minimum(first_gpu, second_gpu)

    # Print the device result and the host reference for inspection.
    print(larger_gpu)
    print(np.maximum(first, second))

    reference_max = np.maximum(first, second)
    assert la.norm(larger_gpu.get() - reference_max) == 0

    reference_min = np.minimum(first, second)
    assert la.norm(smaller_gpu.get() - reference_min) == 0
def demosaick_gpu(img):
    """Run the two-path demosaicking computation on the GPU.

    ``img`` is uploaded with ``gp.to_gpu``, two sets of patches are
    extracted with ``im2col``, each set is pushed through its own chain of
    matrix products ("Path 1" and "Path 2"), the two results are multiplied
    elementwise and summed along axis 1, clamped to [0, 1], copied back to
    the host and passed through ``p2im``.

    Parameters
    ----------
    img : array accepted by ``gp.to_gpu``
        Input image. NOTE(review): the expected shape/dtype is not visible
        in this block - confirm against ``im2col`` and the callers.

    Returns
    -------
    The value returned by ``p2im`` for the host copy of the combined result.

    Notes
    -----
    NOTE(review): ``gp``, ``cm``, ``lg``, ``msc``, ``im2col``, ``p2im``,
    ``_wts``, ``_eps``, ``_ofac``, ``_numsel``, ``_i2c1`` and ``_i2c2`` are
    bound outside this block. The comments below assume ``gp`` is
    ``pycuda.gpuarray`` (third positional argument of ``maximum``/``minimum``
    is the output array) and that ``lg.add_dot(a, b, c)`` accumulates the
    product of ``a`` and ``b`` into ``c`` - confirm.
    """
    img = gp.to_gpu(img)
    # Path-2 patches are extracted BEFORE the in-place log below, so they see
    # the raw intensities; path-1 patches are extracted after it.
    p2x = im2col(img, _i2c2)
    cm.log(img + _eps, out=img)
    p1x = im2col(img, _i2c1)
    # First two dimensions of each patch array.
    wA = p1x.shape[0]
    wB = p2x.shape[0]
    hA = p1x.shape[1]
    hB = p2x.shape[1]
    # Path 1
    # Flatten to one row per patch position, multiply by int1, then undo the
    # log with an in-place exp.
    p1x = p1x.reshape([wA * hA, 576])
    p1y = lg.dot(p1x, _wts.int1)
    cm.exp(p1y, out=p1y)
    p1y = p1y.reshape([wA * hA * 64, 3 * _ofac])
    p1x = lg.dot(p1y, _wts.int2)
    msc.add_matvec(p1x, _wts.int2b, out=p1x)
    # Final path-1 layout: one row of _ofac values per output element.
    p1x = p1x.reshape([wA * hA * 64 * 3, _ofac])
    # Path 2
    # conv1
    p2x = p2x.reshape([wB * hB, 64])
    p2y = lg.dot(p2x, _wts.c1)
    msc.add_matvec(p2y, _wts.c1b, out=p2y)
    # Clamp at zero, writing into p2y itself.
    gp.maximum(p2y, 0., p2y)
    p2y = p2y.reshape([wB, hB, _numsel])
    # conv2
    # Built from four shifted (w-1, h-1) windows of p2y, each multiplied by
    # its own weight matrix (c200, c201, c210, c211) and summed into p2x.
    shI = [wB - 1, hB - 1, _numsel]  # window shape
    shM = [(wB - 1) * (hB - 1), _numsel]  # same data as a 2-D matrix
    # NOTE(review): this allocation is replaced by the lg.dot result below.
    p2x = gp.empty(shM, dtype=np.float32)
    # pTT is a scratch buffer: reshaped to shI to receive a window copy, then
    # to shM to be used as a matrix operand.
    pTT = gp.empty(shI, dtype=np.float32)
    pTT = pTT.reshape(shI)
    pTT[...] = p2y[0:-1, 0:-1, :]
    pTT = pTT.reshape(shM)
    p2x = lg.dot(pTT, _wts.c200)
    pTT = pTT.reshape(shI)
    pTT[...] = p2y[0:-1, 1:, :]
    pTT = pTT.reshape(shM)
    lg.add_dot(pTT, _wts.c201, p2x)
    pTT = pTT.reshape(shI)
    pTT[...] = p2y[1:, 0:-1, :]
    pTT = pTT.reshape(shM)
    lg.add_dot(pTT, _wts.c210, p2x)
    pTT = pTT.reshape(shI)
    pTT[...] = p2y[1:, 1:, :]
    pTT = pTT.reshape(shM)
    lg.add_dot(pTT, _wts.c211, p2x)
    msc.add_matvec(p2x, _wts.c2b, out=p2x)
    gp.maximum(p2x, 0., p2x)
    p2x = p2x.reshape(shI)
    # conv 3
    # Same four-window scheme as conv2, one element smaller in each spatial
    # dimension, reading from p2x and accumulating into p2y.
    shI = [wB - 2, hB - 2, _numsel]
    shM = [(wB - 2) * (hB - 2), _numsel]
    # NOTE(review): this allocation is replaced by the lg.dot result below.
    p2y = gp.empty(shM, dtype=np.float32)
    pTT = gp.empty(shI, dtype=np.float32)
    pTT = pTT.reshape(shI)
    pTT[...] = p2x[0:-1, 0:-1, :]
    pTT = pTT.reshape(shM)
    p2y = lg.dot(pTT, _wts.c300)
    pTT = pTT.reshape(shI)
    pTT[...] = p2x[0:-1, 1:, :]
    pTT = pTT.reshape(shM)
    lg.add_dot(pTT, _wts.c301, p2y)
    pTT = pTT.reshape(shI)
    pTT[...] = p2x[1:, 0:-1, :]
    pTT = pTT.reshape(shM)
    lg.add_dot(pTT, _wts.c310, p2y)
    pTT = pTT.reshape(shI)
    pTT[...] = p2x[1:, 1:, :]
    pTT = pTT.reshape(shM)
    lg.add_dot(pTT, _wts.c311, p2y)
    msc.add_matvec(p2y, _wts.c3b, out=p2y)
    gp.maximum(p2y, 0., p2y)
    # Output layer of path 2, again clamped at zero.
    p2x = lg.dot(p2y, _wts.sout)
    msc.add_matvec(p2x, _wts.soutb, out=p2x)
    gp.maximum(p2x, 0., p2x)
    # Match path 1's layout so the two can be multiplied elementwise.
    p2x = p2x.reshape(p1x.shape)
    # Combine
    p1x *= p2x
    p1 = msc.sum(p1x, axis=1)
    # Clamp the combined result to the range [0, 1].
    gp.maximum(p1, 0., p1)
    gp.minimum(p1, 1., p1)
    p1 = p1.reshape([wA, hA, 64 * 3])
    # Copy back to the host and convert with p2im.
    im = p2im(p1.get())
    return im
def roi_pool(feature_maps, input_rois):
    """Max-pool each region of interest of a feature map into a 6x6 grid.

    Parameters
    ----------
    feature_maps : array accepted by ``gpuarray.to_gpu``
        4-D array unpacked as ``(batch_size, height, width, channels)``.
        Only batch entry 0 is read. The scratch buffer is fixed at
        256 x 36, so 256 channels are expected.
    input_rois : array accepted by ``gpuarray.to_gpu``
        One row per ROI. Columns 1-4 are read as start_w, start_h, end_w,
        end_h in input-image coordinates; column 0 is not used.

    Returns
    -------
    gpuarray
        Array of shape ``[num_rois, 9216]`` (256 channels x 36 bins), one
        flattened pooling result per ROI.

    Notes
    -----
    The scratch buffer is cleared at the start of every ROI. Bins are only
    ever overwritten by larger values, so without the reset the maxima of
    earlier ROIs would carry over into later ones.
    """
    feature_maps_gpu = gpuarray.to_gpu(feature_maps)
    input_rois_gpu = gpuarray.to_gpu(input_rois)
    # Per-ROI scratch buffer: one row per channel, one column per bin.
    roi_pool_conv5 = gpuarray.to_gpu(np.zeros([256, 36], dtype=np.float32))
    pooled_height = 6
    pooled_width = 6
    # Factor applied to ROI coordinates to map them onto the feature map.
    spatial_scale = 0.0625
    batch_size, height, width, channels = feature_maps_gpu.shape
    num_rois = input_rois_gpu.shape[0]
    roi_pool_conv5s = gpuarray.to_gpu(
        np.zeros([num_rois, 9216], dtype=np.float32))
    for i in range(num_rois):
        # Start every ROI from a clean buffer so results from the previous
        # ROI cannot leak into this one.
        roi_pool_conv5.fill(0)
        # roi_batch_ind = input_rois_gpu[i, 0]
        # The four floor() calls below should be round().
        roi_start_w = cumath.floor(input_rois_gpu[i, 1] * spatial_scale)
        roi_start_h = cumath.floor(input_rois_gpu[i, 2] * spatial_scale)
        roi_end_w = cumath.floor(input_rois_gpu[i, 3] * spatial_scale)
        roi_end_h = cumath.floor(input_rois_gpu[i, 4] * spatial_scale)
        # Force degenerate ROIs to be at least 1x1.
        roi_height = gpuarray.maximum(roi_end_h - roi_start_h + 1, 1)
        roi_width = gpuarray.maximum(roi_end_w - roi_start_w + 1, 1)
        bin_size_h = roi_height / float(pooled_height)
        bin_size_w = roi_width / float(pooled_width)
        for c in range(channels):
            for ph in range(pooled_height):
                for pw in range(pooled_width):
                    # Bin extent relative to the ROI origin ...
                    hstart = cumath.floor(ph * bin_size_h)
                    wstart = cumath.floor(pw * bin_size_w)
                    hend = cumath.ceil((ph + 1) * bin_size_h)
                    wend = cumath.ceil((pw + 1) * bin_size_w)
                    # ... shifted to feature-map coordinates and clipped to
                    # the feature-map bounds.
                    hstart = gpuarray.minimum(
                        gpuarray.maximum(hstart + roi_start_h, 0), height)
                    hend = gpuarray.minimum(
                        gpuarray.maximum(hend + roi_start_h, 0), height)
                    wstart = gpuarray.minimum(
                        gpuarray.maximum(wstart + roi_start_w, 0), width)
                    wend = gpuarray.minimum(
                        gpuarray.maximum(wend + roi_start_w, 0), width)
                    # Non-zero when the clipped bin has no rows or no columns.
                    is_empty = (hend <= hstart) + (wend <= wstart)
                    pool_index = ph * pooled_width + pw
                    if (is_empty.get()):
                        roi_pool_conv5[c, pool_index] = 0
                    # Running maximum over the bin; the ranges are empty for
                    # an empty bin, so the loops are skipped in that case.
                    for h in range(int(hstart.get()), int(hend.get())):
                        for w in range(int(wstart.get()), int(wend.get())):
                            # index = h * width + w
                            if ((feature_maps_gpu[0, h, w, c] >
                                 roi_pool_conv5[c, pool_index]).get()):
                                roi_pool_conv5[c, pool_index] = \
                                    feature_maps_gpu[0, h, w, c]
        # Store this ROI's result as one flattened row.
        roi_pool_conv5s[i] = roi_pool_conv5.reshape([9216])
    return roi_pool_conv5s
# Demonstration of GPUArray constructors, elementwise selection and reductions.
# NOTE(review): `gpuarray`, `dtype` and `a` are bound outside this excerpt;
# the comments assume `gpuarray` is pycuda.gpuarray - confirm.

# 100x100 array from gpuarray.empty (contents presumably uninitialised).
c = gpuarray.empty((100, 100), dtype=dtype)
print('c:\n{0}\nshape={1}\n'.format(c, c.shape))

# 100x100 array from gpuarray.zeros.
d = gpuarray.zeros((100, 100), dtype=dtype)
print('d:\n{0}\nshape={1}\n'.format(d, d.shape))

# Range from 0.0 up to 100.0 in steps of 1.0.
e = gpuarray.arange(0.0, 100.0, 1.0, dtype=dtype)
print('e:\n{0}\nshape={1}\n'.format(e, e.shape))

# Selection driven by the criterion `e < 50`: presumably picks e - 100 where
# it holds and e + 100 elsewhere.
f = gpuarray.if_positive(e < 50, e - 100, e + 100)
print('f:\n{0}\nshape={1}\n'.format(f, f.shape))

# Same criterion, choosing between arrays of ones and zeros.
g = gpuarray.if_positive(e < 50, gpuarray.ones_like(e), gpuarray.zeros_like(e))
print('g:\n{0}\nshape={1}\n'.format(g, g.shape))

# Two-array maximum and minimum of e and f.
h = gpuarray.maximum(e, f)
print('h:\n{0}\nshape={1}\n'.format(h, h.shape))

i = gpuarray.minimum(e, f)
print('i:\n{0}\nshape={1}\n'.format(i, i.shape))

# Reductions over `a`; each result is printed together with its type.
# NOTE(review): this rebinds `g`, discarding the if_positive result above.
g = gpuarray.sum(a)
print(g, type(g))

k = gpuarray.max(a)
print(k, type(k))

l = gpuarray.min(a)
print(l, type(l))
def MIN(inp, inout):
    """Store the minimum of ``inp`` and ``inout`` back into ``inout``.

    Calls ``gpu.minimum`` with ``out=inout``, so the result overwrites the
    second argument. Nothing is returned.

    NOTE(review): ``gpu`` is bound outside this excerpt - presumably
    ``pycuda.gpuarray``, which would make this an elementwise minimum;
    confirm.
    """
    gpu.minimum(inp, inout, out=inout)