def test_take(self):
    """Check ``gpuarray.take`` against host-side indexing.

    The comparison is repeated for a float32 and a complex64 source array,
    both built from the same uint32 range.
    """
    idx = gpuarray.arange(0, 10000, 2, dtype=np.uint32)
    for dtype in (np.float32, np.complex64):
        source = gpuarray.arange(0, 600000, dtype=np.uint32).astype(dtype)
        # Reference result: index the host copy with the host copy of idx.
        expected = source.get()[idx.get()]
        taken = gpuarray.take(source, idx)
        assert (expected == taken.get()).all()
def unpermute(self, x):
    """Return ``gpuarray.take(x, self.old2new_fetch_indices)``.

    NOTE(review): presumably the inverse of ``permute`` -- confirm against
    how the two index arrays are built.
    """
    fetch_indices = self.old2new_fetch_indices
    return gpuarray.take(x, fetch_indices)
def permute(self, x):
    """Return ``gpuarray.take(x, self.new2old_fetch_indices)``.

    NOTE(review): presumably reorders ``x`` into the permuted layout --
    confirm against how ``new2old_fetch_indices`` is built.
    """
    fetch_indices = self.new2old_fetch_indices
    return gpuarray.take(x, fetch_indices)
# Benchmark setup. B, K and N come from the enclosing scope.
# Random host data is generated in the same order as before (randn, rand,
# randint) so that a seeded run produces the same inputs.
host_scores = 5 * np.random.randn(B, K)
scores = gpuarray.to_gpu(host_scores.astype(np.float32))
host_probs = np.random.rand(B, K)
probs = gpuarray.to_gpu(host_probs.astype(np.float32))

# Scratch buffers with one entry per index along the first axis of `scores`.
vec_shape = (B,)
maxscores = gpuarray.empty(vec_shape, dtype=np.float32)
maxscoreids = gpuarray.empty(vec_shape, dtype=np.uint32)
deltas = gpuarray.empty_like(scores)
sumdeltas = gpuarray.empty(vec_shape, dtype=np.float32)
cpu_probs = np.empty((B, K), dtype=np.float32)
indices = np.random.randint(0, K, size=(N, B)).astype(np.uint32)
gpu_ind = gpuarray.empty(vec_shape, dtype=np.uint32)
selected_probs = gpuarray.empty(vec_shape, dtype=np.float32)

# Ten untimed take / scalar_sub / multi_put round trips on `probs`.
for i in range(10):
    gpu_ind.set(indices[i])
    gpuarray.take(probs, gpu_ind, out=selected_probs)
    utils.scalar_sub(selected_probs, 1.0, selected_probs)
    gpuarray.multi_put([selected_probs], gpu_ind, out=[probs])
# print probs

# NOTE(review): time.clock() was removed in Python 3.8. Moving to
# time.perf_counter() has to be done together with the matching
# end-of-timing call, which is not part of this fragment.
t1 = time.clock()
for i in range(N):
    # get the softmax probs first
    # NOTE(review): presumably max / subtract / exp / sum / divide along
    # axis 1 -- confirm against the utils and scm helpers.
    utils.max(scores, 1, maxscores, maxscoreids)
    utils.sub_matvec(scores, maxscores, 0, deltas)
    cumath.exp(deltas, out=deltas)
    scm.sum(deltas, 1, sumdeltas)
    utils.div_matvec(deltas, sumdeltas, 0, probs)
    # probs.get(cpu_probs)
def test_take(self):
    """Check that ``gpuarray.take`` picks the expected elements.

    The source array holds ``0, 3, 6, ...`` so the element at position ``i``
    equals ``3 * i``; taking at ``idx`` must therefore yield ``3 * idx``.
    """
    indices = gpuarray.arange(0, 200000, 2, dtype=np.uint32)
    source = gpuarray.arange(0, 600000, 3, dtype=np.float32)
    taken = gpuarray.take(source, indices)

    expected = (3 * indices).get()
    assert (expected == taken.get()).all()
# Benchmark setup. B, K and N come from the enclosing scope.
# Random host data is generated in the same order as before (randn, rand,
# randint) so that a seeded run produces the same inputs.
host_scores = 5 * np.random.randn(B, K)
scores = gpuarray.to_gpu(host_scores.astype(np.float32))
host_probs = np.random.rand(B, K)
probs = gpuarray.to_gpu(host_probs.astype(np.float32))

# Scratch buffers with one entry per index along the first axis of `scores`.
vec_shape = (B, )
maxscores = gpuarray.empty(vec_shape, dtype=np.float32)
maxscoreids = gpuarray.empty(vec_shape, dtype=np.uint32)
deltas = gpuarray.empty_like(scores)
sumdeltas = gpuarray.empty(vec_shape, dtype=np.float32)
cpu_probs = np.empty((B, K), dtype=np.float32)
indices = np.random.randint(0, K, size=(N, B)).astype(np.uint32)
gpu_ind = gpuarray.empty(vec_shape, dtype=np.uint32)
selected_probs = gpuarray.empty(vec_shape, dtype=np.float32)

# Ten untimed take / scalar_sub / multi_put round trips on `probs`.
for i in range(10):
    gpu_ind.set(indices[i])
    gpuarray.take(probs, gpu_ind, out=selected_probs)
    utils.scalar_sub(selected_probs, 1.0, selected_probs)
    gpuarray.multi_put([selected_probs], gpu_ind, out=[probs])
# print probs

# NOTE(review): time.clock() was removed in Python 3.8. Moving to
# time.perf_counter() has to be done together with the matching
# end-of-timing call, which is not part of this fragment.
t1 = time.clock()
for i in range(N):
    # get the softmax probs first
    # NOTE(review): presumably max / subtract / exp / sum / divide along
    # axis 1 -- confirm against the utils and scm helpers.
    utils.max(scores, 1, maxscores, maxscoreids)
    utils.sub_matvec(scores, maxscores, 0, deltas)
    cumath.exp(deltas, out=deltas)
    scm.sum(deltas, 1, sumdeltas)
    utils.div_matvec(deltas, sumdeltas, 0, probs)
    # probs.get(cpu_probs)
def test_take(self):
    """Verify ``gpuarray.take`` on a stride-3 float32 range.

    Because ``a[i] == 3 * i``, the gathered values must equal ``3 * idx``.
    """
    idx = gpuarray.arange(0, 200000, 2, dtype=np.uint32)
    a = gpuarray.arange(0, 600000, 3, dtype=np.float32)
    gathered = gpuarray.take(a, idx)

    tripled_idx = (3 * idx).get()
    matches = tripled_idx == gathered.get()
    assert matches.all()