def _impl_test_argminmax(self, dtype):
    """Compare misc.argmax / misc.argmin with NumPy for C and Fortran layouts.

    A random 3x5 array (normal distribution, scale 5.0) is cast to ``dtype``
    first in C order and then in Fortran order.  For each layout the array is
    sent through ``gpuarray.to_gpu`` and the arg-reductions along axis 0 and
    axis 1 are compared against the NumPy results using the tolerances that
    ``dtype_to_rtol`` / ``dtype_to_atol`` hold for ``dtype``.
    """
    host = np.random.normal(scale=5.0, size=(3, 5))

    for layout in ('C', 'F'):
        # The Fortran pass re-casts the already C-cast array, so the values
        # compared in both passes are the same.
        host = host.astype(dtype=dtype, order=layout)
        device = gpuarray.to_gpu(host)

        # Order of checks: argmax axis 0, argmax axis 1, argmin axis 0,
        # argmin axis 1.
        for reduction in ('argmax', 'argmin'):
            for axis in (0, 1):
                from_gpu = getattr(misc, reduction)(device, axis=axis).get()
                from_numpy = getattr(host, reduction)(axis=axis)
                assert_allclose(from_gpu, from_numpy,
                                rtol=dtype_to_rtol[dtype],
                                atol=dtype_to_atol[dtype])
def _impl_test_argminmax(self, dtype):
    """Check that misc.argmax / misc.argmin agree with NumPy in both layouts.

    Draws a random 3x5 array (normal distribution, scale 5.0), casts it to
    ``dtype`` in C order and then in Fortran order, and for each layout asserts
    via ``np.allclose`` that the arg-reductions along axis 0 and axis 1 of the
    ``gpuarray.to_gpu`` copy match the NumPy results.
    """

    def _check_layout(host_arr):
        # Same sequence as the hand-written assertions: argmax over both
        # axes first, then argmin over both axes.
        device_arr = gpuarray.to_gpu(host_arr)
        assert np.allclose(misc.argmax(device_arr, axis=0).get(),
                           host_arr.argmax(axis=0))
        assert np.allclose(misc.argmax(device_arr, axis=1).get(),
                           host_arr.argmax(axis=1))
        assert np.allclose(misc.argmin(device_arr, axis=0).get(),
                           host_arr.argmin(axis=0))
        assert np.allclose(misc.argmin(device_arr, axis=1).get(),
                           host_arr.argmin(axis=1))

    samples = np.random.normal(scale=5.0, size=(3, 5))

    row_major = samples.astype(dtype=dtype, order='C')
    _check_layout(row_major)

    # The Fortran-ordered array is derived from the C-ordered cast, not from
    # the raw samples.
    col_major = row_major.astype(dtype=dtype, order='F')
    _check_layout(col_major)
def match(self, inp):
    """Run the correlate kernel on ``inp`` and look up the best pattern per row.

    ``inp`` is converted to float32 and copied into ``self.input_gpu``, then
    ``PatternCUDA.correlate`` is launched with ``self.patterns_gpu`` and
    ``self.result_gpu``.  The arg-minimum of ``self.result_gpu`` along axis 1
    is fetched, and its first ``len(inp) // 8 - 2`` entries are used as row
    indices into column 0 of ``self.bytes``.

    All size arithmetic uses floor division (``//``) so the launch dimensions
    and the slice bound stay integers on Python 3 as well as Python 2; with
    true division they become floats and ``length & -length`` raises
    ``TypeError``.

    :param inp: sequence/array with ``len()`` and ``astype`` (presumably a
        NumPy array -- confirm against callers).
    :returns: ``self.bytes[result[:length], 0]``.
    """
    length = len(inp) // 8 - 2

    # Lowest set bit of ``length``, i.e. the largest power of two dividing it.
    # NOTE(review): the original comment said "up to 8", but nothing here
    # clamps the value -- confirm that callers guarantee it.
    # NOTE(review): length == 0 (len(inp) in 16..23) makes block_x zero and
    # the division below raise ZeroDivisionError, as in the original.
    block_x = length & -length

    # Keeps block_x * block_y <= 1024 (presumably the per-block thread limit).
    block_y = min(1024 // block_x, self.n)

    cuda.memcpy_htod(self.input_gpu, inp.astype(numpy.float32))

    # NOTE(review): self.n // block_y truncates when block_y does not divide
    # self.n -- same as the original integer division.
    PatternCUDA.correlate(
        self.input_gpu,
        self.patterns_gpu,
        self.result_gpu,
        numpy.int32(self.start + 2),
        numpy.int32(self.end - 1),
        block=(block_x, block_y, 1),
        grid=(length // block_x, self.n // block_y),
    )

    result = argmin(self.result_gpu, axis=1).get()
    return self.bytes[result[:length], 0]
def get_assignments(self, data):
    """Return the arg-minimum of the distances-to-centers result as int32.

    Calls ``self.get_distances_to_centers(data)``, applies ``cumisc.argmin``
    with ``1`` as its second positional argument (presumably the axis, giving
    one index per row -- confirm), fetches the result with ``.get()`` and
    casts it to ``np.int32``.
    """
    center_distances = self.get_distances_to_centers(data)
    nearest = cumisc.argmin(center_distances, 1)
    fetched = nearest.get()
    return fetched.astype(np.int32)