def test_multi_gpu(self): with cuda.Device(0): a = cupy.zeros((10,)) cupy.core.core.scan(a) with cuda.Device(1): a = cupy.zeros((10,)) cupy.core.core.scan(a)
def basicComputeLoop(self, imp, params, inputSize, columnDimensions, seed = None): """ Feed in some vectors and retrieve outputs. Ensure the right number of columns win, that we always get binary outputs, and that nothing crashes. """ sp = CreateSP(imp,params) # Create a set of input vectors as well as various numpy vectors we will # need to retrieve data from the SP numRecords = 100 randomState = getNumpyRandomGenerator(seed) inputMatrix = ( randomState.rand(numRecords,inputSize) > 0.8).astype(uintType) y = cupy.zeros(columnDimensions, dtype = uintType) dutyCycles = cupy.zeros(columnDimensions, dtype = uintType) # With learning on we should get the requested number of winners for v in inputMatrix: y.fill(0) sp.compute(v, True, y) self.assertEqual(sp.getNumActiveColumnsPerInhArea(),y.sum()) self.assertEqual(0,y.min()) self.assertEqual(1,y.max()) # With learning off and some prior training we should get the requested # number of winners for v in inputMatrix: y.fill(0) sp.compute(v, False, y) self.assertEqual(sp.getNumActiveColumnsPerInhArea(),y.sum()) self.assertEqual(0,y.min()) self.assertEqual(1,y.max())
def bincount(x, weights=None, minlength=None): """Count number of occurrences of each value in array of non-negative ints. Args: x (cupy.ndarray): Input array. weights (cupy.ndarray): Weights array which has the same shape as ``x``. minlength (int): A minimum number of bins for the output array. Returns: cupy.ndarray: The result of binning the input array. The length of output is equal to ``max(cupy.max(x) + 1, minlength)``. .. seealso:: :func:`numpy.bincount` """ if x.ndim > 1: raise ValueError('object too deep for desired array') if x.ndim < 1: raise ValueError('object of too small depth for desired array') if x.dtype.kind == 'f': raise TypeError('x must be int array') if (x < 0).any(): raise ValueError('The first argument of bincount must be non-negative') if weights is not None and x.shape != weights.shape: raise ValueError('The weights and list don\'t have the same length.') if minlength is not None: minlength = int(minlength) if minlength <= 0: raise ValueError('minlength must be positive') size = int(cupy.max(x)) + 1 if minlength is not None: size = max(size, minlength) if weights is None: # atomicAdd for int64 is not provided b = cupy.zeros((size,), dtype=cupy.int32) cupy.ElementwiseKernel( 'S x', 'raw U bin', 'atomicAdd(&bin[x], 1)', 'bincount_kernel' )(x, b) b = b.astype(numpy.intp) else: # atomicAdd for float64 is not provided b = cupy.zeros((size,), dtype=cupy.float32) cupy.ElementwiseKernel( 'S x, T w', 'raw U bin', 'atomicAdd(&bin[x], w)', 'bincount_with_weight_kernel' )(x, weights, b) b = b.astype(cupy.float64) return b
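# A quick usage sketch for the bincount above, assuming it is exposed as cupy.bincount
# (mirroring numpy.bincount) and that a CUDA device is available.
import cupy

x = cupy.array([0, 1, 1, 3, 2, 1, 7])
print(cupy.bincount(x))                     # [1 3 1 1 0 0 0 1]

# With weights, each occurrence contributes its weight instead of 1.
w = cupy.array([0.3, 0.5, 0.2, 0.7, 1.0, 0.5, 0.1])
print(cupy.bincount(x, weights=w))          # [0.3 1.2 1.  0.7 0.  0.  0.  0.1]

# minlength only pads the result with trailing zeros; it never truncates.
print(cupy.bincount(x, minlength=10).size)  # 10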
def test_22(self): N = 32 M = 4 Nd = 8 D = cp.random.randn(Nd, Nd, M) D /= cp.sqrt(cp.sum(D**2, axis=(0, 1))) X0 = cp.zeros((N, N, M)) xr = cp.random.randn(N, N, M) xp = cp.abs(xr) > 3 X0[xp] = cp.random.randn(X0[xp].size) S = cp.sum(sl.fftconv(D, X0), axis=2) lmbda = 1e-3 opt = cbpdn.ConvBPDN.Options( {'Verbose': False, 'MaxMainIter': 500, 'RelStopTol': 1e-5, 'rho': 5e-1, 'AutoRho': {'Enabled': False}}) bp = cbpdn.ConvBPDN(D, S, lmbda, opt) Xp = bp.solve() epsilon = cp.linalg.norm(bp.reconstruct(Xp).squeeze() - S) opt = cbpdn.ConvMinL1InL2Ball.Options( {'Verbose': False, 'MaxMainIter': 500, 'RelStopTol': 1e-5, 'rho': 2e2, 'RelaxParam': 1.0, 'AutoRho': {'Enabled': False}}) bc = cbpdn.ConvMinL1InL2Ball(D, S, epsilon=epsilon, opt=opt) Xc = bc.solve() assert cp.linalg.norm(Xp - Xc) / cp.linalg.norm(Xp) < 1e-3 assert(cp.abs(cp.linalg.norm(Xp.ravel(), 1) - cp.linalg.norm(Xc.ravel(), 1)) < 1e-3)
def test_adv_getitem_cupy_indices2(self): shape = (2, 3, 4) a = cupy.zeros(shape) index = cupy.array([1, 0]) b = a[(slice(None), index)] b_cpu = a.get()[(slice(None), index.get())] testing.assert_array_equal(b, b_cpu)
def diag(v, k=0): """Returns a diagonal or a diagonal array. Args: v (array-like): Array or array-like object. k (int): Index of the diagonal. Zero indicates the main diagonal, a positive value an upper diagonal, and a negative value a lower diagonal. Returns: cupy.ndarray: If ``v`` indicates a 1-D array, then it returns a 2-D array with the specified diagonal filled by ``v``. If ``v`` indicates a 2-D array, then it returns the specified diagonal of ``v``. In the latter case, if ``v`` is a cupy.ndarray object, then its view is returned. .. seealso:: :func:`numpy.diag` """ if isinstance(v, cupy.ndarray): if v.ndim == 1: size = v.size + abs(k) ret = cupy.zeros((size, size), dtype=v.dtype) ret.diagonal(k)[:] = v return ret else: return v.diagonal(k) else: return cupy.array(numpy.diag(v, k))
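# Usage sketch for the diag above, assuming it is exposed as cupy.diag: a 1-D input
# builds a matrix, a 2-D input extracts a diagonal (as a view for cupy.ndarray input).
import cupy

v = cupy.arange(1, 4)
print(cupy.diag(v))        # 3x3 matrix with 1, 2, 3 on the main diagonal
print(cupy.diag(v, k=1))   # 4x4 matrix with the values on the first super-diagonal

m = cupy.arange(9).reshape(3, 3)
print(cupy.diag(m))        # [0 4 8], the main diagonal of the 2-D input
print(cupy.diag(m, k=-1))  # [3 7], the first sub-diagonal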
def test_scatter_add_cupy_arguments(self, dtype): shape = (2, 3) a = cupy.zeros(shape, dtype) slices = (cupy.array([1, 1]), slice(None)) a.scatter_add(slices, cupy.array(1.)) testing.assert_array_equal( a, cupy.array([[0., 0., 0.], [2., 2., 2.]], dtype))
def testVeryFewScores(self): """ This calls estimateAnomalyLikelihoods and updateAnomalyLikelihoods with one or no scores. """ # Generate an estimate using two data points data1 = _generateSampleData(mean=42.0, variance=1e-10) _, _, estimatorParams = ( an.estimateAnomalyLikelihoods(data1[0:2]) ) self.assertTrue(an.isValidEstimatorParams(estimatorParams)) # Check that the estimated mean is that value dParams = estimatorParams["distribution"] self.assertWithinEpsilon(dParams["mean"], data1[0][2]) # Can't generate an estimate using no data points data1 = cupy.zeros(0) with self.assertRaises(ValueError): an.estimateAnomalyLikelihoods(data1) # Can't update with no scores with self.assertRaises(ValueError): an.updateAnomalyLikelihoods(data1, estimatorParams)
def testSamplePopulationTooSmall(self): r = Random(42) population = cupy.array([1, 2, 3, 4], dtype="uint32") choices = cupy.zeros([5], dtype="uint32") self.assertRaises( ValueError, r.sample, population, choices)
def predict(self,x,train=False, ratio = 0.5): y = xp.zeros(OUTPUT_NODE) for i in range(KIND_OF_HEADS-1): y += self.heads[i][0].predict(x,train,ratio).data[0].copy()/(KIND_OF_HEADS-1) y /= 2.0 y += self.heads[KIND_OF_HEADS-1][0].predict(x,train,ratio).data[0].copy()/2.0 return y
def testShuffleEmpty(self): r = Random(42) arr = cupy.zeros([0], dtype="uint32") r.shuffle(arr) self.assertEqual(arr.size, 0)
def testSampleWrongDimensionsChoices(self): """Check that passing a multi-dimensional array throws a ValueError.""" r = Random(42) population = cupy.array([1, 2, 3, 4], dtype="uint32") choices = cupy.zeros([2, 2], dtype="uint32") self.assertRaises(ValueError, r.sample, population, choices)
def test_scatter_add_cupy_arguments_mask(self, dtype): shape = (2, 3) a = cupy.zeros(shape, dtype) slices = (cupy.array([True, False]), slice(None)) a.scatter_add(slices, cupy.array(1.)) testing.assert_array_equal( a, cupy.array([[1., 1., 1.], [0., 0., 0.]], dtype))
def test_adv_getitem_cupy_indices3(self): shape = (2, 3, 4) a = cupy.zeros(shape) index = cupy.array([True, False]) b = a[index] b_cpu = a.get()[index.get()] testing.assert_array_equal(b, b_cpu)
def test_cupy_indices_integer_array(self): shape = (2, 3) a = cupy.zeros(shape) indexes = cupy.array([0, 1]) a[:, indexes] = cupy.array(1.) testing.assert_array_equal( a, cupy.array([[1., 1., 0.], [1., 1., 0.]]))
def test_cupy_indices_boolean_array(self): shape = (2, 3) a = cupy.zeros(shape) indexes = cupy.array([True, False]) a[indexes] = cupy.array(1.) testing.assert_array_equal( a, cupy.array([[1., 1., 1.], [0., 0., 0.]]))
def testSample(self): r = Random(42) population = cupy.array([1, 2, 3, 4], dtype="uint32") choices = cupy.zeros([2], dtype="uint32") r.sample(population, choices) self.assertEqual(choices[0], 1) self.assertEqual(choices[1], 3)
def testSampleNone(self): r = Random(42) population = cupy.array([1, 2, 3, 4], dtype="uint32") choices = cupy.zeros([0], dtype="uint32") # Just make sure there is no exception thrown. r.sample(population, choices) self.assertEqual(choices.size, 0)
def zeros(shape, dtype=numpy.float32, stream=None): """Creates a zero-filled cupy.ndarray object. This function is equivalent to ``full(shape, 0, dtype, stream)``. """ warnings.warn("chainer.cuda.zeros is deprecated. Use cupy.zeros instead.", DeprecationWarning) check_cuda_available() assert stream is None return cupy.zeros(shape, dtype=dtype)
def test_scatter_add_differnt_dtypes(self, src_dtype, dst_dtype): shape = (2, 3) a = cupy.zeros(shape, dtype=src_dtype) value = cupy.array(1, dtype=dst_dtype) slices = ([1, 1], slice(None)) a.scatter_add(slices, value) numpy.testing.assert_almost_equal( a.get(), numpy.array([[0, 0, 0], [2, 2, 2]], dtype=src_dtype))
def test_scatter_add_differnt_dtypes_mask(self, src_dtype, dst_dtype): shape = (2, 3) a = cupy.zeros(shape, dtype=src_dtype) value = cupy.array(1, dtype=dst_dtype) slices = (numpy.array([[True, False, False], [False, True, True]])) a.scatter_add(slices, value) numpy.testing.assert_almost_equal( a.get(), numpy.array([[1, 0, 0], [0, 1, 1]], dtype=src_dtype))
def get_features(docs, max_length): docs = list(docs) Xs = xp.zeros((len(docs), max_length), dtype='i') for i, doc in enumerate(docs): j = 0 for token in doc: if token.has_vector and not token.is_punct and not token.is_space: Xs[i, j] = token.norm j += 1 if j >= max_length: break return Xs
def _getSimplePatterns(numOnes, numPatterns): """Very simple patterns. Each pattern has numOnes consecutive bits on. There are numPatterns*numOnes bits in the vector. These patterns are used as elements of sequences when building up a training set.""" numCols = numOnes * numPatterns p = [] for i in xrange(numPatterns): x = np.zeros(numCols, dtype='float32') x[i*numOnes:(i + 1)*numOnes] = 1 p.append(x) return p
def interval(self, mx, size): """Generate multiple integers independently sampled uniformly from ``[0, mx]``. Args: mx (int): Upper bound of the interval size (None or int or tuple): Shape of the array or the scalar returned. Returns: int or cupy.ndarray: If ``None``, an :class:`cupy.ndarray` with shape ``()`` is returned. If ``int``, 1-D array of length size is returned. If ``tuple``, multi-dimensional array with shape ``size`` is returned. Currently, each element of the array is ``numpy.int32``. """ dtype = numpy.int32 if size is None: return self.interval(mx, 1).reshape(()) elif isinstance(size, int): size = (size, ) if mx == 0: return cupy.zeros(size, dtype=dtype) mask = (1 << mx.bit_length()) - 1 mask = cupy.array(mask, dtype=dtype) ret = cupy.zeros(size, dtype=dtype) sample = cupy.zeros(size, dtype=dtype) done = cupy.zeros(size, dtype=numpy.bool_) while True: curand.generate( self._generator, sample.data.ptr, sample.size) sample &= mask success = sample <= mx ret = cupy.where(success, sample, ret) done |= success if done.all(): return ret
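# The loop above is masked rejection sampling: draw random words, mask them down to the
# smallest power-of-two range covering mx, and keep redrawing until every element lands
# in [0, mx]. A minimal NumPy sketch of the same idea (interval_reject is a hypothetical
# helper, not part of cupy.random):
import numpy as np

def interval_reject(mx, size, rng=None):
    rng = np.random.default_rng() if rng is None else rng
    if mx == 0:
        return np.zeros(size, dtype=np.int32)
    mask = (1 << int(mx).bit_length()) - 1          # all-ones mask covering mx
    ret = np.zeros(size, dtype=np.int32)
    done = np.zeros(size, dtype=bool)
    while not done.all():
        sample = rng.integers(0, mask + 1, size=size, dtype=np.int64).astype(np.int32)
        ok = sample <= mx
        ret = np.where(ok & ~done, sample, ret)     # keep the first accepted draw per slot
        done |= ok
    return ret

print(interval_reject(5, (4,)))                     # four uniform draws from 0..5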
def check_normal(self, curand_func, dtype): shape = core.get_size(self.size) exp_size = six.moves.reduce(operator.mul, shape, 1) if exp_size % 2 == 1: exp_size += 1 curand_func.return_value = cupy.zeros(exp_size, dtype=dtype) out = self.rs.normal(self.args[0], self.args[1], self.size, dtype) gen, _, size, loc, scale = curand_func.call_args[0] self.assertIs(gen, self.rs._generator) self.assertEqual(size, exp_size) self.assertIs(loc, self.args[0]) self.assertIs(scale, self.args[1]) self.assertEqual(out.shape, shape)
def getNoWhitespaces(IDs, whitespace_IDs): no_whitespace_ids = whitespace_IDs.shape[0] T = IDs.shape[0] N = IDs.shape[1] no_whitespaces = cp.zeros((N,), dtype=np.int32) _GetNoWhitespaces = _GetNoWhitespaces_kernel() bdim, gdim = (16,1,1), (1,N,1) _GetNoWhitespaces(grid=gdim, block=bdim, args=(IDs, whitespace_IDs, no_whitespaces, no_whitespace_ids, T, N) ) return no_whitespaces
def zeros_like(array, stream=None): """Creates a zero-filled cupy.ndarray object like the given array. Args: array (cupy.ndarray or numpy.ndarray): Base array. stream (cupy.cuda.Stream): CUDA stream. Returns: cupy.ndarray: Zero-filled array. """ warnings.warn("chainer.cuda.zeros_like is deprecated. Use cupy.zeros_like instead.", DeprecationWarning) check_cuda_available() assert stream is None if isinstance(array, cupy.ndarray): return cupy.zeros_like(array) return cupy.zeros(array.shape, dtype=array.dtype)
def _call_nms_kernel(bbox, thresh): n_bbox = bbox.shape[0] threads_per_block = 64 col_blocks = np.ceil(n_bbox / threads_per_block).astype(np.int32) blocks = (col_blocks, col_blocks, 1) threads = (threads_per_block, 1, 1) mask_dev = cp.zeros((n_bbox * col_blocks,), dtype=np.uint64) bbox = cp.ascontiguousarray(bbox, dtype=np.float32) kern = _load_kernel('nms_kernel', _nms_gpu_code) kern(blocks, threads, args=(cp.int32(n_bbox), cp.float32(thresh), bbox, mask_dev)) mask_host = mask_dev.get() selection, n_selec = _nms_gpu_post( mask_host, n_bbox, threads_per_block, col_blocks) return selection, n_selec
def _non_maximum_suppression_gpu(bbox, thresh, score=None, limit=None): if len(bbox) == 0: return cp.zeros((0,), dtype=np.int32) n_bbox = bbox.shape[0] if score is not None: order = score.argsort()[::-1].astype(np.int32) else: order = cp.arange(n_bbox, dtype=np.int32) sorted_bbox = bbox[order, :] selec, n_selec = _call_nms_kernel( sorted_bbox, thresh) selec = selec[:n_selec] selec = order[selec] if limit is not None: selec = selec[:limit] return selec
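# For reference, the rule the mask kernel implements: boxes arrive sorted by score, and a
# box is suppressed once its IoU with an already-kept, higher-scoring box reaches the
# threshold. A plain NumPy sketch of that greedy rule, with boxes as
# (y_min, x_min, y_max, x_max); whether the boundary case uses >= or > is an assumption here.
import numpy as np

def nms_cpu_reference(bbox, thresh):
    keep = []
    suppressed = np.zeros(len(bbox), dtype=bool)
    areas = (bbox[:, 2] - bbox[:, 0]) * (bbox[:, 3] - bbox[:, 1])
    for i in range(len(bbox)):
        if suppressed[i]:
            continue
        keep.append(i)
        # Overlap of box i with every lower-scoring box.
        tl = np.maximum(bbox[i, :2], bbox[i + 1:, :2])
        br = np.minimum(bbox[i, 2:], bbox[i + 1:, 2:])
        wh = np.clip(br - tl, 0, None)
        inter = wh[:, 0] * wh[:, 1]
        iou = inter / (areas[i] + areas[i + 1:] - inter)
        suppressed[i + 1:] |= iou >= thresh
    return np.array(keep, dtype=np.int32)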
def testGetstateSetstate(self): nDims = 32 # need multiple of 8, because of sse nClass = 4 size = 20 labels = _RGEN.random_integers(0, nClass - 1, size) samples = np.zeros((size, nDims), dtype=_DTYPE) centers = np.array([[0, 0], [0, 1], [1, 0], [1, 1]]) for i in range(0, size): t = 6.28 * _RGEN.random_sample() samples[i][0] = 2 * centers[labels[i]][0] + 0.5*_RGEN.rand() * np.cos(t) samples[i][1] = 2 * centers[labels[i]][1] + 0.5*_RGEN.rand() * np.sin(t) classifier = svm_dense(0, nDims, seed=_SEED, probability = True) for y, xList in zip(labels, samples): x = np.array(xList, dtype=_DTYPE) classifier.add_sample(float(y), x) classifier.train(gamma=1.0/3.0, C=100, eps=1e-1) classifier.cross_validate(2, gamma=0.5, C=10, eps=1e-3) s1 = classifier.__getstate__() h1 = hashlib.md5(s1).hexdigest() classifier2 = svm_dense(0, nDims) classifier2.__setstate__(s1) s2 = classifier2.__getstate__() h2 = hashlib.md5(s2).hexdigest() self.assertEqual(h1, h2) with open("svm_test.bin", "wb") as f: pickle.dump(classifier, f) with open("svm_test.bin", "rb") as f: classifier3 = pickle.load(f) s3 = classifier3.__getstate__() h3 = hashlib.md5(s3).hexdigest() self.assertEqual(h1, h3) os.unlink("svm_test.bin")
def affine_transform( input, matrix, offset=0.0, output_shape=None, output=None, order=3, mode="constant", cval=0.0, prefilter=True, *, allow_float32=True, ): """Apply an affine transformation. Given an output image pixel index vector ``o``, the pixel value is determined from the input image at position ``cupy.dot(matrix, o) + offset``. Args: input (cupy.ndarray): The input array. matrix (cupy.ndarray): The inverse coordinate transformation matrix, mapping output coordinates to input coordinates. If ``ndim`` is the number of dimensions of ``input``, the given matrix must have one of the following shapes: - ``(ndim, ndim)``: the linear transformation matrix for each output coordinate. - ``(ndim,)``: assume that the 2D transformation matrix is diagonal, with the diagonal specified by the given value. - ``(ndim + 1, ndim + 1)``: assume that the transformation is specified using homogeneous coordinates. In this case, any value passed to ``offset`` is ignored. - ``(ndim, ndim + 1)``: as above, but the bottom row of a homogeneous transformation matrix is always ``[0, 0, ..., 1]``, and may be omitted. offset (float or sequence): The offset into the array where the transform is applied. If a float, ``offset`` is the same for each axis. If a sequence, ``offset`` should contain one value for each axis. output_shape (tuple of ints): Shape tuple. output (cupy.ndarray or ~cupy.dtype): The array in which to place the output, or the dtype of the returned array. order (int): The order of the spline interpolation. Must be between 0 and 5. mode (str): Points outside the boundaries of the input are filled according to the given mode (``'constant'``, ``'nearest'``, ``'mirror'`` or ``'opencv'``). Default is ``'constant'``. cval (scalar): Value used for points outside the boundaries of the input if ``mode='constant'`` or ``mode='opencv'``. Default is 0.0 prefilter (bool): It is not used yet. It just exists for compatibility with :mod:`scipy.ndimage`. Returns: cupy.ndarray or None: The transformed input. If ``output`` is given as a parameter, ``None`` is returned. Notes ----- This implementation handles boundary modes 'wrap' and 'reflect' correctly, while SciPy does not (at least as of release 1.4.0). So, if comparing to SciPy, some disagreement near the borders may occur unless ``mode == 'mirror'``. For ``order > 1`` with ``prefilter == True``, the spline prefilter boundary conditions are implemented correctly only for modes 'mirror', 'reflect' and 'wrap'. For the other modes ('constant' and 'nearest'), there is some innacuracy near the boundary of the array. .. 
seealso:: :func:`scipy.ndimage.affine_transform` """ _check_parameter("affine_transform", order, mode) if not hasattr(offset, "__iter__") and type(offset) is not cupy.ndarray: offset = [offset] * input.ndim matrix = cupy.asarray(matrix, order="C", dtype=float) if matrix.ndim not in [1, 2]: raise RuntimeError("no proper affine matrix provided") if matrix.ndim == 2: if matrix.shape[0] == matrix.shape[1] - 1: offset = matrix[:, -1] matrix = matrix[:, :-1] elif matrix.shape[0] == input.ndim + 1: offset = matrix[:-1, -1] matrix = matrix[:-1, :-1] if mode == "opencv": m = cupy.zeros((input.ndim + 1, input.ndim + 1), dtype=float) m[:-1, :-1] = matrix m[:-1, -1] = offset m[-1, -1] = 1 m = cupy.linalg.inv(m) m[:2] = cupy.roll(m[:2], 1, axis=0) m[:2, :2] = cupy.roll(m[:2, :2], 1, axis=1) matrix = m[:-1, :-1] offset = m[:-1, -1] if output_shape is None: output_shape = input.shape matrix = matrix.astype(float, copy=False) if order is None: order = 1 ndim = input.ndim output = _get_output(output, input, shape=output_shape) if input.dtype.kind in "iu": input = input.astype(cupy.float32) if prefilter and order > 1: padded, npad = _prepad_for_spline_filter(input, mode, cval) filtered = spline_filter( padded, order, output=input.dtype, mode=mode, allow_float32=allow_float32, ) else: npad = 0 filtered = input # kernel assumes C-contiguous arrays if not filtered.flags.c_contiguous: filtered = cupy.ascontiguousarray(filtered) if not matrix.flags.c_contiguous: matrix = cupy.ascontiguousarray(matrix) integer_output = output.dtype.kind in "iu" large_int = (max(_misc._prod(input.shape), _misc._prod(output_shape)) > 1 << 31) if matrix.ndim == 1: offset = cupy.asarray(offset, dtype=float, order="C") offset = -offset / matrix kern = _get_zoom_shift_kernel( ndim, large_int, output_shape, mode, cval=cval, order=order, integer_output=integer_output, nprepad=npad, ) kern(filtered, offset, matrix, output) else: kern = _get_affine_kernel( ndim, large_int, output_shape, mode, cval=cval, order=order, integer_output=integer_output, nprepad=npad, ) m = cupy.zeros((ndim, ndim + 1), dtype=float) m[:, :-1] = matrix m[:, -1] = cupy.asarray(offset, dtype=float) kern(filtered, m, output) return output
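# A small usage sketch for the affine_transform above, assuming it is importable from this
# module: with an identity linear part, output[o] = input[o + offset], so a positive offset
# shifts content toward smaller output indices.
import cupy

image = cupy.zeros((64, 64), dtype=cupy.float32)
image[20:30, 20:30] = 1.0

shifted = affine_transform(image, cupy.eye(2), offset=(5, 10), order=1)

# The bright square ends up at rows 15:25, cols 10:20; its mass is preserved because it
# stays away from the borders.
print(float(image.sum()), float(shifted.sum()))   # 100.0 100.0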
def call_adam(grad, data, state_m, state_v): adam(grad, hp.lr, 1 - hp.beta1, 1 - hp.beta2, hp.eps, hp.eta, hp.weight_decay_rate, data, state_m, state_v) def call_adam_fuse(grad, data, state_m, state_v): adam_fuse(grad, numpy.float32(hp.lr), numpy.float32(1 - hp.beta1), numpy.float32(1 - hp.beta2), numpy.float32(hp.eps), numpy.float32(hp.eta), numpy.float32(hp.weight_decay_rate), data, state_m, state_v) sizes = [1, 10, 100, 1000, 2000, 5000] for size in sizes: zero = cupy.zeros((size, size)) def f(): call_adam(zero, zero, zero, zero) util.measure(f, "adam , %4d" % (size), 100) for size in sizes: zero = cupy.zeros((size, size)) def f(): call_adam_fuse(zero, zero, zero, zero) util.measure(f, "adam_fuse, %4d" % (size), 100) for size in sizes[:4]:
rv = np.sqrt(xv**2 + yv**2 + zv**2) thetav = np.arccos(zv / rv) phiv = np.arctan2(yv, xv) del xv, yv, zv, jv, iv # Load arrays onto GPU using CuPy module # theta_tol_array = cp.array(theta_tol_array) # phi_tol_array = cp.array(phi_tol_array) theta_img = cp.array(theta_img) phi_img = cp.array(phi_img) thetav = cp.array(thetav) phiv = cp.array(phiv) rv = cp.array(rv) # Initialize label array labels = cp.zeros((ni, nj, 111)) # Doing all computations on GPU 1 with cp.cuda.Device(1): # Condition to search only in field of view #limit_theta = thetav <= theta_max # Set closeness tolerance # Redefine useful variables so they are on the GPU # Reduces data transfer time ni = 480 nj = 480 theta_max = 4 / 9 * cp.pi ni0 = ni / 2 - 0.5 nj0 = nj / 2 - 0.5 dimk, dimi, dimj = rv.shape start = 76
def test_cuFloatComplex(self): N = 100 block = 32 grid = (N + block - 1) // block dtype = cupy.complex64 mod = cupy.RawModule( code=_test_cuComplex, translate_cucomplex=True) a = cupy.random.random((N,)) + 1j*cupy.random.random((N,)) a = a.astype(dtype) b = cupy.random.random((N,)) + 1j*cupy.random.random((N,)) b = b.astype(dtype) c = cupy.random.random((N,)) + 1j*cupy.random.random((N,)) c = c.astype(dtype) out = cupy.zeros((N,), dtype=dtype) out_float = cupy.zeros((N,), dtype=cupy.float32) out_up = cupy.zeros((N,), dtype=cupy.complex128) ker = mod.get_function('test_addf') ker((grid,), (block,), (a, b, out)) assert (out == a + b).all() ker = mod.get_function('test_subf') ker((grid,), (block,), (a, b, out)) assert (out == a - b).all() ker = mod.get_function('test_mulf') ker((grid,), (block,), (a, b, out)) assert cupy.allclose(out, a * b) ker = mod.get_function('test_divf') ker((grid,), (block,), (a, b, out)) assert (out == a / b).all() ker = mod.get_function('test_conjf') ker((grid,), (block,), (a, out)) assert (out == cupy.conj(a)).all() ker = mod.get_function('test_absf') ker((grid,), (block,), (a, out_float)) assert (out_float == cupy.abs(a)).all() ker = mod.get_function('test_fmaf') ker((grid,), (block,), (a, b, c, out)) assert cupy.allclose(out, a * b + c) ker = mod.get_function('test_makef') ker((grid,), (block,), (out,)) # because of precision issue, the (A==B).all() semantics would fail assert cupy.allclose(out, 1.8 - 1j * 8.7) ker = mod.get_function('test_upcast') ker((grid,), (block,), (a, out_up)) assert (out_up == a.astype(cupy.complex128)).all() # NumPy scalars. b = cupy.complex64(2 + 3j) ker = mod.get_function('test_addf_scalar') ker((grid,), (block,), (a, b, out)) assert (out == a + b).all()
def test_zeros_strides_f(self): a = numpy.zeros((2, 3), dtype='d', order='F') b = cupy.zeros((2, 3), dtype='d', order='F') self.assertEqual(b.strides, a.strides)
def testNotContiguous(self): sbuf = cupy.ones([3, 2])[:, 0] rbuf = cupy.zeros([3]) self.assertRaises((BufferError, ValueError), Sendrecv, sbuf, rbuf)
Nframe = 50 # Saves data every Nframe time steps dt = -1j * 1e-2 # Time step t = 0. # -------------------------------------------------------------------------------------------------------------------- # Generating initial state: # -------------------------------------------------------------------------------------------------------------------- phi = cp.arctan2(Y, X) # Phase is azimuthal angle around the core Tf = sm.get_TF_density_3d(c0, c2, X, Y, Z, N=1) eta = np.where(Z <= 0, 0, 1) # Parameter used to interpolate between states # Generate initial wavefunctions: psiP2 = cp.sqrt(Tf) * cp.sqrt((1 + eta**2)) / 2 psiP1 = cp.zeros((Nx, Ny, Nz)) psi0 = cp.sqrt(Tf) * 1j * cp.sqrt((1 - eta**2) / 2) psiM1 = cp.zeros((Nx, Ny, Nz)) psiM2 = cp.sqrt(Tf) * cp.sqrt((1 + eta**2)) / 2 Psi = [psiP2, psiP1, psi0, psiM1, psiM2] # Full 5x1 wavefunction # Spin rotation on wavefunction: alpha_angle = 0 beta_angle = 0.01 gamma_angle = 0 Psi = sm.rotation(Psi, alpha_angle, beta_angle, gamma_angle) N = [dx * dy * cp.sum(cp.abs(wfn)**2) for wfn in Psi] # Atom number of each component theta_fix = [cp.angle(wfn) for wfn in Psi]
def __init__(self, n_layers, n_samples, n, n_extended, beta, kappa, sigma_0, sigma_v, sigma_scaling, meas_std, evaluation_interval, printProgress, seed, burn_percentage, enable_beta_feedback, pcn_variant, phantom_name, meas_type='tomo', n_theta=50): self.n_samples = n_samples self.evaluation_interval = evaluation_interval self.burn_percentage = burn_percentage #set random seed self.random_seed = seed self.printProgress = printProgress self.n_layers = n_layers self.kappa = kappa self.sigma_0 = sigma_0 self.sigma_v = sigma_v self.sigma_scaling = sigma_scaling self.enable_beta_feedback = enable_beta_feedback cp.random.seed(self.random_seed) #setup parameters for 1 Dimensional simulation self.d = 2 self.nu = 2 - self.d / 2 self.alpha = self.nu + self.d / 2 self.t_start = -0.5 self.t_end = 0.5 self.beta_u = (sigma_0**2) * (2**self.d * util.PI**(self.d / 2) * ssp.gamma(self.alpha)) / ssp.gamma( self.nu) self.beta_v = self.beta_u * (sigma_v / sigma_0)**2 self.sqrtBeta_v = cp.sqrt(self.beta_v).astype('float32') self.sqrtBeta_0 = cp.sqrt(self.beta_u).astype('float32') f = FourierAnalysis_2D(n, n_extended, self.t_start, self.t_end) self.fourier = f rg = RandomGenerator_2D(f.basis_number) self.random_gen = rg LuReal = ((f.Dmatrix * self.kappa**(-self.nu) - self.kappa**(2 - self.nu) * f.Imatrix) / self.sqrtBeta_0).astype('float32') Lu = LuReal + 1j * cp.zeros(LuReal.shape, dtype=cp.float32) uStdev_sym = -1 / cp.diag(Lu) uStdev = uStdev_sym[f.basis_number_2D_ravel - 1:] uStdev[0] /= 2 #scaled if meas_type == 'tomo': self.measurement = Sinogram( phantom_name, target_size=2 * f.extended_basis_number - 1, n_theta=n_theta, stdev=meas_std, relative_location='phantom_images') else: self.measurement = TwoDMeasurement( phantom_name, target_size=2 * f.extended_basis_number - 1, stdev=meas_std, relative_location='phantom_images') self.pcn_variant = pcn_variant self.pcn = pCN(n_layers, rg, self.measurement, f, beta, self.pcn_variant) # self.pcn_pair_layers = pcn_pair_layers self.pcn.record_skip = np.max( cp.array([1, self.n_samples // self.pcn.max_record_history])) history_length = np.min( np.array([self.n_samples, self.pcn.max_record_history])) self.pcn.sqrtBetas_history = np.empty((history_length, self.n_layers), dtype=np.float64) Layers = [] for i in range(self.n_layers): if i == 0: init_sample_sym = uStdev_sym * self.pcn.random_gen.construct_w( ) lay = Layer(True, self.sqrtBeta_0, i, n_samples, self.pcn, init_sample_sym) lay.LMat.current_L = Lu lay.LMat.latest_computed_L = Lu lay.stdev_sym = uStdev_sym lay.stdev = uStdev else: if i == n_layers - 1: lay = Layer(False, self.sqrtBeta_v, i, self.n_samples, self.pcn, Layers[i - 1].current_sample_sym) wNew = self.pcn.random_gen.construct_w() eNew = cp.random.randn(self.pcn.measurement.num_sample, dtype=cp.float32) wBar = cp.concatenate((eNew, wNew)) LBar = cp.vstack((self.pcn.H, lay.LMat.current_L)) lay.current_sample_sym, res, rnk, s = cp.linalg.lstsq( LBar, self.pcn.yBar - wBar, rcond=-1) #,rcond=None) lay.current_sample = lay.current_sample_sym[ f.basis_number_2D_ravel - 1:] else: # lay = layer.Layer(False, sqrtBeta_v*np.sqrt(sigma_scaling),i, n_samples, pcn,Layers[i-1].current_sample) lay = Layer(False, self.sqrtBeta_v * 0.1, i, self.n_samples, self.pcn, Layers[i - 1].current_sample) lay.update_current_sample() self.pcn.Layers_sqrtBetas[i] = lay.sqrt_beta lay.samples_history = np.empty( (history_length, self.pcn.fourier.basis_number_2D_ravel), dtype=np.complex64) Layers.append(lay) self.Layers = Layers
def compressed_allreduce(self, buffer_m: torch.tensor, worker_error, server_error, local_rank): # all_start_time = time.time() original_shape = buffer_m.size() if len(original_shape) > 1: buffer_m = torch.flatten(buffer_m) original_size = buffer_m.numel() worker_error_size = worker_error.numel() cupy.cuda.Device(local_rank).use() if original_size != worker_error_size: empty_tensor = torch.zeros(worker_error_size - original_size, device=buffer_m.device) buffer_m = torch.cat([buffer_m, empty_tensor]) buffer_m.add_(worker_error) worker_scale = torch.norm(buffer_m) / np.sqrt(torch.numel(buffer_m)) worker_error.set_( buffer_m - worker_scale * buffer_m.sign().add_(1).bool().float().add_(-0.5).mul_(2.0)) cupy_sign_list_packed = self.compression_backend.compress_by_chunk( self.compression_backend.torch2cupy( buffer_m.sign_().add_(1).bool()), self.size) cupy_worker_scale = self.compression_backend.torch2cupy(worker_scale) cupy_recvbuf_sign = cupy.zeros( [self.size, cupy_sign_list_packed[self.rank].size], dtype=cupy_sign_list_packed[0].dtype) # cupy_recvbuf_scale = cupy.zeros([self.size, 1], dtype=cupy_worker_scale.dtype) sign_list_packed = [ self.compression_backend.cupy2torch(cupy_sign_list_packed[idx]) for idx in range(self.size) ] # worker_scale = self.compression_backend.cupy2torch(cupy_worker_scale) recvbuf_sign = self.compression_backend.cupy2torch(cupy_recvbuf_sign) #recvbuf_scale = self.compression_backend.cupy2torch(cupy_recvbuf_scale) recvbuf_scale = [ torch.zeros(1, dtype=worker_scale.dtype, device=torch.device(local_rank)) for i in range(self.size) ] # communication phase 1 # gather_start = time.time() # Alltoall for sign dist.all_to_all_single(recvbuf_sign, torch.stack(sign_list_packed)) # Allgather for scale dist.all_gather(recvbuf_scale, worker_scale) # gather_end = time.time() # cupy_sign_list_packed, sign_list_packed, cupy_worker_scale, worker_scale = None, None, None, None cupy_sign_list_packed = None cupy_recvbuf_sign = self.compression_backend.torch2cupy(recvbuf_sign) #cupy_recvbuf_scale = self.compression_backend.torch2cupy(torch.stack(recvbuf_scale)) compensated_server_m = self.compression_backend.cupy2torch( (cupy.unpackbits(cupy_recvbuf_sign.flatten())).reshape( self.size, -1)).float().add_(-0.5).mul_(2.0).mul_( torch.stack(recvbuf_scale).mul_(1 / self.size)).sum(0) compensated_server_m.add_(server_error) server_scale = torch.norm(compensated_server_m) / np.sqrt( compensated_server_m.numel()) server_error.set_(compensated_server_m - server_scale * compensated_server_m.sign().add_( 1).bool().float().add_(-0.5).mul_(2.0)) # cupy_server_scale = self.compression_backend.torch2cupy(server_scale) cupy_server_sign_packed = self.compression_backend.compress_by_chunk( self.compression_backend.torch2cupy( compensated_server_m.sign_().add_(1).bool()), 1) compensated_server_m = None cupy_recvbuf_sign_server = cupy.zeros( [self.size, cupy_server_sign_packed[0].size], dtype=cupy_recvbuf_sign.dtype) # cupy_recvbuf_sign, recvbuf_sign = None, None cupy_recvbuf_sign = None server_sign_packed = [ self.compression_backend.cupy2torch(cupy_server_sign_packed[0]) ] recvbuf_sign_server = [ self.compression_backend.cupy2torch(cupy_recvbuf_sign_server[idx]) for idx in range(self.size) ] # server_scale = self.compression_backend.cupy2torch(cupy_server_scale) cupy_recvbuf_scale_server = cupy.zeros([self.size, 1], dtype=cupy_worker_scale.dtype) # cupy_recvbuf_scale, recvbuf_scale = None, None recvbuf_scale_server = [ self.compression_backend.cupy2torch(cupy_recvbuf_scale_server[idx]) for idx in 
range(self.size) ] # Communication Phase 2 dist.all_gather(recvbuf_sign_server, server_sign_packed[0]) dist.all_gather(recvbuf_scale_server, server_scale) cupy_server_sign_packed = None # need to convert from a tensor list to a single tensor # dist.all_gather only provides a tensor list as the recv/output buffer recvbuf_sign_server = torch.stack(recvbuf_sign_server) cupy_recvbuf_sign_server = self.compression_backend.torch2cupy( recvbuf_sign_server) buffer_m.data.copy_( self.compression_backend.cupy2torch( (cupy.unpackbits(cupy_recvbuf_sign_server.flatten())).reshape( self.size, -1)).float().add_(-0.5).mul_(2.0).mul_( self.compression_backend.cupy2torch( cupy_recvbuf_scale_server)).flatten().data) if original_size != worker_error_size: buffer_m = buffer_m[0:original_size] if len(original_shape) > 1: buffer_m = buffer_m.reshape(original_shape) return buffer_m
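# The 1-bit path above transmits only packed sign bits plus one scale per worker. A
# single-process CuPy sketch of that compress/decompress round trip, independent of the
# torch/dist plumbing (variable names here are illustrative):
import cupy

x = cupy.random.standard_normal(1024, dtype=cupy.float32)
scale = cupy.linalg.norm(x) / (x.size ** 0.5)         # one scalar per buffer

packed = cupy.packbits(x >= 0)                        # 1024 floats -> 128 bytes on the wire

# Decompress: bits -> {0, 1} -> {-1, +1}, then rescale by the transmitted scalar.
restored = (cupy.unpackbits(packed)[:x.size].astype(cupy.float32) - 0.5) * 2.0 * scale

assert bool(((restored > 0) == (x >= 0)).all())       # signs survive exactly
print(float(scale), float(cupy.abs(restored).max()))  # all magnitudes collapse to the scale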
LoadFrom = "weights.npy" # The pretrained model saving = False # Set it as True if you want to save the trained model best_perf = 0 Nnrn = [NhidenNeurons, NumOfClasses] # Number of neurons at hidden and output layers cats = [4, 1, 0, 7, 9, 2, 3, 5, 8, 6] # Reordering the categories # General variables images = [] # To keep training images labels = [] # To keep training labels images_test = [] # To keep test images labels_test = [] # To keep test labels W = [] # To hold the weights of hidden and output layers firingTime = [] # To hold the firing times of hidden and output layers Spikes = [] # To hold the spike trains of hidden and output layers X = [] # To be used in converting firing times to spike trains target = cp.zeros([NumOfClasses]) # To keep the target firing times of current image FiringFrequency = [] # to count number of spikes each neuron emits during an epoch # loading MNIST dataset mndata = MNIST('MNIST/') # mndata.gz = False Images, Labels = mndata.load_training() Images = np.array(Images) for i in range(len(Labels)): if Labels[i] in cats: images.append(np.floor((GrayLevels - Images[i].reshape(28, 28)) * tmax / GrayLevels).astype(int)) labels.append(cats.index(Labels[i])) Images, Labels = mndata.load_testing() Images = np.array(Images)
def gels(a, b): """Solves over/well/under-determined linear systems. Computes least-square solution to equation ``ax = b` by QR factorization using cusolverDn<t>geqrf(). Args: a (cupy.ndarray): The matrix with dimension ``(M, N)``. b (cupy.ndarray): The matrix with dimension ``(M)`` or ``(M, K)``. Returns: cupy.ndarray: The matrix with dimension ``(N)`` or ``(N, K)``. """ if a.ndim != 2: raise ValueError('a.ndim must be 2 (actual: {})'.format(a.ndim)) if b.ndim == 1: nrhs = 1 elif b.ndim == 2: nrhs = b.shape[1] else: raise ValueError('b.ndim must be 1 or 2 (actual: {})'.format(b.ndim)) if a.shape[0] != b.shape[0]: raise ValueError('shape mismatch (a: {}, b: {}).'. format(a.shape, b.shape)) if a.dtype != b.dtype: raise ValueError('dtype mismatch (a: {}, b: {}).'. format(a.dtype, b.dtype)) dtype = a.dtype if dtype == 'f': t = 's' elif dtype == 'd': t = 'd' elif dtype == 'F': t = 'c' elif dtype == 'D': t = 'z' else: raise ValueError('unsupported dtype (actual: {})'.format(dtype)) geqrf_helper = getattr(_cusolver, t + 'geqrf_bufferSize') geqrf = getattr(_cusolver, t + 'geqrf') trsm = getattr(_cublas, t + 'trsm') if t in 'sd': ormqr_helper = getattr(_cusolver, t + 'ormqr_bufferSize') ormqr = getattr(_cusolver, t + 'ormqr') else: ormqr_helper = getattr(_cusolver, t + 'unmqr_bufferSize') ormqr = getattr(_cusolver, t + 'unmqr') no_trans = _cublas.CUBLAS_OP_N if dtype.char in 'fd': trans = _cublas.CUBLAS_OP_T else: trans = _cublas.CUBLAS_OP_C m, n = a.shape mn_min = min(m, n) dev_info = _cupy.empty(1, dtype=_numpy.int32) tau = _cupy.empty(mn_min, dtype=dtype) cusolver_handle = _device.get_cusolver_handle() cublas_handle = _device.get_cublas_handle() if m >= n: # over/well-determined systems a = a.copy(order='F') b = b.copy(order='F') # geqrf (QR decomposition, A = Q * R) ws_size = geqrf_helper(cusolver_handle, m, n, a.data.ptr, m) workspace = _cupy.empty(ws_size, dtype=dtype) geqrf(cusolver_handle, m, n, a.data.ptr, m, tau.data.ptr, workspace.data.ptr, ws_size, dev_info.data.ptr) _cupy.linalg._util._check_cusolver_dev_info_if_synchronization_allowed( geqrf, dev_info) # ormqr (Computes Q^T * B) ws_size = ormqr_helper( cusolver_handle, _cublas.CUBLAS_SIDE_LEFT, trans, m, nrhs, mn_min, a.data.ptr, m, tau.data.ptr, b.data.ptr, m) workspace = _cupy.empty(ws_size, dtype=dtype) ormqr(cusolver_handle, _cublas.CUBLAS_SIDE_LEFT, trans, m, nrhs, mn_min, a.data.ptr, m, tau.data.ptr, b.data.ptr, m, workspace.data.ptr, ws_size, dev_info.data.ptr) _cupy.linalg._util._check_cusolver_dev_info_if_synchronization_allowed( ormqr, dev_info) # trsm (Solves R * X = (Q^T * B)) trsm(cublas_handle, _cublas.CUBLAS_SIDE_LEFT, _cublas.CUBLAS_FILL_MODE_UPPER, no_trans, _cublas.CUBLAS_DIAG_NON_UNIT, mn_min, nrhs, 1, a.data.ptr, m, b.data.ptr, m) return b[:n] else: # under-determined systems a = a.conj().T.copy(order='F') bb = b out_shape = (n,) if b.ndim == 1 else (n, nrhs) b = _cupy.zeros(out_shape, dtype=dtype, order='F') b[:m] = bb # geqrf (QR decomposition, A^T = Q * R) ws_size = geqrf_helper(cusolver_handle, n, m, a.data.ptr, n) workspace = _cupy.empty(ws_size, dtype=dtype) geqrf(cusolver_handle, n, m, a.data.ptr, n, tau.data.ptr, workspace.data.ptr, ws_size, dev_info.data.ptr) _cupy.linalg._util._check_cusolver_dev_info_if_synchronization_allowed( geqrf, dev_info) # trsm (Solves R^T * Z = B) trsm(cublas_handle, _cublas.CUBLAS_SIDE_LEFT, _cublas.CUBLAS_FILL_MODE_UPPER, trans, _cublas.CUBLAS_DIAG_NON_UNIT, m, nrhs, 1, a.data.ptr, n, b.data.ptr, n) # ormqr (Computes Q * Z) ws_size = ormqr_helper( cusolver_handle, 
_cublas.CUBLAS_SIDE_LEFT, no_trans, n, nrhs, mn_min, a.data.ptr, n, tau.data.ptr, b.data.ptr, n) workspace = _cupy.empty(ws_size, dtype=dtype) ormqr(cusolver_handle, _cublas.CUBLAS_SIDE_LEFT, no_trans, n, nrhs, mn_min, a.data.ptr, n, tau.data.ptr, b.data.ptr, n, workspace.data.ptr, ws_size, dev_info.data.ptr) _cupy.linalg._util._check_cusolver_dev_info_if_synchronization_allowed( ormqr, dev_info) return b
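# A usage sketch for the gels above, assuming it is importable from this module: QR-based
# least squares for over-determined systems and the minimum-norm solution for
# under-determined ones.
import cupy

a = cupy.random.rand(6, 4).astype(cupy.float32)       # 6 equations, 4 unknowns
x_true = cupy.random.rand(4).astype(cupy.float32)
b = a @ x_true                                        # consistent by construction

x = gels(a, b)
print(float(cupy.linalg.norm(a @ x - b)))             # residual ~ float32 round-off

a2 = cupy.random.rand(3, 5).astype(cupy.float32)      # 3 equations, 5 unknowns
b2 = cupy.random.rand(3).astype(cupy.float32)
x2 = gels(a2, b2)
print(x2.shape, float(cupy.linalg.norm(a2 @ x2 - b2)))  # (5,) and a tiny residual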
def test_moments_normalized_invalid(): with pytest.raises(ValueError): moments_normalized(cp.zeros((3, 3)), 3) with pytest.raises(ValueError): moments_normalized(cp.zeros((3, 3)), 4)
def convolutional_barycenter_gpu(Hv, reg, alpha, stabThresh=1e-30, niter=1500, tol=1e-9, sharpening=False, verbose=False): """Main function solving wasserstein barycenter problem using gpu Arguments: Hv {Set of distributions (cparray)} -- reg {regularization term "gamma"} -- float superior to 0, generally equals size of space/40 alpha {list} -- set of weights Keyword Arguments: stabThresh {float} -- Stabilization threshold to prevent division by 0 (default: {1e-30}) niter {int} -- Maximum number of loop iteration (default: {1500}) tol {float} -- convergence tolerance at which point iterations stop (default: {1e-9}) sharpening {bool} -- Whether or not entropic sharpening is used (default: {False}) verbose {bool} -- verbose option Returns: cparray -- solution of weighted wassertein barycenter problem """ def K(x): return cp.array(gaussian_filter(cp.asnumpy(x), sigma=reg)) def to_find_root(barycenter, H0, beta): return entropy(barycenter**beta) - H0 alpha = cp.array(alpha) alpha = alpha / alpha.sum() Hv = cp.array(Hv) mean_weights = (Hv[0].sum() + Hv[1].sum()) / 2. #print('mean weights', mean_weights) for i in range(len(Hv)): Hv[i] = Hv[i] / Hv[i].sum() v = cp.ones(Hv.shape) Kw = cp.ones(Hv.shape) entropy_max = max_entropy(Hv) barycenter = cp.zeros(Hv[0].shape) change = 1 for j in range(niter): t0 = time.time() barycenterOld = barycenter barycenter = cp.zeros_like(Hv[0, :, :]) for i in range(Hv.shape[0]): Kw[i, :, :] = K(Hv[i, :, :] / cp.maximum(stabThresh, K(v[i, :, :]))) barycenter += alpha[i] * cp.log( cp.maximum(stabThresh, v[i, :, :] * Kw[i, :, :])) barycenter = cp.exp(barycenter) change = cp.sum(cp.abs(barycenter - barycenterOld)) if sharpening: if (entropy(barycenter)) > (entropy_max): beta = newton( lambda beta: to_find_root(barycenter, entropy_max, beta), 1, tol=1e-6) if beta < 0: beta = 1 else: beta = 1 barycenter = barycenter**beta for i in range(Hv.shape[0]): v[i, :, :] = barycenter / cp.maximum(stabThresh, Kw[i, :, :]) if verbose: #sys.stdout('output.log','a') print("iter : ", j, "change : ", change, 'time :', time.time() - t0) if change < tol: break return cp.asnumpy(barycenter)
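# The loop above is the entropic (Sinkhorn-style) convolutional barycenter update: each
# input histogram carries a scaling field v[i], K is a Gaussian blur, and the barycenter
# is the weighted geometric mean of the scaled marginals. A compact CPU sketch of the same
# update without the sharpening branch (barycenter_sketch is an illustrative helper):
import numpy as np
from scipy.ndimage import gaussian_filter

def barycenter_sketch(hists, weights, sigma, n_iter=200, eps=1e-30):
    K = lambda z: gaussian_filter(z, sigma=sigma)
    hists = np.asarray(hists, dtype=float)
    hists /= hists.sum(axis=(1, 2), keepdims=True)      # normalise each histogram
    weights = np.asarray(weights, dtype=float)
    weights /= weights.sum()
    v = np.ones_like(hists)
    Kw = np.ones_like(hists)
    for _ in range(n_iter):
        bary = np.zeros_like(hists[0])
        for i in range(len(hists)):
            Kw[i] = K(hists[i] / np.maximum(eps, K(v[i])))
            bary += weights[i] * np.log(np.maximum(eps, v[i] * Kw[i]))
        bary = np.exp(bary)                              # weighted geometric mean
        for i in range(len(hists)):
            v[i] = bary / np.maximum(eps, Kw[i])
    return bary

# The barycenter of two blobs sits between them (displacement interpolation).
a = np.zeros((64, 64))
a[16, 16] = 1.0
b = np.zeros((64, 64))
b[48, 48] = 1.0
a, b = gaussian_filter(a, 3), gaussian_filter(b, 3)      # small blobs instead of Diracs
bary = barycenter_sketch([a, b], [0.5, 0.5], sigma=2.0)
print(np.unravel_index(int(bary.argmax()), bary.shape))  # close to the midpoint (32, 32)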
line = f.readline() f.close() # Load the base vector data (base) add_vector = [] f = codecs.open('vectors.txt', 'r', 'utf8') line = f.readline() while line: v = cp.array(line.split(','), dtype=cp.float32) add_vector.append(v) line = f.readline() f.close() delta = 0.01 # interpolation coefficient # Build the target vectors tgt_vector = cp.zeros((100, 1, 100), dtype=cp.float32) for i in range(100): tgt_vector[i] = (add_vector[0] * (1 - (i * delta))) + (org_vector[0] * (i * delta)) print(tgt_vector.shape) # Generate images from the target vectors for i in range(100): with chainer.using_config('train', False): result = model(tgt_vector[i]) data = np.zeros((128, 128, 3), dtype=np.uint8) dst = result.data[0] * 255.0 data[:, :, 0] = dst[0] data[:, :, 1] = dst[1] data[:, :, 2] = dst[2]
def rollout(rollout_arg_tuple): try: global initial_z_t generation, mutation_idx, trial, args, vision, model, gpu, W_c, b_c, max_timesteps, with_frames = rollout_arg_tuple random_rollouts_dir = os.path.join(args.data_dir, args.game, args.experiment_name, 'random_rollouts') if args.in_dream: log( ID, "Loading random rollouts for initial frames for dream training" ) initial_z_t = ModelDataset(dir=random_rollouts_dir, load_batch_size=args.initial_z_size, verbose=False) # The same starting seed gets passed in multiprocessing, need to reset it for each process: np.random.seed() if not with_frames: log( ID, ">>> Starting generation #" + str(generation) + ", mutation #" + str(mutation_idx + 1) + ", trial #" + str(trial + 1)) else: frames_array = [] start_time = time.time() model.reset_state() if args.in_dream: z_t, _, _, _, _ = initial_z_t[np.random.randint(len(initial_z_t))] z_t = z_t[0] if gpu is not None: z_t = cuda.to_gpu(z_t) if with_frames: observation = vision.decode(z_t).data if gpu is not None: observation = cp.asnumpy(observation) observation = post_process_image_tensor(observation)[0] else: # free up precious GPU memory: if gpu is not None: vision.to_cpu() vision = None if args.initial_z_noise > 0.: if gpu is not None: z_t += cp.random.normal(0., args.initial_z_noise, z_t.shape).astype(cp.float32) else: z_t += np.random.normal(0., args.initial_z_noise, z_t.shape).astype(np.float32) else: if args.game in DOOM_GAMES: env = ViZDoomWrapper(args.game) else: env = gym.make(args.game) observation = env.reset() if with_frames: frames_array.append(observation) if gpu is not None: h_t = cp.zeros(args.hidden_dim).astype(cp.float32) c_t = cp.zeros(args.hidden_dim).astype(cp.float32) else: h_t = np.zeros(args.hidden_dim).astype(np.float32) c_t = np.zeros(args.hidden_dim).astype(np.float32) done = False cumulative_reward = 0 t = 0 while not done: if not args.in_dream: observation = imresize(observation, (args.frame_resize, args.frame_resize)) observation = pre_process_image_tensor( np.expand_dims(observation, 0)) if gpu is not None: observation = cuda.to_gpu(observation) z_t = vision.encode(observation, return_z=True).data[0] a_t = action(args, W_c, b_c, z_t, h_t, c_t, gpu) if args.in_dream: z_t, done = model(z_t, a_t, temperature=args.temperature) done = done.data[0] if with_frames: observation = post_process_image_tensor( vision.decode(z_t).data)[0] reward = 1 if done >= args.done_threshold: done = True else: done = False else: observation, reward, done, _ = env.step( a_t if gpu is None else cp.asnumpy(a_t)) model(z_t, a_t, temperature=args.temperature) if with_frames: frames_array.append(observation) cumulative_reward += reward h_t = model.get_h().data[0] c_t = model.get_c().data[0] t += 1 if max_timesteps is not None and t == max_timesteps: break elif args.in_dream and t == args.dream_max_len: log( ID, ">>> generation #{}, mutation #{}, trial #{}: maximum length of {} timesteps reached in dream!" 
.format(generation, str(mutation_idx + 1), str(trial + 1), t)) break if not args.in_dream: env.close() if not with_frames: log( ID, ">>> Finished generation #{}, mutation #{}, trial #{} in {} timesteps in {:.2f}s with cumulative reward {:.2f}" .format(generation, str(mutation_idx + 1), str(trial + 1), t, (time.time() - start_time), cumulative_reward)) return cumulative_reward else: frames_array = np.asarray(frames_array) if args.game in DOOM_GAMES and not args.in_dream: frames_array = post_process_image_tensor(frames_array) return cumulative_reward, np.asarray(frames_array) except Exception: print(traceback.format_exc()) return 0.
def calc_pgh(ispec, wavelengths, psfparams): ''' Calculate the pixelated Gauss Hermite for all wavelengths of a single spectrum ispec : integer spectrum number wavelengths : array of wavelengths to evaluate psfparams : dictionary of PSF parameters returned by evalcoeffs returns pGHx, pGHy where pGHx[ghdeg+1, nwave, nbinsx] contains the pixel-integrated Gauss-Hermite polynomial for all degrees at all wavelengths across nbinsx bins spaning the PSF spot, and similarly for pGHy. The core PSF will then be evaluated as PSFcore = sum_ij c_ij outer(pGHy[j], pGHx[i]) ''' #- shorthand p = psfparams #- spot size (ny,nx) nx = p['HSIZEX'] ny = p['HSIZEY'] nwave = len(wavelengths) p['X'], p['Y'], p['GHSIGX'], p['GHSIGY'] = \ cp.array(p['X']), cp.array(p['Y']), cp.array(p['GHSIGX']), cp.array(p['GHSIGY']) xedges = cp.repeat(cp.arange(nx + 1) - nx // 2, nwave).reshape(nx + 1, nwave) yedges = cp.repeat(cp.arange(ny + 1) - ny // 2, nwave).reshape(ny + 1, nwave) #- Shift to be relative to the PSF center at 0 and normalize #- by the PSF sigma (GHSIGX, GHSIGY) #- xedges[nx+1, nwave] #- yedges[ny+1, nwave] xedges = (xedges - p['X'][ispec] % 1) / p['GHSIGX'][ispec] yedges = (yedges - p['Y'][ispec] % 1) / p['GHSIGY'][ispec] #- Degree of the Gauss-Hermite polynomials ghdegx = p['GHDEGX'] ghdegy = p['GHDEGY'] #- Evaluate the Hermite polynomials at the pixel edges #- HVx[ghdegx+1, nwave, nx+1] #- HVy[ghdegy+1, nwave, ny+1] HVx = hermevander_wrapper(xedges, ghdegx).T HVy = hermevander_wrapper(yedges, ghdegy).T #- Evaluate the Gaussians at the pixel edges #- Gx[nwave, nx+1] #- Gy[nwave, ny+1] Gx = cp.exp(-0.5 * xedges**2).T / cp.sqrt(2. * cp.pi) Gy = cp.exp(-0.5 * yedges**2).T / cp.sqrt(2. * cp.pi) #- Combine into Gauss*Hermite GHx = HVx * Gx GHy = HVy * Gy #- Integrate over the pixels using the relationship # Integral{ H_k(x) exp(-0.5 x^2) dx} = -H_{k-1}(x) exp(-0.5 x^2) + const #- pGHx[ghdegx+1, nwave, nx] #- pGHy[ghdegy+1, nwave, ny] pGHx = cp.zeros((ghdegx + 1, nwave, nx)) pGHy = cp.zeros((ghdegy + 1, nwave, ny)) pGHx[0] = 0.5 * cp.diff(cupyx.scipy.special.erf(xedges / cp.sqrt(2.)).T) pGHy[0] = 0.5 * cp.diff(cupyx.scipy.special.erf(yedges / cp.sqrt(2.)).T) pGHx[1:] = GHx[:ghdegx, :, 0:nx] - GHx[:ghdegx, :, 1:nx + 1] pGHy[1:] = GHy[:ghdegy, :, 0:ny] - GHy[:ghdegy, :, 1:ny + 1] return pGHx, pGHy
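# The pixel integration above uses the antiderivative identity quoted in the comment:
# d/dx[-He_{k-1}(x) * exp(-x^2/2)] = He_k(x) * exp(-x^2/2) for the probabilists' Hermite
# polynomials, so each pixel integral is a difference of edge values. A quick numerical
# check of that identity:
import numpy as np
from numpy.polynomial import hermite_e as He
from scipy.integrate import quad

k = 3
lo, hi = -0.4, 0.9                                   # one "pixel" in sigma-scaled coordinates

numeric, _ = quad(lambda x: He.hermeval(x, [0] * k + [1]) * np.exp(-0.5 * x**2), lo, hi)
anti = lambda x: -He.hermeval(x, [0] * (k - 1) + [1]) * np.exp(-0.5 * x**2)
print(np.isclose(numeric, anti(hi) - anti(lo)))      # True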
def eigsh(a, k=6, *, which='LM', ncv=None, maxiter=None, tol=0, return_eigenvectors=True): """Finds ``k`` eigenvalues and eigenvectors of the real symmetric matrix. Solves ``Ax = wx``, the standard eigenvalue problem for ``w`` eigenvalues with corresponding eigenvectors ``x``. Args: a (cupy.ndarray or cupyx.scipy.sparse.csr_matrix): A symmetric square matrix with dimension ``(n, n)``. k (int): The number of eigenvalues and eigenvectors to compute. Must be ``1 <= k < n``. which (str): 'LM' or 'LA'. 'LM': finds ``k`` largest (in magnitude) eigenvalues. 'LA': finds ``k`` largest (algebraic) eigenvalues. ncv (int): The number of Lanczos vectors generated. Must be ``k + 1 < ncv < n``. If ``None``, default value is used. maxiter (int): Maximum number of Lanczos update iterations. If ``None``, default value is used. tol (float): Tolerance for residuals ``||Ax - wx||``. If ``0``, machine precision is used. return_eigenvectors (bool): If ``True``, returns eigenvectors in addition to eigenvalues. Returns: tuple: If ``return_eigenvectors is True``, it returns ``w`` and ``x`` where ``w`` is eigenvalues and ``x`` is eigenvectors. Otherwise, it returns only ``w``. .. seealso:: :func:`scipy.sparse.linalg.eigsh` .. note:: This function uses the thick-restart Lanczos methods (https://sdm.lbl.gov/~kewu/ps/trlan.html). """ n = a.shape[0] if a.ndim != 2 or a.shape[0] != a.shape[1]: raise ValueError('expected square matrix (shape: {})'.format(a.shape)) if a.dtype.char not in 'fdFD': raise TypeError('unsupprted dtype (actual: {})'.format(a.dtype)) if k <= 0: raise ValueError('k must be greater than 0 (actual: {})'.format(k)) if k >= n: raise ValueError('k must be smaller than n (actual: {})'.format(k)) if which not in ('LM', 'LA'): raise ValueError('which must be \'LM\' or \'LA\' (actual: {})' ''.format(which)) if ncv is None: ncv = min(max(2 * k, k + 32), n - 1) else: ncv = min(max(ncv, k + 2), n - 1) if maxiter is None: maxiter = 10 * n if tol == 0: tol = numpy.finfo(a.dtype).eps alpha = cupy.zeros((ncv, ), dtype=a.dtype) beta = cupy.zeros((ncv, ), dtype=a.dtype.char.lower()) V = cupy.empty((ncv, n), dtype=a.dtype) # Set initial vector u = cupy.random.random((n, )).astype(a.dtype) V[0] = u / cublas.nrm2(u) # Choose Lanczos implementation, unconditionally use 'fast' for now upadte_impl = 'fast' if upadte_impl == 'fast': lanczos = _lanczos_fast(a, n, ncv) else: lanczos = _lanczos_asis # Lanczos iteration lanczos(a, V, u, alpha, beta, 0, ncv) iter = ncv w, s = _eigsh_solve_ritz(alpha, beta, None, k, which) x = V.T @ s # Compute residual beta_k = beta[-1] * s[-1, :] res = cublas.nrm2(beta_k) while res > tol and iter < maxiter: # Setup for thick-restart beta[:k] = 0 alpha[:k] = w V[:k] = x.T u -= u.T @ V[:k].conj().T @ V[:k] V[k] = u / cublas.nrm2(u) u[...] = a @ V[k] cublas.dotc(V[k], u, out=alpha[k]) u -= alpha[k] * V[k] u -= V[:k].T @ beta_k cublas.nrm2(u, out=beta[k]) V[k + 1] = u / beta[k] # Lanczos iteration lanczos(a, V, u, alpha, beta, k + 1, ncv) iter += ncv - k w, s = _eigsh_solve_ritz(alpha, beta, beta_k, k, which) x = V.T @ s # Compute residual beta_k = beta[-1] * s[-1, :] res = cublas.nrm2(beta_k) if return_eigenvectors: idx = cupy.argsort(w) return w[idx], x[:, idx] else: return cupy.sort(w)
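# A usage sketch for the thick-restart Lanczos eigsh above, through its public entry point
# cupyx.scipy.sparse.linalg.eigsh, on a matrix with a known spectrum:
import cupy
from cupyx.scipy.sparse.linalg import eigsh

n = 256
# 1-D Laplacian: symmetric tridiagonal with eigenvalues 2 - 2*cos(k*pi/(n+1)).
a = 2 * cupy.eye(n) - cupy.eye(n, k=1) - cupy.eye(n, k=-1)

w, x = eigsh(a, k=4, which='LA', tol=1e-9)            # four largest (algebraic) eigenvalues
ks = cupy.arange(n - 3, n + 1)
expected = 2 - 2 * cupy.cos(ks * cupy.pi / (n + 1))   # analytic values, ascending
print(bool(cupy.allclose(w, expected, atol=1e-6)))    # True once converged
print(float(cupy.linalg.norm(a @ x - x * w)))         # residual ||A x - x w||, small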
return self._buf != other._buf def __len__(self): return len(self._buf) def __getitem__(self, item): return self._buf[item] def __setitem__(self, item, value): self._buf[item] = value._buf cupy_issue_2259 = False if cupy is not None: cupy_issue_2259 = not isinstance( cupy.zeros((2, 2)).T.__cuda_array_interface__['strides'], tuple) # --- def Sendrecv(smsg, rmsg): MPI.COMM_SELF.Sendrecv(sendbuf=smsg, dest=0, sendtag=0, recvbuf=rmsg, source=0, recvtag=0, status=MPI.Status()) class TestMessageSimple(unittest.TestCase):
def testOrderFortran(self): sbuf = cupy.ones([3, 2]).T rbuf = cupy.zeros([3, 2]).T Sendrecv(sbuf, rbuf) self.assertTrue((sbuf == rbuf).all())
def test_ndarray_indices_false(self): nd_image = cp.zeros((5, 5, 5)) nd_image[2, 2, 2] = 1 peaks = peak.peak_local_max(nd_image, min_distance=1, indices=False) assert (peaks == nd_image.astype(cp.bool)).all()
def testOrderC(self): sbuf = cupy.ones([3, 2]) rbuf = cupy.zeros([3, 2]) Sendrecv(sbuf, rbuf) self.assertTrue((sbuf == rbuf).all())
def _select( input, labels=None, index=None, find_min=False, find_max=False, find_min_positions=False, find_max_positions=False, find_median=False, ): """Return one or more of: min, max, min position, max position, median. If neither `labels` or `index` is provided, these are the global values in `input`. If `index` is None, but `labels` is provided, a global value across all non-zero labels is given. When both `labels` and `index` are provided, lists of values are provided for each labeled region specified in `index`. See further details in :func:`cupyx.scipy.ndimage.minimum`, etc. Used by minimum, maximum, minimum_position, maximum_position, extrema. """ find_positions = find_min_positions or find_max_positions positions = None if find_positions: positions = cupy.arange(input.size).reshape(input.shape) def single_group(vals, positions): result = [] if find_min: result += [vals.min()] if find_min_positions: result += [positions[vals == vals.min()][0]] if find_max: result += [vals.max()] if find_max_positions: result += [positions[vals == vals.max()][0]] if find_median: result += [cupy.median(vals)] return result if labels is None: return single_group(input, positions) # ensure input and labels match sizes input, labels = cupy.broadcast_arrays(input, labels) if index is None: mask = labels > 0 masked_positions = None if find_positions: masked_positions = positions[mask] return single_group(input[mask], masked_positions) if cupy.isscalar(index): mask = labels == index masked_positions = None if find_positions: masked_positions = positions[mask] return single_group(input[mask], masked_positions) index = cupy.asarray(index) safe_int = _safely_castable_to_int(labels.dtype) min_label = labels.min() max_label = labels.max() # Remap labels to unique integers if necessary, or if the largest label is # larger than the number of values. if not safe_int or min_label < 0 or max_label > labels.size: # Remap labels, and indexes unique_labels, labels = cupy.unique(labels, return_inverse=True) idxs = cupy.searchsorted(unique_labels, index) # Make all of idxs valid idxs[idxs >= unique_labels.size] = 0 found = unique_labels[idxs] == index else: # Labels are an integer type, and there aren't too many idxs = cupy.asanyarray(index, cupy.int).copy() found = (idxs >= 0) & (idxs <= max_label) idxs[~found] = max_label + 1 input = input.ravel() labels = labels.ravel() if find_positions: positions = positions.ravel() if hasattr(cupy, "_core"): using_cub = (cupy._core._accelerator.ACCELERATOR_CUB in cupy._core.get_routine_accelerators()) else: using_cub = (cupy.core._accelerator.ACCELERATOR_CUB in cupy.core.get_routine_accelerators()) if using_cub: # Cutoff values below were determined empirically for relatively large # input arrays. 
if find_positions or find_median: n_label_cutoff = 15 else: n_label_cutoff = 30 else: n_label_cutoff = 0 if n_label_cutoff and len(idxs) <= n_label_cutoff: return _select_via_looping( input, labels, idxs, positions, find_min, find_min_positions, find_max, find_max_positions, find_median, ) order = cupy.lexsort(cupy.stack((input.ravel(), labels.ravel()))) input = input[order] labels = labels[order] if find_positions: positions = positions[order] # Determine indices corresponding to the min or max value for each label label_change_index = cupy.searchsorted(labels, cupy.arange(1, max_label + 2)) if find_min or find_min_positions or find_median: # index corresponding to the minimum value at each label min_index = label_change_index[:-1] if find_max or find_max_positions or find_median: # index corresponding to the maximum value at each label max_index = label_change_index[1:] - 1 result = [] # the order below matches the order expected by cupy.ndimage.extrema if find_min: mins = cupy.zeros(int(labels.max()) + 2, input.dtype) mins[labels[min_index]] = input[min_index] result += [mins[idxs]] if find_min_positions: minpos = cupy.zeros(labels.max().item() + 2, int) minpos[labels[min_index]] = positions[min_index] result += [minpos[idxs]] if find_max: maxs = cupy.zeros(int(labels.max()) + 2, input.dtype) maxs[labels[max_index]] = input[max_index] result += [maxs[idxs]] if find_max_positions: maxpos = cupy.zeros(labels.max().item() + 2, int) maxpos[labels[max_index]] = positions[max_index] result += [maxpos[idxs]] if find_median: locs = cupy.arange(len(labels)) lo = cupy.zeros(int(labels.max()) + 2, cupy.int) lo[labels[min_index]] = locs[min_index] hi = cupy.zeros(int(labels.max()) + 2, cupy.int) hi[labels[max_index]] = locs[max_index] lo = lo[idxs] hi = hi[idxs] # lo is an index to the lowest value in input for each label, # hi is an index to the largest value. # move them to be either the same ((hi - lo) % 2 == 0) or next # to each other ((hi - lo) % 2 == 1), then average. step = (hi - lo) // 2 lo += step hi -= step if input.dtype.kind in "iub": # fix for https://github.com/scipy/scipy/issues/12836 result += [ (input[lo].astype(float) + input[hi].astype(float)) / 2.0 ] else: result += [(input[lo] + input[hi]) / 2.0] return result
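# The helper above backs the labelled statistics routines (minimum, maximum, *_position,
# median). Its behaviour mirrors the scipy.ndimage API, sketched here on the CPU for clarity:
import numpy as np
from scipy import ndimage

image = np.array([[1, 2, 0, 0],
                  [5, 3, 0, 4],
                  [0, 0, 0, 7],
                  [9, 3, 0, 0]])
labels = np.array([[1, 1, 0, 0],
                   [1, 1, 0, 2],
                   [0, 0, 0, 2],
                   [3, 3, 0, 0]])

print(ndimage.minimum(image, labels, index=[1, 2, 3]))           # region minima: 1, 4, 3
print(ndimage.maximum(image, labels, index=[1, 2, 3]))           # region maxima: 5, 7, 9
print(ndimage.maximum_position(image, labels, index=[1, 2, 3]))  # [(1, 0), (2, 3), (3, 0)]
print(ndimage.maximum(image, labels))                            # 9.0 over all non-zero labels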
def test_flat_peak(self): image = cp.zeros((5, 5), dtype=cp.uint8) image[1:3, 1:3] = 10 peaks = peak.peak_local_max(image, min_distance=1) assert len(peaks) == 4
def fit(self, item_users, show_progress=True): """Factorizes the item_users matrix Parameters ---------- item_users: coo_matrix Matrix of confidences for the liked items. This matrix should be a coo_matrix where the rows of the matrix are the item, and the columns are the users that liked that item. BPR ignores the weight value of the matrix right now - it treats non zero entries as a binary signal that the user liked the item. show_progress : bool, optional Whether to show a progress bar """ rs = check_random_state(self.random_state) # for now, all we handle is float 32 values if item_users.dtype != np.float32: item_users = item_users.astype(np.float32) items, users = item_users.shape # We need efficient user lookup for case of removing own likes # TODO: might make more sense to just changes inputs to be users by items instead # but that would be a major breaking API change user_items = item_users.T.tocsr() if not user_items.has_sorted_indices: user_items.sort_indices() # this basically calculates the 'row' attribute of a COO matrix # without requiring us to get the whole COO matrix user_counts = np.ediff1d(user_items.indptr) userids = np.repeat(np.arange(users), user_counts).astype(user_items.indices.dtype) # create factors if not already created. # Note: the final dimension is for the item bias term - which is set to a 1 for all users # this simplifies interfacing with approximate nearest neighbours libraries etc if self.item_factors is None: self.item_factors = rs.rand( items, self.factors + 1, dtype=cp.float32) - 0.5 self.item_factors /= self.factors # set factors to all zeros for items without any ratings item_counts = np.bincount(user_items.indices, minlength=items) self.item_factors[item_counts == 0] = cp.zeros(self.factors + 1) if self.user_factors is None: self.user_factors = rs.rand( users, self.factors + 1, dtype=cp.float32) - 0.5 self.user_factors /= self.factors # set factors to all zeros for users without any ratings self.user_factors[user_counts == 0] = cp.zeros(self.factors + 1) self.user_factors[:, self.factors] = 1.0 self._item_norms = self._user_norms = None userids = implicit.gpu.IntVector(userids) itemids = implicit.gpu.IntVector(user_items.indices) indptr = implicit.gpu.IntVector(user_items.indptr) X = implicit.gpu.Matrix(self.user_factors) Y = implicit.gpu.Matrix(self.item_factors) log.debug("Running %i BPR training epochs", self.iterations) with tqdm(total=self.iterations, disable=not show_progress) as progress: for epoch in range(self.iterations): correct, skipped = implicit.gpu.bpr_update( userids, itemids, indptr, X, Y, self.learning_rate, self.regularization, rs.randint(2**31), self.verify_negative_samples, ) progress.update(1) total = len(user_items.data) if total != 0 and total != skipped: progress.set_postfix({ "correct": "%.2f%%" % (100.0 * correct / (total - skipped)), "skipped": "%.2f%%" % (100.0 * skipped / total), })
def convolutional_barycenter_gpu(Hv, reg: float, alpha: np.ndarray, stabThresh=1e-30, niter=1500, tol=1e-9, sharpening=False, verbose=False): """Main function solving the Wasserstein barycenter problem on the GPU Parameters: Hv {Set of distributions (cparray)} -- input distributions reg {float} -- regularization term "gamma", greater than 0, generally equal to the size of the space / 40 alpha {list } -- set of weights Keyword Parameters: stabThresh {float} -- Stabilization threshold to prevent division by 0 (default: {1e-30}) niter {int } -- Maximum number of loop iterations (default: {1500}) tol {float} -- convergence tolerance at which point iterations stop (default: {1e-9}) sharpening {bool } -- Whether or not entropic sharpening is used (default: {False}) verbose {bool } -- verbose option Returns: cparray -- solution of the weighted Wasserstein barycenter problem """ import cupy as cp from cupyx.scipy.ndimage import gaussian_filter as cupyx_gaussian_filter def K_cupyx(x): return cupyx_gaussian_filter(x, sigma=reg) def to_find_root(barycenter, H0, beta): return entropy(barycenter**beta) - H0 alpha = cp.array(alpha) alpha = alpha / alpha.sum() Hv = cp.array(Hv) for i in range(len(Hv)): Hv[i] = Hv[i] / Hv[i].sum() v = cp.ones(Hv.shape) Kw = cp.ones(Hv.shape) entropy_max = max_entropy(Hv) barycenter = cp.zeros(Hv[0].shape) cumtime_agg = 0 rolling_delta = [] cumtime = [] iterations = [] change = 1 for j in range(niter): print("For every iteration.. ") t0 = time.time() barycenterOld = barycenter barycenter = cp.zeros_like(Hv[0, :, :]) print("Hv shape is", Hv.shape) for i in range(Hv.shape[0]): #* for each input distribution *i*: #* distribution *i* becomes Kernel of (dist *i* over the Kernel of v(i)) Kw[i, :, :] = K_cupyx(Hv[i, :, :] / cp.maximum(stabThresh, K_cupyx(v[i, :, :]))) #* barycenter is barycenter plus weighted log of v(i)*Kw(i) barycenter += alpha[i] * cp.log( cp.maximum(stabThresh, v[i, :, :] * Kw[i, :, :])) barycenter = cp.exp(barycenter) change = cp.sum(cp.abs(barycenter - barycenterOld)) if sharpening: if (entropy(barycenter)) > (entropy_max): beta = newton( lambda beta: to_find_root(barycenter, entropy_max, beta), 1, tol=1e-6) if beta < 0: beta = 1 else: beta = 1 barycenter = barycenter**beta for i in range(Hv.shape[0]): # assign to v(i) barycenter normalized by Kw(i)'s largest v[i, :, :] = barycenter / cp.maximum(stabThresh, Kw[i, :, :]) elapsed = np.around(time.time() - t0, 4) delta = np.around(change, 10) cumtime_agg += elapsed iterations.append(j) cumtime.append(cumtime_agg) rolling_delta.append(float(delta)) print(f"Refinement iter {j} | delta: {delta} | elapsed : {elapsed}") if change < tol: print(f"Exited. Change {change} under tolerance.") log = { "iterations": iterations, "cumtime": cumtime, "rolling_delta": rolling_delta, "exited_on": j, "exited_under_tolerance": True } # print(f"Exited with 0 on iter {j}") return [cp.asnumpy(barycenter), log] log = { "iterations": iterations, "cumtime": cumtime, "rolling_delta": rolling_delta, "exited_on": j, "exited_under_tolerance": False } # print(f"Exited with 0 on iter {j}") return [cp.asnumpy(barycenter), log]
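# Hedged usage sketch for convolutional_barycenter_gpu above: two normalized
# 2-D blobs with equal weights. Assumes it runs in the module where the
# helpers the function relies on (entropy, max_entropy, newton, time, np)
# are already defined.
import numpy as np

def _blob(n, cx, cy, sigma=4.0):
    # Illustrative helper (not part of the original code): a normalized
    # Gaussian bump used as an input distribution.
    yy, xx = np.mgrid[0:n, 0:n]
    h = np.exp(-((xx - cx) ** 2 + (yy - cy) ** 2) / (2 * sigma ** 2))
    return h / h.sum()

n = 64
Hv = np.stack([_blob(n, 20, 20), _blob(n, 44, 44)])  # two input distributions
alpha = [0.5, 0.5]                                   # equal weights
reg = n / 40                                         # heuristic from the docstring
barycenter, log = convolutional_barycenter_gpu(Hv, reg, alpha, niter=300, tol=1e-7)
print(barycenter.shape, log["exited_on"])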
def make_classification(n_samples=100, n_features=20, n_informative=2, n_redundant=2, n_repeated=0, n_classes=2, n_clusters_per_class=2, weights=None, flip_y=0.01, class_sep=1.0, hypercube=True, shift=0.0, scale=1.0, shuffle=True, random_state=None, order='F', dtype='float32', _centroids=None, _informative_covariance=None, _redundant_covariance=None, _repeated_indices=None): """ Generate a random n-class classification problem. This initially creates clusters of points normally distributed (std=1) about vertices of an `n_informative`-dimensional hypercube with sides of length :py:`2*class_sep` and assigns an equal number of clusters to each class. It introduces interdependence between these features and adds various types of further noise to the data. Without shuffling, `X` horizontally stacks features in the following order: the primary `n_informative` features, followed by `n_redundant` linear combinations of the informative features, followed by `n_repeated` duplicates, drawn randomly with replacement from the informative and redundant features. The remaining features are filled with random noise. Thus, without shuffling, all useful features are contained in the columns :py:`X[:, :n_informative + n_redundant + n_repeated]`. Examples -------- .. code-block:: python >>> from cuml.datasets.classification import make_classification >>> X, y = make_classification(n_samples=10, n_features=4, ... n_informative=2, n_classes=2, ... random_state=10) >>> print(X) # doctest: +SKIP [[-1.7974224 0.24425316 0.39062843 -0.38293394] [ 0.6358963 1.4161923 0.06970507 -0.16085647] [-0.22802866 -1.1827322 0.3525861 0.276615 ] [ 1.7308872 0.43080002 0.05048406 0.29837844] [-1.9465544 0.5704457 -0.8997551 -0.27898186] [ 1.0575483 -0.9171263 0.09529338 0.01173469] [ 0.7917619 -1.0638094 -0.17599393 -0.06420116] [-0.6686142 -0.13951421 -0.6074711 0.21645583] [-0.88968956 -0.914443 0.1302423 0.02924336] [-0.8817671 -0.84549576 0.1845096 0.02556021]] >>> print(y) [0 1 0 1 1 0 0 1 0 0] Parameters ---------- n_samples : int, optional (default=100) The number of samples. n_features : int, optional (default=20) The total number of features. These comprise `n_informative` informative features, `n_redundant` redundant features, `n_repeated` duplicated features and :py:`n_features-n_informative-n_redundant-n_repeated` useless features drawn at random. n_informative : int, optional (default=2) The number of informative features. Each class is composed of a number of gaussian clusters each located around the vertices of a hypercube in a subspace of dimension `n_informative`. For each cluster, informative features are drawn independently from N(0, 1) and then randomly linearly combined within each cluster in order to add covariance. The clusters are then placed on the vertices of the hypercube. n_redundant : int, optional (default=2) The number of redundant features. These features are generated as random linear combinations of the informative features. n_repeated : int, optional (default=0) The number of duplicated features, drawn randomly from the informative and the redundant features. n_classes : int, optional (default=2) The number of classes (or labels) of the classification problem. n_clusters_per_class : int, optional (default=2) The number of clusters per class. weights : array-like of shape (n_classes,) or (n_classes - 1,),\ (default=None) The proportions of samples assigned to each class. If None, then classes are balanced. 
Note that if :py:`len(weights) == n_classes - 1`, then the last class weight is automatically inferred. More than `n_samples` samples may be returned if the sum of `weights` exceeds 1. flip_y : float, optional (default=0.01) The fraction of samples whose class is assigned randomly. Larger values introduce noise in the labels and make the classification task harder. class_sep : float, optional (default=1.0) The factor multiplying the hypercube size. Larger values spread out the clusters/classes and make the classification task easier. hypercube : boolean, optional (default=True) If True, the clusters are put on the vertices of a hypercube. If False, the clusters are put on the vertices of a random polytope. shift : float, array of shape [n_features] or None, optional (default=0.0) Shift features by the specified value. If None, then features are shifted by a random value drawn in [-class_sep, class_sep]. scale : float, array of shape [n_features] or None, optional (default=1.0) Multiply features by the specified value. If None, then features are scaled by a random value drawn in [1, 100]. Note that scaling happens after shifting. shuffle : boolean, optional (default=True) Shuffle the samples and the features. random_state : int, RandomState instance or None (default) Determines random number generation for dataset creation. Pass an int for reproducible output across multiple function calls. See :term:`Glossary <random_state>`. order: str, optional (default='F') The order of the generated samples dtype : str, optional (default='float32') Dtype of the generated samples _centroids: array of centroids of shape (n_clusters, n_informative) _informative_covariance: array for covariance between informative features of shape (n_clusters, n_informative, n_informative) _redundant_covariance: array for covariance between redundant features of shape (n_informative, n_redundant) _repeated_indices: array of indices for the repeated features of shape (n_repeated, ) Returns ------- X : device array of shape [n_samples, n_features] The generated samples. y : device array of shape [n_samples] The integer labels for class membership of each sample. Notes ----- The algorithm is adapted from Guyon [1]_ and was designed to generate the "Madelon" dataset. How we optimized for GPUs: 1. Firstly, we generate X from a standard univariate instead of zeros. This saves memory as we don't need to generate univariates each time for each feature class (informative, repeated, etc.) while also providing the added speedup of generating a big matrix on GPU 2. We generate :py:`order=F` construction. We exploit the fact that X is a generated from a univariate normal, and covariance is introduced with matrix multiplications. Which means, we can generate X as a 1D array and just reshape it to the desired order, which only updates the metadata and eliminates copies 3. Lastly, we also shuffle by construction. Centroid indices are permuted for each sample, and then we construct the data for each centroid. This shuffle works for both :py:`order=C` and :py:`order=F` and eliminates any need for secondary copies References ---------- .. [1] I. Guyon, "Design of experiments for the NIPS 2003 variable selection benchmark", 2003. 
""" cuml.internals.set_api_output_type("cupy") generator = _create_rs_generator(random_state) np_seed = int(generator.randint(n_samples, size=1)) np.random.seed(np_seed) # Count features, clusters and samples if n_informative + n_redundant + n_repeated > n_features: raise ValueError("Number of informative, redundant and repeated " "features must sum to less than the number of total" " features") # Use log2 to avoid overflow errors if n_informative < np.log2(n_classes * n_clusters_per_class): msg = "n_classes({}) * n_clusters_per_class({}) must be" msg += " smaller or equal 2**n_informative({})={}" raise ValueError( msg.format(n_classes, n_clusters_per_class, n_informative, 2**n_informative)) if weights is not None: if len(weights) not in [n_classes, n_classes - 1]: raise ValueError("Weights specified but incompatible with number " "of classes.") if len(weights) == n_classes - 1: if isinstance(weights, list): weights = weights + [1.0 - sum(weights)] else: weights = np.resize(weights, n_classes) weights[-1] = 1.0 - sum(weights[:-1]) else: weights = [1.0 / n_classes] * n_classes n_clusters = n_classes * n_clusters_per_class # Distribute samples among clusters by weight n_samples_per_cluster = [ int(n_samples * weights[k % n_classes] / n_clusters_per_class) for k in range(n_clusters) ] for i in range(n_samples - sum(n_samples_per_cluster)): n_samples_per_cluster[i % n_clusters] += 1 # Initialize X and y X = generator.randn(n_samples * n_features, dtype=dtype) X = X.reshape((n_samples, n_features), order=order) y = cp.zeros(n_samples, dtype=np.int64) # Build the polytope whose vertices become cluster centroids if _centroids is None: centroids = cp.array( _generate_hypercube(n_clusters, n_informative, generator)).astype(dtype, copy=False) else: centroids = _centroids centroids *= 2 * class_sep centroids -= class_sep if not hypercube: centroids *= generator.rand(n_clusters, 1, dtype=dtype) centroids *= generator.rand(1, n_informative, dtype=dtype) # Create redundant features if n_redundant > 0: if _redundant_covariance is None: B = 2 * generator.rand(n_informative, n_redundant, dtype=dtype) - 1 else: B = _redundant_covariance # Create each cluster; a variant of make_blobs if shuffle: proba_samples_per_cluster = np.array(n_samples_per_cluster) / np.sum( n_samples_per_cluster) shuffled_sample_indices = cp.array( np.random.choice(n_clusters, n_samples, replace=True, p=proba_samples_per_cluster)) for k, centroid in enumerate(centroids): centroid_indices = cp.where(shuffled_sample_indices == k) y[centroid_indices[0]] = k % n_classes X_k = X[centroid_indices[0], :n_informative] if _informative_covariance is None: A = 2 * generator.rand( n_informative, n_informative, dtype=dtype) - 1 else: A = _informative_covariance[k] X_k = cp.dot(X_k, A) # NOTE: This could be done outside the loop, but a current # cupy bug does not allow that # https://github.com/cupy/cupy/issues/3284 if n_redundant > 0: X[centroid_indices[0], n_informative:n_informative + n_redundant] = cp.dot(X_k, B) X_k += centroid # shift the cluster to a vertex X[centroid_indices[0], :n_informative] = X_k else: stop = 0 for k, centroid in enumerate(centroids): start, stop = stop, stop + n_samples_per_cluster[k] y[start:stop] = k % n_classes # assign labels X_k = X[start:stop, :n_informative] # slice a view of the cluster if _informative_covariance is None: A = 2 * generator.rand( n_informative, n_informative, dtype=dtype) - 1 else: A = _informative_covariance[k] X_k = cp.dot(X_k, A) # introduce random covariance if n_redundant > 0: X[start:stop, 
n_informative:n_informative + n_redundant] = \ cp.dot(X_k, B) X_k += centroid # shift the cluster to a vertex X[start:stop, :n_informative] = X_k # Repeat some features if n_repeated > 0: n = n_informative + n_redundant if _repeated_indices is None: indices = ((n - 1) * generator.rand(n_repeated, dtype=dtype) + 0.5).astype(np.intp) else: indices = _repeated_indices X[:, n:n + n_repeated] = X[:, indices] # Randomly replace labels if flip_y >= 0.0: flip_mask = generator.rand(n_samples, dtype=dtype) < flip_y y[flip_mask] = generator.randint(n_classes, size=int(flip_mask.sum())) # Randomly shift and scale if shift is None: shift = (2 * generator.rand(n_features, dtype=dtype) - 1) * class_sep X += shift if scale is None: scale = 1 + 100 * generator.rand(n_features, dtype=dtype) X *= scale return X, y
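# Hedged illustration of the column layout described in the docstring: with
# shuffle=False, the useful features occupy the first
# n_informative + n_redundant + n_repeated columns and the rest is noise.
from cuml.datasets.classification import make_classification

X, y = make_classification(n_samples=200, n_features=10, n_informative=3,
                           n_redundant=2, n_repeated=1, n_classes=2,
                           shuffle=False, random_state=0)
useful = X[:, :3 + 2 + 1]          # informative + redundant + repeated block
noise = X[:, 3 + 2 + 1:]           # remaining columns are random noise
print(useful.shape, noise.shape)   # (200, 6) (200, 4)
print(int(y.min()), int(y.max()))  # labels in {0, 1}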
def fit(self, X, y=None) -> "KBinsDiscretizer": """ Fit the estimator. Parameters ---------- X : numeric array-like, shape (n_samples, n_features) Data to be discretized. y : None Ignored. This parameter exists only for compatibility with :class:`sklearn.pipeline.Pipeline`. Returns ------- self """ X = self._validate_data(X, dtype='numeric') valid_encode = ('onehot', 'onehot-dense', 'ordinal') if self.encode not in valid_encode: raise ValueError("Valid options for 'encode' are {}. " "Got encode={!r} instead.".format( valid_encode, self.encode)) valid_strategy = ('uniform', 'quantile', 'kmeans') if self.strategy not in valid_strategy: raise ValueError("Valid options for 'strategy' are {}. " "Got strategy={!r} instead.".format( valid_strategy, self.strategy)) n_features = X.shape[1] n_bins = self._validate_n_bins(n_features) n_bins = np.asnumpy(n_bins) bin_edges = cpu_np.zeros(n_features, dtype=object) for jj in range(n_features): column = X[:, jj] col_min, col_max = column.min(), column.max() if col_min == col_max: warnings.warn("Feature %d is constant and will be " "replaced with 0." % jj) n_bins[jj] = 1 bin_edges[jj] = np.array([-np.inf, np.inf]) continue if self.strategy == 'uniform': bin_edges[jj] = np.linspace(col_min, col_max, n_bins[jj] + 1) elif self.strategy == 'quantile': quantiles = np.linspace(0, 100, n_bins[jj] + 1) bin_edges[jj] = np.asarray(np.percentile(column, quantiles)) # Workaround for https://github.com/cupy/cupy/issues/4451 # This should be removed as soon as a fix is available in cupy # in order to limit alterations in the included sklearn code bin_edges[jj][-1] = col_max elif self.strategy == 'kmeans': # Deterministic initialization with uniform spacing uniform_edges = np.linspace(col_min, col_max, n_bins[jj] + 1) init = (uniform_edges[1:] + uniform_edges[:-1])[:, None] * 0.5 # 1D k-means procedure km = KMeans(n_clusters=n_bins[jj], init=init, n_init=1, output_type='cupy') km = km.fit(column[:, None]) with using_output_type('cupy'): centers = km.cluster_centers_[:, 0] # Must sort, centers may be unsorted even with sorted init centers.sort() bin_edges[jj] = (centers[1:] + centers[:-1]) * 0.5 bin_edges[jj] = np.r_[col_min, bin_edges[jj], col_max] # Remove bins whose width are too small (i.e., <= 1e-8) if self.strategy in ('quantile', 'kmeans'): mask = np.diff(bin_edges[jj], prepend=-np.inf) > 1e-8 bin_edges[jj] = bin_edges[jj][mask] if len(bin_edges[jj]) - 1 != n_bins[jj]: warnings.warn('Bins whose width are too small (i.e., <= ' '1e-8) in feature %d are removed. Consider ' 'decreasing the number of bins.' % jj) n_bins[jj] = len(bin_edges[jj]) - 1 self.bin_edges_ = bin_edges self.n_bins_ = n_bins if 'onehot' in self.encode: self._encoder = OneHotEncoder(categories=np.array( [np.arange(i) for i in self.n_bins_]), sparse=self.encode == 'onehot', output_type='cupy') # Fit the OneHotEncoder with toy datasets # so that it's ready for use after the KBinsDiscretizer is fitted self._encoder.fit(np.zeros((1, len(self.n_bins_)), dtype=int)) return self
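# Hedged usage sketch for the fit() method above, assuming cuML's
# KBinsDiscretizer (the import path below is an assumption) with the encode
# and strategy options validated in fit().
import cupy as cp
from cuml.preprocessing import KBinsDiscretizer  # assumed import path

X = cp.array([[-2.0, 1.0, -4.0, -1.0],
              [-1.0, 2.0, -3.0, -0.5],
              [ 0.0, 3.0, -2.0,  0.5],
              [ 1.0, 4.0, -1.0,  2.0]])
est = KBinsDiscretizer(n_bins=3, encode='ordinal', strategy='uniform')
est.fit(X)
print(est.bin_edges_[0])     # per-feature edges computed in fit()
print(est.transform(X)[:2])  # ordinal bin indices for the first two rows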
def test_cuDoubleComplex(self): N = 100 block = 32 grid = (N + block - 1) // block dtype = cupy.complex128 mod = cupy.RawModule( code=_test_cuComplex, translate_cucomplex=True) a = cupy.random.random((N,)) + 1j*cupy.random.random((N,)) a = a.astype(dtype) b = cupy.random.random((N,)) + 1j*cupy.random.random((N,)) b = b.astype(dtype) c = cupy.random.random((N,)) + 1j*cupy.random.random((N,)) c = c.astype(dtype) out = cupy.zeros((N,), dtype=dtype) out_float = cupy.zeros((N,), dtype=cupy.float64) out_down = cupy.zeros((N,), dtype=cupy.complex64) ker = mod.get_function('test_add') ker((grid,), (block,), (a, b, out)) assert (out == a + b).all() ker = mod.get_function('test_sub') ker((grid,), (block,), (a, b, out)) assert (out == a - b).all() ker = mod.get_function('test_mul') ker((grid,), (block,), (a, b, out)) assert cupy.allclose(out, a * b) ker = mod.get_function('test_div') ker((grid,), (block,), (a, b, out)) assert (out == a / b).all() ker = mod.get_function('test_conj') ker((grid,), (block,), (a, out)) assert (out == cupy.conj(a)).all() ker = mod.get_function('test_abs') ker((grid,), (block,), (a, out_float)) assert (out_float == cupy.abs(a)).all() ker = mod.get_function('test_fma') ker((grid,), (block,), (a, b, c, out)) assert cupy.allclose(out, a * b + c) ker = mod.get_function('test_make') ker((grid,), (block,), (out,)) assert (out == 1.8 - 1j * 8.7).all() ker = mod.get_function('test_downcast') ker((grid,), (block,), (a, out_down)) assert (out_down == a.astype(cupy.complex64)).all() # NumPy scalars. b = cupy.complex128(2 + 3j) ker = mod.get_function('test_add_scalar') ker((grid,), (block,), (a, b, out)) assert (out == a + b).all() # Python scalars. b = 2 + 3j ker = mod.get_function('test_add_scalar') ker((grid,), (block,), (a, b, out)) assert (out == a + b).all()
def einsum(*operands, **kwargs): """einsum(subscripts, *operands, dtype=None) Evaluates the Einstein summation convention on the operands. Using the Einstein summation convention, many common multi-dimensional array operations can be represented in a simple fashion. This function provides a way to compute such summations. .. note:: Memory contiguity of calculation result is not always compatible with `numpy.einsum`. ``out``, ``order``, and ``casting`` options are not supported. Args: subscripts (str): Specifies the subscripts for summation. operands (sequence of arrays): These are the arrays for the operation. Returns: cupy.ndarray: The calculation based on the Einstein summation convention. .. seealso:: :func:`numpy.einsum` """ input_subscripts, output_subscript, operands = \ _parse_einsum_input(operands) assert isinstance(input_subscripts, list) assert isinstance(operands, list) dtype = kwargs.pop('dtype', None) # casting = kwargs.pop('casting', 'safe') casting_kwargs = {} # casting is not supported yet in astype optimize = kwargs.pop('optimize', False) if optimize is True: optimize = 'greedy' if kwargs: raise TypeError('Did not understand the following kwargs: %s' % list(kwargs.keys())) result_dtype = cupy.result_type(*operands) if dtype is None else dtype operands = [ cupy.asanyarray(arr) for arr in operands ] input_subscripts = [ _parse_ellipsis_subscript(sub, idx, ndim=arr.ndim) for idx, (sub, arr) in enumerate(zip(input_subscripts, operands)) ] # Get length of each unique dimension and ensure all dimensions are correct dimension_dict = {} for idx, sub in enumerate(input_subscripts): sh = operands[idx].shape for axis, label in enumerate(sub): dim = sh[axis] if label in dimension_dict.keys(): # For broadcasting cases we always want the largest dim size if dimension_dict[label] == 1: dimension_dict[label] = dim elif dim not in (1, dimension_dict[label]): dim_old = dimension_dict[label] raise ValueError( 'Size of label \'%s\' for operand %d (%d) ' 'does not match previous terms (%d).'
% (_chr(label), idx, dim, dim_old)) else: dimension_dict[label] = dim if output_subscript is None: # Build output subscripts tmp_subscripts = list(itertools.chain.from_iterable(input_subscripts)) output_subscript = [ label for label in sorted(set(tmp_subscripts)) if label < 0 or tmp_subscripts.count(label) == 1 ] else: if not options['sum_ellipsis']: if '@' not in output_subscript and -1 in dimension_dict: raise ValueError( 'output has more dimensions than subscripts ' 'given in einstein sum, but no \'...\' ellipsis ' 'provided to broadcast the extra dimensions.') output_subscript = _parse_ellipsis_subscript( output_subscript, None, ellipsis_len=sum(label < 0 for label in dimension_dict.keys()) ) # Make sure output subscripts are in the input tmp_subscripts = set(itertools.chain.from_iterable(input_subscripts)) for label in output_subscript: if label not in tmp_subscripts: raise ValueError( 'einstein sum subscripts string included output subscript ' '\'%s\' which never appeared in an input' % _chr(label)) if len(output_subscript) != len(set(output_subscript)): for label in output_subscript: if output_subscript.count(label) >= 2: raise ValueError( 'einstein sum subscripts string includes output ' 'subscript \'%s\' multiple times' % _chr(label)) _einsum_diagonals(input_subscripts, operands) # no more raises if len(operands) >= 2: if any(arr.size == 0 for arr in operands): return cupy.zeros( tuple(dimension_dict[label] for label in output_subscript), dtype=result_dtype ) # Don't squeeze if unary, because this affects later (in trivial sum) # whether the return is a writeable view. for idx in six.moves.range(len(operands)): arr = operands[idx] if 1 in arr.shape: squeeze_indices = [] sub = [] for axis, label in enumerate(input_subscripts[idx]): if arr.shape[axis] == 1: squeeze_indices.append(axis) else: sub.append(label) input_subscripts[idx] = sub operands[idx] = cupy.squeeze(arr, axis=tuple(squeeze_indices)) assert operands[idx].ndim == len(input_subscripts[idx]) del arr # unary einsum without summation should return a (writeable) view returns_view = len(operands) == 1 # unary sum for idx, sub in enumerate(input_subscripts): other_subscripts = copy.copy(input_subscripts) other_subscripts[idx] = output_subscript other_subscripts = set(itertools.chain.from_iterable(other_subscripts)) sum_axes = tuple( axis for axis, label in enumerate(sub) if label not in other_subscripts ) if sum_axes: returns_view = False input_subscripts[idx] = [ label for axis, label in enumerate(sub) if axis not in sum_axes ] operands[idx] = operands[idx].sum( axis=sum_axes, dtype=result_dtype) if returns_view: operands = [a.view() for a in operands] else: operands = [ a.astype(result_dtype, copy=False, **casting_kwargs) for a in operands ] # no more casts optimize_algorithms = { 'greedy': _greedy_path, 'optimal': _optimal_path, } if optimize is False: path = [tuple(six.moves.range(len(operands)))] elif len(optimize) and (optimize[0] == 'einsum_path'): path = optimize[1:] else: try: if len(optimize) == 2 and isinstance(optimize[1], (int, float)): algo = optimize_algorithms[optimize[0]] memory_limit = int(optimize[1]) else: algo = optimize_algorithms[optimize] memory_limit = 2 ** 31 # TODO(kataoka): fix? 
except (TypeError, KeyError): # unhashable type or not found raise TypeError('Did not understand the path (optimize): %s' % str(optimize)) input_sets = [set(sub) for sub in input_subscripts] output_set = set(output_subscript) path = algo(input_sets, output_set, dimension_dict, memory_limit) if any(len(indices) > 2 for indices in path): warnings.warn( 'memory efficient einsum is not supported yet', util.PerformanceWarning) for idx0, idx1 in _iter_path_pairs(path): # "reduced" binary einsum arr0 = operands.pop(idx0) sub0 = input_subscripts.pop(idx0) arr1 = operands.pop(idx1) sub1 = input_subscripts.pop(idx1) sub_others = list(itertools.chain( output_subscript, itertools.chain.from_iterable(input_subscripts))) arr_out, sub_out = reduced_binary_einsum( arr0, sub0, arr1, sub1, sub_others) operands.append(arr_out) input_subscripts.append(sub_out) del arr0, arr1 # unary einsum at last arr0, = operands sub0, = input_subscripts transpose_axes = [] for label in output_subscript: if label in sub0: transpose_axes.append(sub0.index(label)) arr_out = arr0.transpose(transpose_axes).reshape([ dimension_dict[label] for label in output_subscript ]) assert returns_view or arr_out.dtype == result_dtype return arr_out
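# Hedged usage examples for the einsum implementation above, via the public
# cupy.einsum entry point:
import cupy

a = cupy.arange(6, dtype=cupy.float64).reshape(2, 3)
b = cupy.arange(12, dtype=cupy.float64).reshape(3, 4)

mat = cupy.einsum('ij,jk->ik', a, b)        # matrix multiplication
tr = cupy.einsum('ii', cupy.eye(3))         # trace of the identity -> 3.0
outer = cupy.einsum('i,j->ij', a[0], b[0])  # outer product, shape (3, 4)

assert cupy.allclose(mat, a @ b)
assert float(tr) == 3.0
print(outer.shape)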
def _minor_reduce(self, ufunc, axis, nonzero): """Reduce nonzeros with a ufunc over the minor axis when non-empty Can be applied to a function of self.data by supplying data parameter. Warning: this does not call sum_duplicates() Args: ufunc (object): Function handle giving the operation to be conducted. axis (int): Matrix over which the reduction should be conducted. Returns: (cupy.ndarray): Reduce result for nonzeros in each major_index. """ # Call to the appropriate kernel function if axis == 1: # Create the vector to hold output value = cupy.zeros(self.shape[0]).astype(cupy.float64) if nonzero: # Perform the calculation if ufunc == cupy.amax: self._max_nonzero_reduction_kern( (self.shape[0], ), (1, ), (self.data.astype( cupy.float64), self.indptr[:len(self.indptr) - 1], self.indptr[1:], cupy.int64(self.shape[1]), value)) if ufunc == cupy.amin: self._min_nonzero_reduction_kern( (self.shape[0], ), (1, ), (self.data.astype( cupy.float64), self.indptr[:len(self.indptr) - 1], self.indptr[1:], cupy.int64(self.shape[1]), value)) else: # Perform the calculation if ufunc == cupy.amax: self._max_reduction_kern( (self.shape[0], ), (1, ), (self.data.astype( cupy.float64), self.indptr[:len(self.indptr) - 1], self.indptr[1:], cupy.int64(self.shape[1]), value)) if ufunc == cupy.amin: self._min_reduction_kern( (self.shape[0], ), (1, ), (self.data.astype( cupy.float64), self.indptr[:len(self.indptr) - 1], self.indptr[1:], cupy.int64(self.shape[1]), value)) if axis == 0: # Create the vector to hold output value = cupy.zeros(self.shape[1]).astype(cupy.float64) if nonzero: # Perform the calculation if ufunc == cupy.amax: self._max_nonzero_reduction_kern( (self.shape[1], ), (1, ), (self.data.astype( cupy.float64), self.indptr[:len(self.indptr) - 1], self.indptr[1:], cupy.int64(self.shape[0]), value)) if ufunc == cupy.amin: self._min_nonzero_reduction_kern( (self.shape[1], ), (1, ), (self.data.astype( cupy.float64), self.indptr[:len(self.indptr) - 1], self.indptr[1:], cupy.int64(self.shape[0]), value)) else: # Perform the calculation if ufunc == cupy.amax: self._max_reduction_kern( (self.shape[1], ), (1, ), (self.data.astype( cupy.float64), self.indptr[:len(self.indptr) - 1], self.indptr[1:], cupy.int64(self.shape[0]), value)) if ufunc == cupy.amin: self._min_reduction_kern( (self.shape[1], ), (1, ), (self.data.astype( cupy.float64), self.indptr[:len(self.indptr) - 1], self.indptr[1:], cupy.int64(self.shape[0]), value)) return value
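# Hedged usage sketch: _minor_reduce above backs axis-wise max()/min()
# reductions of compressed sparse matrices; only public cupyx APIs are used
# here, and the return type is assumed to follow the SciPy convention.
import cupy
import cupyx.scipy.sparse as sparse

dense = cupy.array([[0.0, 2.0, 0.0],
                    [5.0, 0.0, -1.0],
                    [0.0, 0.0, 0.0]])
csr = sparse.csr_matrix(dense)
row_max = csr.max(axis=1)  # per-row maxima; implicit zeros participate
col_min = csr.min(axis=0)  # per-column minima
print(row_max)  # expected values per row: 2, 5, 0
print(col_min)  # expected values per column: 0, 0, -1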