Example #1
    def test_multi_gpu(self):
        with cuda.Device(0):
            a = cupy.zeros((10,))
            cupy.core.core.scan(a)
        with cuda.Device(1):
            a = cupy.zeros((10,))
            cupy.core.core.scan(a)
Example #2
  def basicComputeLoop(self, imp, params, inputSize, columnDimensions,
                       seed = None):
    """
    Feed in some vectors and retrieve outputs. Ensure the right number of
    columns win, that we always get binary outputs, and that nothing crashes.
    """
    sp = CreateSP(imp,params)

    # Create a set of input vectors as well as various numpy vectors we will
    # need to retrieve data from the SP
    numRecords = 100
    randomState = getNumpyRandomGenerator(seed)
    inputMatrix = (
      randomState.rand(numRecords,inputSize) > 0.8).astype(uintType)

    y = cupy.zeros(columnDimensions, dtype = uintType)
    dutyCycles = cupy.zeros(columnDimensions, dtype = uintType)

    # With learning on we should get the requested number of winners
    for v in inputMatrix:
      y.fill(0)
      sp.compute(v, True, y)
      self.assertEqual(sp.getNumActiveColumnsPerInhArea(),y.sum())
      self.assertEqual(0,y.min())
      self.assertEqual(1,y.max())

    # With learning off and some prior training we should get the requested
    # number of winners
    for v in inputMatrix:
      y.fill(0)
      sp.compute(v, False, y)
      self.assertEqual(sp.getNumActiveColumnsPerInhArea(),y.sum())
      self.assertEqual(0,y.min())
      self.assertEqual(1,y.max())
Example #3
def bincount(x, weights=None, minlength=None):
    """Count number of occurrences of each value in array of non-negative ints.

    Args:
        x (cupy.ndarray): Input array.
        weights (cupy.ndarray): Weights array which has the same shape as
            ``x``.
        minlength (int): A minimum number of bins for the output array.

    Returns:
        cupy.ndarray: The result of binning the input array. The length of
            output is equal to ``max(cupy.max(x) + 1, minlength)``.

    .. seealso:: :func:`numpy.bincount`

    """
    if x.ndim > 1:
        raise ValueError('object too deep for desired array')
    if x.ndim < 1:
        raise ValueError('object of too small depth for desired array')
    if x.dtype.kind == 'f':
        raise TypeError('x must be int array')
    if (x < 0).any():
        raise ValueError('The first argument of bincount must be non-negative')
    if weights is not None and x.shape != weights.shape:
        raise ValueError('The weights and list don\'t have the same length.')
    if minlength is not None:
        minlength = int(minlength)
        if minlength <= 0:
            raise ValueError('minlength must be positive')

    size = int(cupy.max(x)) + 1
    if minlength is not None:
        size = max(size, minlength)

    if weights is None:
        # atomicAdd for int64 is not provided
        b = cupy.zeros((size,), dtype=cupy.int32)
        cupy.ElementwiseKernel(
            'S x', 'raw U bin',
            'atomicAdd(&bin[x], 1)',
            'bincount_kernel'
        )(x, b)
        b = b.astype(numpy.intp)
    else:
        # atomicAdd for float64 is not provided
        b = cupy.zeros((size,), dtype=cupy.float32)
        cupy.ElementwiseKernel(
            'S x, T w', 'raw U bin',
            'atomicAdd(&bin[x], w)',
            'bincount_with_weight_kernel'
        )(x, weights, b)
        b = b.astype(cupy.float64)

    return b
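A minimal usage sketch (assuming a CUDA device is available), checking that the function above agrees with numpy.bincount:

import cupy
import numpy

x = cupy.array([0, 1, 1, 3, 2, 1, 7])
w = cupy.array([0.5, 1.0, 1.0, 0.25, 2.0, 1.0, 0.5])

assert (bincount(x).get() == numpy.bincount(x.get())).all()
assert numpy.allclose(bincount(x, weights=w).get(),
                      numpy.bincount(x.get(), weights=w.get()))
assert bincount(x, minlength=10).size == 10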
Example #4
    def test_22(self):
        N = 32
        M = 4
        Nd = 8
        D = cp.random.randn(Nd, Nd, M)
        D /= cp.sqrt(cp.sum(D**2, axis=(0, 1)))
        X0 = cp.zeros((N, N, M))
        xr = cp.random.randn(N, N, M)
        xp = cp.abs(xr) > 3
        X0[xp] = cp.random.randn(X0[xp].size)
        S = cp.sum(sl.fftconv(D, X0), axis=2)
        lmbda = 1e-3
        opt = cbpdn.ConvBPDN.Options(
            {'Verbose': False, 'MaxMainIter': 500, 'RelStopTol': 1e-5,
             'rho': 5e-1, 'AutoRho': {'Enabled': False}})
        bp = cbpdn.ConvBPDN(D, S, lmbda, opt)
        Xp = bp.solve()
        epsilon = cp.linalg.norm(bp.reconstruct(Xp).squeeze() - S)
        opt = cbpdn.ConvMinL1InL2Ball.Options(
            {'Verbose': False, 'MaxMainIter': 500, 'RelStopTol': 1e-5,
             'rho': 2e2, 'RelaxParam': 1.0, 'AutoRho': {'Enabled': False}})
        bc = cbpdn.ConvMinL1InL2Ball(D, S, epsilon=epsilon, opt=opt)
        Xc = bc.solve()
        assert cp.linalg.norm(Xp - Xc) / cp.linalg.norm(Xp) < 1e-3
        assert (cp.abs(cp.linalg.norm(Xp.ravel(), 1) -
                       cp.linalg.norm(Xc.ravel(), 1)) < 1e-3)
Example #5
    def test_adv_getitem_cupy_indices2(self):
        shape = (2, 3, 4)
        a = cupy.zeros(shape)
        index = cupy.array([1, 0])
        b = a[(slice(None), index)]
        b_cpu = a.get()[(slice(None), index.get())]
        testing.assert_array_equal(b, b_cpu)
Example #6
def diag(v, k=0):
    """Returns a diagonal or a diagonal array.

    Args:
        v (array-like): Array or array-like object.
        k (int): Index of diagonals. Zero indicates the main diagonal, a
            positive value an upper diagonal, and a negative value a lower
            diagonal.

    Returns:
        cupy.ndarray: If ``v`` indicates a 1-D array, then it returns a 2-D
        array with the specified diagonal filled by ``v``. If ``v`` indicates a
        2-D array, then it returns the specified diagonal of ``v``. In latter
        case, if ``v`` is a cupy.ndarray object, then its view is returned.

    .. seealso:: :func:`numpy.diag`

    """
    if isinstance(v, cupy.ndarray):
        if v.ndim == 1:
            size = v.size + abs(k)
            ret = cupy.zeros((size, size), dtype=v.dtype)
            ret.diagonal(k)[:] = v
            return ret
        else:
            return v.diagonal(k)
    else:
        return cupy.array(numpy.diag(v, k))
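A quick usage sketch of both branches: a 1-D input builds a 2-D array, while a 2-D cupy.ndarray input returns a view of its diagonal:

import cupy

v = cupy.array([1, 2, 3])
m = diag(v, k=1)        # 4x4 array with [1, 2, 3] on the first upper diagonal
assert (m.diagonal(1) == v).all()

d = diag(m)             # main diagonal of a 2-D input; returned as a view
assert (d == 0).all()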
Example #7
    def test_scatter_add_cupy_arguments(self, dtype):
        shape = (2, 3)
        a = cupy.zeros(shape, dtype)
        slices = (cupy.array([1, 1]), slice(None))
        a.scatter_add(slices, cupy.array(1.))
        testing.assert_array_equal(
            a, cupy.array([[0., 0., 0.], [2., 2., 2.]], dtype))
Example #8
  def testVeryFewScores(self):
    """
    This calls estimateAnomalyLikelihoods and updateAnomalyLikelihoods
    with one or no scores.
    """

    # Generate an estimate using two data points
    data1 = _generateSampleData(mean=42.0, variance=1e-10)

    _, _, estimatorParams = (
      an.estimateAnomalyLikelihoods(data1[0:2])
    )

    self.assertTrue(an.isValidEstimatorParams(estimatorParams))

    # Check that the estimated mean is that value
    dParams = estimatorParams["distribution"]
    self.assertWithinEpsilon(dParams["mean"], data1[0][2])

    # Can't generate an estimate using no data points
    data1 = cupy.zeros(0)
    with self.assertRaises(ValueError):
      an.estimateAnomalyLikelihoods(data1)

    # Can't update with no scores
    with self.assertRaises(ValueError):
      an.updateAnomalyLikelihoods(data1, estimatorParams)
Example #9
  def testSamplePopulationTooSmall(self):
    r = Random(42)
    population = cupy.array([1, 2, 3, 4], dtype="uint32")
    choices = cupy.zeros([5], dtype="uint32")

    self.assertRaises(
        ValueError, r.sample, population, choices)
Example #10
    def predict(self, x, train=False, ratio=0.5):
        # Ensemble over heads: the first KIND_OF_HEADS - 1 heads share half of
        # the total weight equally; the last head contributes the other half.
        y = xp.zeros(OUTPUT_NODE)
        for i in range(KIND_OF_HEADS - 1):
            y += self.heads[i][0].predict(x, train, ratio).data[0].copy() / (KIND_OF_HEADS - 1)
        y /= 2.0
        y += self.heads[KIND_OF_HEADS - 1][0].predict(x, train, ratio).data[0].copy() / 2.0
        return y
Example #11
  def testShuffleEmpty(self):
    r = Random(42)
    arr = cupy.zeros([0], dtype="uint32")

    r.shuffle(arr)

    self.assertEqual(arr.size, 0)
Example #12
  def testSampleWrongDimensionsChoices(self):
    """Check that passing a multi-dimensional array throws a ValueError."""
    r = Random(42)
    population = cupy.array([1, 2, 3, 4], dtype="uint32")
    choices = cupy.zeros([2, 2], dtype="uint32")

    self.assertRaises(ValueError, r.sample, population, choices)
Example #13
    def test_scatter_add_cupy_arguments_mask(self, dtype):
        shape = (2, 3)
        a = cupy.zeros(shape, dtype)
        slices = (cupy.array([True, False]), slice(None))
        a.scatter_add(slices, cupy.array(1.))
        testing.assert_array_equal(
            a, cupy.array([[1., 1., 1.], [0., 0., 0.]], dtype))
Example #14
    def test_adv_getitem_cupy_indices3(self):
        shape = (2, 3, 4)
        a = cupy.zeros(shape)
        index = cupy.array([True, False])
        b = a[index]
        b_cpu = a.get()[index.get()]
        testing.assert_array_equal(b, b_cpu)
Example #15
    def test_cupy_indices_integer_array(self):
        shape = (2, 3)
        a = cupy.zeros(shape)
        indexes = cupy.array([0, 1])
        a[:, indexes] = cupy.array(1.)
        testing.assert_array_equal(
            a, cupy.array([[1., 1., 0.], [1., 1., 0.]]))
Example #16
    def test_cupy_indices_boolean_array(self):
        shape = (2, 3)
        a = cupy.zeros(shape)
        indexes = cupy.array([True, False])
        a[indexes] = cupy.array(1.)
        testing.assert_array_equal(
            a, cupy.array([[1., 1., 1.], [0., 0., 0.]]))
Example #17
  def testSample(self):
    r = Random(42)
    population = cupy.array([1, 2, 3, 4], dtype="uint32")
    choices = cupy.zeros([2], dtype="uint32")

    r.sample(population, choices)

    self.assertEqual(choices[0], 1)
    self.assertEqual(choices[1], 3)
Example #18
  def testSampleNone(self):
    r = Random(42)
    population = cupy.array([1, 2, 3, 4], dtype="uint32")
    choices = cupy.zeros([0], dtype="uint32")

    # Just make sure there is no exception thrown.
    r.sample(population, choices)

    self.assertEqual(choices.size, 0)
Example #19
def zeros(shape, dtype=numpy.float32, stream=None):
    """Creates a zero-filled cupy.ndarray object.

    This function is equivalent to ``full(shape, 0, dtype, stream)``.

    """
    warnings.warn("chainer.cuda.zeros is deprecated. Use cupy.zeros instead.", DeprecationWarning)
    check_cuda_available()
    assert stream is None
    return cupy.zeros(shape, dtype=dtype)
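Since this wrapper only warns and forwards to CuPy, new code can call the replacement directly; a minimal sketch:

import cupy

a = cupy.zeros((2, 3), dtype=cupy.float32)  # preferred over chainer.cuda.zeros
assert a.shape == (2, 3) and a.dtype == cupy.float32 and not bool(a.any())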
Example #20
    def test_scatter_add_differnt_dtypes(self, src_dtype, dst_dtype):
        shape = (2, 3)
        a = cupy.zeros(shape, dtype=src_dtype)
        value = cupy.array(1, dtype=dst_dtype)
        slices = ([1, 1], slice(None))
        a.scatter_add(slices, value)

        numpy.testing.assert_almost_equal(
            a.get(),
            numpy.array([[0, 0, 0], [2, 2, 2]], dtype=src_dtype))
Example #21
    def test_scatter_add_differnt_dtypes_mask(self, src_dtype, dst_dtype):
        shape = (2, 3)
        a = cupy.zeros(shape, dtype=src_dtype)
        value = cupy.array(1, dtype=dst_dtype)
        slices = (numpy.array([[True, False, False], [False, True, True]]))
        a.scatter_add(slices, value)

        numpy.testing.assert_almost_equal(
            a.get(),
            numpy.array([[1, 0, 0], [0, 1, 1]], dtype=src_dtype))
Example #22
def get_features(docs, max_length):
    docs = list(docs)
    Xs = xp.zeros((len(docs), max_length), dtype='i')
    for i, doc in enumerate(docs):
        j = 0
        for token in doc:
            if token.has_vector and not token.is_punct and not token.is_space:
                Xs[i, j] = token.norm
                j += 1
                if j >= max_length:
                    break
    return Xs
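A self-contained sketch of the padding behaviour using a stand-in token type (the real inputs would be spaCy Doc objects); it assumes the module-level xp alias above is bound to numpy:

from collections import namedtuple

# stand-in for a spaCy token; only the attributes get_features reads
Token = namedtuple('Token', 'has_vector is_punct is_space norm')
doc = [Token(True, False, False, 7), Token(False, False, False, 9),
       Token(True, True, False, 3), Token(True, False, False, 5)]

Xs = get_features([doc], max_length=3)
# no-vector and punctuation tokens are skipped; the rest are left-packed
assert Xs.tolist() == [[7, 5, 0]]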
Example #23
def _getSimplePatterns(numOnes, numPatterns):
  """Very simple patterns. Each pattern has numOnes consecutive
  bits on. There are numPatterns*numOnes bits in the vector. These patterns
  are used as elements of sequences when building up a training set."""

  numCols = numOnes * numPatterns
  p = []
  for i in range(numPatterns):
    x = np.zeros(numCols, dtype='float32')
    x[i*numOnes:(i + 1)*numOnes] = 1
    p.append(x)

  return p
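For instance (assuming numpy is imported as np), _getSimplePatterns(2, 3) returns three 6-bit vectors with disjoint pairs of active bits:

p = _getSimplePatterns(2, 3)
assert len(p) == 3
assert [list(x) for x in p] == [[1, 1, 0, 0, 0, 0],
                                [0, 0, 1, 1, 0, 0],
                                [0, 0, 0, 0, 1, 1]]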
Example #24
    def interval(self, mx, size):
        """Generate multiple integers independently sampled uniformly from ``[0, mx]``.

        Args:
            mx (int): Upper bound of the interval
            size (None or int or tuple): Shape of the array or the scalar
                returned.
        Returns:
            int or cupy.ndarray: If ``None``, an :class:`cupy.ndarray` with
            shape ``()`` is returned.
            If ``int``, 1-D array of length size is returned.
            If ``tuple``, multi-dimensional array with shape
            ``size`` is returned.
            Currently, each element of the array is ``numpy.int32``.
        """
        dtype = numpy.int32
        if size is None:
            return self.interval(mx, 1).reshape(())
        elif isinstance(size, int):
            size = (size, )

        if mx == 0:
            return cupy.zeros(size, dtype=dtype)

        mask = (1 << mx.bit_length()) - 1
        mask = cupy.array(mask, dtype=dtype)

        ret = cupy.zeros(size, dtype=dtype)
        sample = cupy.zeros(size, dtype=dtype)
        done = cupy.zeros(size, dtype=numpy.bool_)
        while True:
            curand.generate(
                self._generator, sample.data.ptr, sample.size)
            sample &= mask
            success = sample <= mx
            ret = cupy.where(success, sample, ret)
            done |= success
            if done.all():
                return ret
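The loop above is rejection sampling: draw bit_length(mx)-bit integers, mask them, and redraw any slot that lands above mx. A NumPy-only sketch of the same idea, with hypothetical names:

import numpy as np

def interval_rejection(mx, size, rng=np.random.default_rng(0)):
    # Draw uniformly from [0, mx] by masking to bit_length(mx) bits
    # and redrawing any value that exceeds mx.
    mask = (1 << int(mx).bit_length()) - 1
    ret = np.zeros(size, dtype=np.int32)
    done = np.zeros(size, dtype=bool)
    while not done.all():
        sample = rng.integers(0, mask + 1, size, dtype=np.int32)
        ok = sample <= mx
        ret = np.where(ok & ~done, sample, ret)
        done |= ok
    return ret

assert interval_rejection(5, (1000,)).max() <= 5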
Example #25
    def check_normal(self, curand_func, dtype):
        shape = core.get_size(self.size)
        exp_size = six.moves.reduce(operator.mul, shape, 1)
        if exp_size % 2 == 1:
            exp_size += 1

        curand_func.return_value = cupy.zeros(exp_size, dtype=dtype)
        out = self.rs.normal(self.args[0], self.args[1], self.size, dtype)
        gen, _, size, loc, scale = curand_func.call_args[0]
        self.assertIs(gen, self.rs._generator)
        self.assertEqual(size, exp_size)
        self.assertIs(loc, self.args[0])
        self.assertIs(scale, self.args[1])
        self.assertEqual(out.shape, shape)
Example #26
def getNoWhitespaces(IDs, whitespace_IDs):
    no_whitespace_ids = whitespace_IDs.shape[0]
    T = IDs.shape[0]
    N = IDs.shape[1]
    no_whitespaces = cp.zeros((N,), dtype=np.int32)
    
    _GetNoWhitespaces = _GetNoWhitespaces_kernel()
    
    bdim, gdim = (16,1,1), (1,N,1)

    _GetNoWhitespaces(grid=gdim, block=bdim,
          args=(IDs, whitespace_IDs, no_whitespaces,
                no_whitespace_ids, T, N)
            )
    return no_whitespaces
Example #27
def zeros_like(array, stream=None):
    """Creates a zero-filled cupy.ndarray object like the given array.

    Args:
        array (cupy.ndarray or numpy.ndarray): Base array.
        stream (cupy.cuda.Stream): CUDA stream.

    Returns:
        cupy.ndarray: Zero-filled array.

    """
    warnings.warn("chainer.cuda.zeros_like is deprecated. Use cupy.zeros_like instead.", DeprecationWarning)
    check_cuda_available()
    assert stream is None
    if isinstance(array, cupy.ndarray):
        return cupy.zeros_like(array)
    return cupy.zeros(array.shape, dtype=array.dtype)
Example #28
def _call_nms_kernel(bbox, thresh):
    n_bbox = bbox.shape[0]
    threads_per_block = 64
    col_blocks = np.ceil(n_bbox / threads_per_block).astype(np.int32)
    blocks = (col_blocks, col_blocks, 1)
    threads = (threads_per_block, 1, 1)

    mask_dev = cp.zeros((n_bbox * col_blocks,), dtype=np.uint64)
    bbox = cp.ascontiguousarray(bbox, dtype=np.float32)
    kern = _load_kernel('nms_kernel', _nms_gpu_code)
    kern(blocks, threads, args=(cp.int32(n_bbox), cp.float32(thresh),
                                bbox, mask_dev))

    mask_host = mask_dev.get()
    selection, n_selec = _nms_gpu_post(
        mask_host, n_bbox, threads_per_block, col_blocks)
    return selection, n_selec
Example #29
def _non_maximum_suppression_gpu(bbox, thresh, score=None, limit=None):
    if len(bbox) == 0:
        return cp.zeros((0,), dtype=np.int32)

    n_bbox = bbox.shape[0]

    if score is not None:
        order = score.argsort()[::-1].astype(np.int32)
    else:
        order = cp.arange(n_bbox, dtype=np.int32)

    sorted_bbox = bbox[order, :]
    selec, n_selec = _call_nms_kernel(
        sorted_bbox, thresh)
    selec = selec[:n_selec]
    selec = order[selec]
    if limit is not None:
        selec = selec[:limit]
    return selec
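For reference, a plain-NumPy sketch of what the kernel plus _nms_gpu_post compute on the score-sorted boxes: greedy suppression of any box whose IoU with an already-kept box reaches thresh (boxes as (y_min, x_min, y_max, x_max)):

import numpy as np

def nms_cpu_reference(bbox, thresh):
    # Greedy NMS over boxes already sorted by descending score.
    selec = []
    for i, b in enumerate(bbox):
        kept = bbox[selec]                      # shape (len(selec), 4)
        tl = np.maximum(b[:2], kept[:, :2])     # intersection top-left
        br = np.minimum(b[2:], kept[:, 2:])     # intersection bottom-right
        inter = np.prod(br - tl, axis=1) * (tl < br).all(axis=1)
        area = np.prod(b[2:] - b[:2])
        areas = np.prod(kept[:, 2:] - kept[:, :2], axis=1)
        iou = inter / (area + areas - inter)
        if (iou < thresh).all():                # keep if far from all kept boxes
            selec.append(i)
    return np.array(selec, dtype=np.int32)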
Example #30
  def testGetstateSetstate(self):
    nDims = 32  # must be a multiple of 8, because of SSE
    nClass = 4
    size = 20
    labels = _RGEN.random_integers(0, nClass - 1, size)
    samples = np.zeros((size, nDims), dtype=_DTYPE)

    centers = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])

    for i in range(0, size):
      t = 6.28 * _RGEN.random_sample()
      samples[i][0] = 2 * centers[labels[i]][0] + 0.5*_RGEN.rand() * np.cos(t)
      samples[i][1] = 2 * centers[labels[i]][1] + 0.5*_RGEN.rand() * np.sin(t)

    classifier = svm_dense(0, nDims, seed=_SEED, probability = True)

    for y, xList in zip(labels, samples):
      x = np.array(xList, dtype=_DTYPE)
      classifier.add_sample(float(y), x)

    classifier.train(gamma=1.0/3.0, C=100, eps=1e-1)
    classifier.cross_validate(2, gamma=0.5, C=10, eps=1e-3)

    s1 = classifier.__getstate__()
    h1 = hashlib.md5(s1).hexdigest()

    classifier2 = svm_dense(0, nDims)
    classifier2.__setstate__(s1)
    s2 = classifier2.__getstate__()
    h2 = hashlib.md5(s2).hexdigest()

    self.assertEqual(h1, h2)

    with open("svm_test.bin", "wb") as f:
      pickle.dump(classifier, f)
    with open("svm_test.bin", "rb") as f:
      classifier3 = pickle.load(f)
    s3 = classifier3.__getstate__()
    h3 = hashlib.md5(s3).hexdigest()

    self.assertEqual(h1, h3)

    os.unlink("svm_test.bin")
Example #31
def affine_transform(
    input,
    matrix,
    offset=0.0,
    output_shape=None,
    output=None,
    order=3,
    mode="constant",
    cval=0.0,
    prefilter=True,
    *,
    allow_float32=True,
):
    """Apply an affine transformation.

    Given an output image pixel index vector ``o``, the pixel value is
    determined from the input image at position
    ``cupy.dot(matrix, o) + offset``.

    Args:
        input (cupy.ndarray): The input array.
        matrix (cupy.ndarray): The inverse coordinate transformation matrix,
            mapping output coordinates to input coordinates. If ``ndim`` is the
            number of dimensions of ``input``, the given matrix must have one
            of the following shapes:

                - ``(ndim, ndim)``: the linear transformation matrix for each
                  output coordinate.
                - ``(ndim,)``: assume that the 2D transformation matrix is
                  diagonal, with the diagonal specified by the given value.
                - ``(ndim + 1, ndim + 1)``: assume that the transformation is
                  specified using homogeneous coordinates. In this case, any
                  value passed to ``offset`` is ignored.
                - ``(ndim, ndim + 1)``: as above, but the bottom row of a
                  homogeneous transformation matrix is always
                  ``[0, 0, ..., 1]``, and may be omitted.

        offset (float or sequence): The offset into the array where the
            transform is applied. If a float, ``offset`` is the same for each
            axis. If a sequence, ``offset`` should contain one value for each
            axis.
        output_shape (tuple of ints): Shape tuple.
        output (cupy.ndarray or ~cupy.dtype): The array in which to place the
            output, or the dtype of the returned array.
        order (int): The order of the spline interpolation. Must be between 0
            and 5.
        mode (str): Points outside the boundaries of the input are filled
            according to the given mode (``'constant'``, ``'nearest'``,
            ``'mirror'`` or ``'opencv'``). Default is ``'constant'``.
        cval (scalar): Value used for points outside the boundaries of
            the input if ``mode='constant'`` or ``mode='opencv'``. Default is
            0.0
        prefilter (bool): Whether the input is prefiltered with
            :func:`spline_filter` before interpolation. Only has an effect
            when ``order > 1``.

    Returns:
        cupy.ndarray or None:
            The transformed input. If ``output`` is given as a parameter,
            ``None`` is returned.

    Notes
    -----
    This implementation handles boundary modes 'wrap' and 'reflect' correctly,
    while SciPy does not (at least as of release 1.4.0). So, if comparing to
    SciPy, some disagreement near the borders may occur unless
    ``mode == 'mirror'``.

    For ``order > 1`` with ``prefilter == True``, the spline prefilter boundary
    conditions are implemented correctly only for modes 'mirror', 'reflect'
    and 'wrap'. For the other modes ('constant' and 'nearest'), there is some
    inaccuracy near the boundary of the array.

    .. seealso:: :func:`scipy.ndimage.affine_transform`
    """

    _check_parameter("affine_transform", order, mode)

    if not hasattr(offset, "__iter__") and type(offset) is not cupy.ndarray:
        offset = [offset] * input.ndim

    matrix = cupy.asarray(matrix, order="C", dtype=float)
    if matrix.ndim not in [1, 2]:
        raise RuntimeError("no proper affine matrix provided")
    if matrix.ndim == 2:
        if matrix.shape[0] == matrix.shape[1] - 1:
            offset = matrix[:, -1]
            matrix = matrix[:, :-1]
        elif matrix.shape[0] == input.ndim + 1:
            offset = matrix[:-1, -1]
            matrix = matrix[:-1, :-1]

    if mode == "opencv":
        m = cupy.zeros((input.ndim + 1, input.ndim + 1), dtype=float)
        m[:-1, :-1] = matrix
        m[:-1, -1] = offset
        m[-1, -1] = 1
        m = cupy.linalg.inv(m)
        m[:2] = cupy.roll(m[:2], 1, axis=0)
        m[:2, :2] = cupy.roll(m[:2, :2], 1, axis=1)
        matrix = m[:-1, :-1]
        offset = m[:-1, -1]

    if output_shape is None:
        output_shape = input.shape

    matrix = matrix.astype(float, copy=False)
    if order is None:
        order = 1
    ndim = input.ndim
    output = _get_output(output, input, shape=output_shape)
    if input.dtype.kind in "iu":
        input = input.astype(cupy.float32)

    if prefilter and order > 1:
        padded, npad = _prepad_for_spline_filter(input, mode, cval)
        filtered = spline_filter(
            padded,
            order,
            output=input.dtype,
            mode=mode,
            allow_float32=allow_float32,
        )
    else:
        npad = 0
        filtered = input

    # kernel assumes C-contiguous arrays
    if not filtered.flags.c_contiguous:
        filtered = cupy.ascontiguousarray(filtered)
    if not matrix.flags.c_contiguous:
        matrix = cupy.ascontiguousarray(matrix)

    integer_output = output.dtype.kind in "iu"
    large_int = (max(_misc._prod(input.shape), _misc._prod(output_shape)) >
                 1 << 31)
    if matrix.ndim == 1:
        offset = cupy.asarray(offset, dtype=float, order="C")
        offset = -offset / matrix
        kern = _get_zoom_shift_kernel(
            ndim,
            large_int,
            output_shape,
            mode,
            cval=cval,
            order=order,
            integer_output=integer_output,
            nprepad=npad,
        )
        kern(filtered, offset, matrix, output)
    else:
        kern = _get_affine_kernel(
            ndim,
            large_int,
            output_shape,
            mode,
            cval=cval,
            order=order,
            integer_output=integer_output,
            nprepad=npad,
        )
        m = cupy.zeros((ndim, ndim + 1), dtype=float)
        m[:, :-1] = matrix
        m[:, -1] = cupy.asarray(offset, dtype=float)
        kern(filtered, m, output)
    return output
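A usage sketch of the function above (it assumes the module helpers it relies on, such as _get_output and _get_affine_kernel, are available alongside it): shifting a small image one row using an identity linear part and an offset, with nearest-neighbor interpolation so the comparison is exact:

import cupy

img = cupy.arange(16, dtype=cupy.float32).reshape(4, 4)
shifted = affine_transform(img, cupy.eye(2), offset=(1, 0), order=0)
# output[o] = input[matrix @ o + offset], so output row i is input row i + 1
assert (shifted[:-1] == img[1:]).all()
assert (shifted[-1] == 0).all()   # out-of-bounds row takes cval = 0.0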
Example #32
def call_adam(grad, data, state_m, state_v):
    adam(grad, hp.lr, 1 - hp.beta1, 1 - hp.beta2, hp.eps, hp.eta,
         hp.weight_decay_rate, data, state_m, state_v)


def call_adam_fuse(grad, data, state_m, state_v):
    adam_fuse(grad, numpy.float32(hp.lr), numpy.float32(1 - hp.beta1),
              numpy.float32(1 - hp.beta2), numpy.float32(hp.eps),
              numpy.float32(hp.eta), numpy.float32(hp.weight_decay_rate), data,
              state_m, state_v)


sizes = [1, 10, 100, 1000, 2000, 5000]
for size in sizes:
    zero = cupy.zeros((size, size))

    def f():
        call_adam(zero, zero, zero, zero)

    util.measure(f, "adam     , %4d" % (size), 100)

for size in sizes:
    zero = cupy.zeros((size, size))

    def f():
        call_adam_fuse(zero, zero, zero, zero)

    util.measure(f, "adam_fuse, %4d" % (size), 100)

for size in sizes[:4]:
Example #33
rv = np.sqrt(xv**2 + yv**2 + zv**2)
thetav = np.arccos(zv / rv)
phiv = np.arctan2(yv, xv)
del xv, yv, zv, jv, iv

# Load arrays onto GPU using CuPy module
# theta_tol_array = cp.array(theta_tol_array)
# phi_tol_array = cp.array(phi_tol_array)
theta_img = cp.array(theta_img)
phi_img = cp.array(phi_img)
thetav = cp.array(thetav)
phiv = cp.array(phiv)
rv = cp.array(rv)

# Initialize label array
labels = cp.zeros((ni, nj, 111))

# Doing all computations on GPU 1
with cp.cuda.Device(1):
    # Condition to search only in field of view
    #limit_theta = thetav <= theta_max
    # Set closeness tolerance
    # Redefine useful variables so they are on the GPU
    # Reduces data transfer time
    ni = 480
    nj = 480
    theta_max = 4 / 9 * cp.pi
    ni0 = ni / 2 - 0.5
    nj0 = nj / 2 - 0.5
    dimk, dimi, dimj = rv.shape
    start = 76
Example #34
    def test_cuFloatComplex(self):
        N = 100
        block = 32
        grid = (N + block - 1) // block
        dtype = cupy.complex64

        mod = cupy.RawModule(
            code=_test_cuComplex,
            translate_cucomplex=True)
        a = cupy.random.random((N,)) + 1j*cupy.random.random((N,))
        a = a.astype(dtype)
        b = cupy.random.random((N,)) + 1j*cupy.random.random((N,))
        b = b.astype(dtype)
        c = cupy.random.random((N,)) + 1j*cupy.random.random((N,))
        c = c.astype(dtype)
        out = cupy.zeros((N,), dtype=dtype)
        out_float = cupy.zeros((N,), dtype=cupy.float32)
        out_up = cupy.zeros((N,), dtype=cupy.complex128)

        ker = mod.get_function('test_addf')
        ker((grid,), (block,), (a, b, out))
        assert (out == a + b).all()

        ker = mod.get_function('test_subf')
        ker((grid,), (block,), (a, b, out))
        assert (out == a - b).all()

        ker = mod.get_function('test_mulf')
        ker((grid,), (block,), (a, b, out))
        assert cupy.allclose(out, a * b)

        ker = mod.get_function('test_divf')
        ker((grid,), (block,), (a, b, out))
        assert (out == a / b).all()

        ker = mod.get_function('test_conjf')
        ker((grid,), (block,), (a, out))
        assert (out == cupy.conj(a)).all()

        ker = mod.get_function('test_absf')
        ker((grid,), (block,), (a, out_float))
        assert (out_float == cupy.abs(a)).all()

        ker = mod.get_function('test_fmaf')
        ker((grid,), (block,), (a, b, c, out))
        assert cupy.allclose(out, a * b + c)

        ker = mod.get_function('test_makef')
        ker((grid,), (block,), (out,))
        # because of precision issue, the (A==B).all() semantics would fail
        assert cupy.allclose(out, 1.8 - 1j * 8.7)

        ker = mod.get_function('test_upcast')
        ker((grid,), (block,), (a, out_up))
        assert (out_up == a.astype(cupy.complex128)).all()

        # NumPy scalars.
        b = cupy.complex64(2 + 3j)
        ker = mod.get_function('test_addf_scalar')
        ker((grid,), (block,), (a, b, out))
        assert (out == a + b).all()
Example #35
    def test_zeros_strides_f(self):
        a = numpy.zeros((2, 3), dtype='d', order='F')
        b = cupy.zeros((2, 3), dtype='d', order='F')
        self.assertEqual(b.strides, a.strides)
Example #36
    def testNotContiguous(self):
        sbuf = cupy.ones([3, 2])[:, 0]
        rbuf = cupy.zeros([3])
        self.assertRaises((BufferError, ValueError), Sendrecv, sbuf, rbuf)
Example #37
Nframe = 50  # Saves data every Nframe time steps
dt = -1j * 1e-2  # Time step
t = 0.

# --------------------------------------------------------------------------------------------------------------------
# Generating initial state:
# --------------------------------------------------------------------------------------------------------------------
phi = cp.arctan2(Y, X)  # Phase is azimuthal angle around the core

Tf = sm.get_TF_density_3d(c0, c2, X, Y, Z, N=1)

eta = np.where(Z <= 0, 0, 1)  # Parameter used to interpolate between states

# Generate initial wavefunctions:
psiP2 = cp.sqrt(Tf) * cp.sqrt((1 + eta**2)) / 2
psiP1 = cp.zeros((Nx, Ny, Nz))
psi0 = cp.sqrt(Tf) * 1j * cp.sqrt((1 - eta**2) / 2)
psiM1 = cp.zeros((Nx, Ny, Nz))
psiM2 = cp.sqrt(Tf) * cp.sqrt((1 + eta**2)) / 2

Psi = [psiP2, psiP1, psi0, psiM1, psiM2]  # Full 5x1 wavefunction

# Spin rotation on wavefunction:
alpha_angle = 0
beta_angle = 0.01
gamma_angle = 0

Psi = sm.rotation(Psi, alpha_angle, beta_angle, gamma_angle)
N = [dx * dy * cp.sum(cp.abs(wfn)**2)
     for wfn in Psi]  # Atom number of each component
theta_fix = [cp.angle(wfn) for wfn in Psi]
Example #38
    def __init__(self,
                 n_layers,
                 n_samples,
                 n,
                 n_extended,
                 beta,
                 kappa,
                 sigma_0,
                 sigma_v,
                 sigma_scaling,
                 meas_std,
                 evaluation_interval,
                 printProgress,
                 seed,
                 burn_percentage,
                 enable_beta_feedback,
                 pcn_variant,
                 phantom_name,
                 meas_type='tomo',
                 n_theta=50):
        self.n_samples = n_samples
        self.evaluation_interval = evaluation_interval
        self.burn_percentage = burn_percentage
        #set random seed
        self.random_seed = seed
        self.printProgress = printProgress
        self.n_layers = n_layers
        self.kappa = kappa
        self.sigma_0 = sigma_0
        self.sigma_v = sigma_v
        self.sigma_scaling = sigma_scaling
        self.enable_beta_feedback = enable_beta_feedback
        cp.random.seed(self.random_seed)

        # set up parameters for the 2-dimensional simulation
        self.d = 2
        self.nu = 2 - self.d / 2
        self.alpha = self.nu + self.d / 2
        self.t_start = -0.5
        self.t_end = 0.5
        self.beta_u = (sigma_0**2) * (2**self.d * util.PI**(self.d / 2) *
                                      ssp.gamma(self.alpha)) / ssp.gamma(
                                          self.nu)
        self.beta_v = self.beta_u * (sigma_v / sigma_0)**2
        self.sqrtBeta_v = cp.sqrt(self.beta_v).astype('float32')
        self.sqrtBeta_0 = cp.sqrt(self.beta_u).astype('float32')

        f = FourierAnalysis_2D(n, n_extended, self.t_start, self.t_end)
        self.fourier = f

        rg = RandomGenerator_2D(f.basis_number)
        self.random_gen = rg

        LuReal = ((f.Dmatrix * self.kappa**(-self.nu) -
                   self.kappa**(2 - self.nu) * f.Imatrix) /
                  self.sqrtBeta_0).astype('float32')
        Lu = LuReal + 1j * cp.zeros(LuReal.shape, dtype=cp.float32)

        uStdev_sym = -1 / cp.diag(Lu)
        uStdev = uStdev_sym[f.basis_number_2D_ravel - 1:]
        uStdev[0] /= 2  #scaled

        if meas_type == 'tomo':
            self.measurement = Sinogram(
                phantom_name,
                target_size=2 * f.extended_basis_number - 1,
                n_theta=n_theta,
                stdev=meas_std,
                relative_location='phantom_images')
        else:
            self.measurement = TwoDMeasurement(
                phantom_name,
                target_size=2 * f.extended_basis_number - 1,
                stdev=meas_std,
                relative_location='phantom_images')

        self.pcn_variant = pcn_variant
        self.pcn = pCN(n_layers, rg, self.measurement, f, beta,
                       self.pcn_variant)
        # self.pcn_pair_layers = pcn_pair_layers

        self.pcn.record_skip = np.max(
            cp.array([1, self.n_samples // self.pcn.max_record_history]))
        history_length = np.min(
            np.array([self.n_samples, self.pcn.max_record_history]))
        self.pcn.sqrtBetas_history = np.empty((history_length, self.n_layers),
                                              dtype=np.float64)
        Layers = []
        for i in range(self.n_layers):
            if i == 0:
                init_sample_sym = uStdev_sym * self.pcn.random_gen.construct_w()
                lay = Layer(True, self.sqrtBeta_0, i, n_samples, self.pcn,
                            init_sample_sym)
                lay.LMat.current_L = Lu
                lay.LMat.latest_computed_L = Lu
                lay.stdev_sym = uStdev_sym
                lay.stdev = uStdev
            else:
                if i == n_layers - 1:
                    lay = Layer(False, self.sqrtBeta_v, i, self.n_samples,
                                self.pcn, Layers[i - 1].current_sample_sym)
                    wNew = self.pcn.random_gen.construct_w()
                    eNew = cp.random.randn(self.pcn.measurement.num_sample,
                                           dtype=cp.float32)
                    wBar = cp.concatenate((eNew, wNew))
                    LBar = cp.vstack((self.pcn.H, lay.LMat.current_L))
                    lay.current_sample_sym, res, rnk, s = cp.linalg.lstsq(
                        LBar, self.pcn.yBar - wBar, rcond=-1)  #,rcond=None)
                    lay.current_sample = lay.current_sample_sym[
                        f.basis_number_2D_ravel - 1:]
                else:
                    # lay = layer.Layer(False, sqrtBeta_v*np.sqrt(sigma_scaling),i, n_samples, pcn,Layers[i-1].current_sample)
                    lay = Layer(False, self.sqrtBeta_v * 0.1, i,
                                self.n_samples, self.pcn,
                                Layers[i - 1].current_sample)

            lay.update_current_sample()
            self.pcn.Layers_sqrtBetas[i] = lay.sqrt_beta
            lay.samples_history = np.empty(
                (history_length, self.pcn.fourier.basis_number_2D_ravel),
                dtype=np.complex64)
            Layers.append(lay)

        self.Layers = Layers
Example #39
    def compressed_allreduce(self, buffer_m: torch.tensor, worker_error,
                             server_error, local_rank):

        # all_start_time = time.time()
        original_shape = buffer_m.size()
        if len(original_shape) > 1:
            buffer_m = torch.flatten(buffer_m)
        original_size = buffer_m.numel()
        worker_error_size = worker_error.numel()
        cupy.cuda.Device(local_rank).use()

        if original_size != worker_error_size:
            empty_tensor = torch.zeros(worker_error_size - original_size,
                                       device=buffer_m.device)
            buffer_m = torch.cat([buffer_m, empty_tensor])

        buffer_m.add_(worker_error)
        worker_scale = torch.norm(buffer_m) / np.sqrt(torch.numel(buffer_m))
        worker_error.set_(
            buffer_m - worker_scale *
            buffer_m.sign().add_(1).bool().float().add_(-0.5).mul_(2.0))

        cupy_sign_list_packed = self.compression_backend.compress_by_chunk(
            self.compression_backend.torch2cupy(
                buffer_m.sign_().add_(1).bool()), self.size)
        cupy_worker_scale = self.compression_backend.torch2cupy(worker_scale)

        cupy_recvbuf_sign = cupy.zeros(
            [self.size, cupy_sign_list_packed[self.rank].size],
            dtype=cupy_sign_list_packed[0].dtype)
        # cupy_recvbuf_scale = cupy.zeros([self.size, 1], dtype=cupy_worker_scale.dtype)

        sign_list_packed = [
            self.compression_backend.cupy2torch(cupy_sign_list_packed[idx])
            for idx in range(self.size)
        ]

        # worker_scale = self.compression_backend.cupy2torch(cupy_worker_scale)
        recvbuf_sign = self.compression_backend.cupy2torch(cupy_recvbuf_sign)
        #recvbuf_scale = self.compression_backend.cupy2torch(cupy_recvbuf_scale)
        recvbuf_scale = [
            torch.zeros(1,
                        dtype=worker_scale.dtype,
                        device=torch.device(local_rank))
            for i in range(self.size)
        ]

        # communication phase 1
        # gather_start = time.time()
        # Alltoall for sign
        dist.all_to_all_single(recvbuf_sign, torch.stack(sign_list_packed))
        # Allgather for scale
        dist.all_gather(recvbuf_scale, worker_scale)

        # gather_end = time.time()

        # cupy_sign_list_packed, sign_list_packed, cupy_worker_scale, worker_scale = None, None, None, None
        cupy_sign_list_packed = None

        cupy_recvbuf_sign = self.compression_backend.torch2cupy(recvbuf_sign)
        #cupy_recvbuf_scale = self.compression_backend.torch2cupy(torch.stack(recvbuf_scale))

        compensated_server_m = self.compression_backend.cupy2torch(
            (cupy.unpackbits(cupy_recvbuf_sign.flatten())).reshape(
                self.size, -1)).float().add_(-0.5).mul_(2.0).mul_(
                    torch.stack(recvbuf_scale).mul_(1 / self.size)).sum(0)
        compensated_server_m.add_(server_error)
        server_scale = torch.norm(compensated_server_m) / np.sqrt(
            compensated_server_m.numel())
        server_error.set_(compensated_server_m -
                          server_scale * compensated_server_m.sign().add_(
                              1).bool().float().add_(-0.5).mul_(2.0))

        # cupy_server_scale = self.compression_backend.torch2cupy(server_scale)

        cupy_server_sign_packed = self.compression_backend.compress_by_chunk(
            self.compression_backend.torch2cupy(
                compensated_server_m.sign_().add_(1).bool()), 1)
        compensated_server_m = None

        cupy_recvbuf_sign_server = cupy.zeros(
            [self.size, cupy_server_sign_packed[0].size],
            dtype=cupy_recvbuf_sign.dtype)
        # cupy_recvbuf_sign, recvbuf_sign = None, None
        cupy_recvbuf_sign = None

        server_sign_packed = [
            self.compression_backend.cupy2torch(cupy_server_sign_packed[0])
        ]
        recvbuf_sign_server = [
            self.compression_backend.cupy2torch(cupy_recvbuf_sign_server[idx])
            for idx in range(self.size)
        ]

        # server_scale = self.compression_backend.cupy2torch(cupy_server_scale)
        cupy_recvbuf_scale_server = cupy.zeros([self.size, 1],
                                               dtype=cupy_worker_scale.dtype)
        # cupy_recvbuf_scale, recvbuf_scale = None, None

        recvbuf_scale_server = [
            self.compression_backend.cupy2torch(cupy_recvbuf_scale_server[idx])
            for idx in range(self.size)
        ]

        # Communication Phase 2
        dist.all_gather(recvbuf_sign_server, server_sign_packed[0])
        dist.all_gather(recvbuf_scale_server, server_scale)

        cupy_server_sign_packed = None

        # need to convert from a tensor list to a single tensor
        # dist.all_gather only provides a tensor list as the recv/output buffer
        recvbuf_sign_server = torch.stack(recvbuf_sign_server)

        cupy_recvbuf_sign_server = self.compression_backend.torch2cupy(
            recvbuf_sign_server)

        buffer_m.data.copy_(
            self.compression_backend.cupy2torch(
                (cupy.unpackbits(cupy_recvbuf_sign_server.flatten())).reshape(
                    self.size, -1)).float().add_(-0.5).mul_(2.0).mul_(
                        self.compression_backend.cupy2torch(
                            cupy_recvbuf_scale_server)).flatten().data)
        if original_size != worker_error_size:
            buffer_m = buffer_m[0:original_size]
        if len(original_shape) > 1:
            buffer_m = buffer_m.reshape(original_shape)

        return buffer_m
Example #40
LoadFrom = "weights.npy"  # The pretrained model
saving = False  # Set it as True if you want to save the trained model
best_perf = 0
Nnrn = [NhidenNeurons, NumOfClasses]  # Number of neurons at hidden and output layers
cats = [4, 1, 0, 7, 9, 2, 3, 5, 8, 6]  # Reordering the categories

# General variables
images = []  # To keep training images
labels = []  # To keep training labels
images_test = []  # To keep test images
labels_test = []  # To keep test labels
W = []  # To hold the weights of hidden and output layers
firingTime = []  # To hold the firing times of hidden and output layers
Spikes = []  # To hold the spike trains of hidden and output layers
X = []  # To be used in converting firing times to spike trains
target = cp.zeros([NumOfClasses])  # To keep the target firing times of current image
FiringFrequency = []  # to count number of spikes each neuron emits during an epoch

# loading MNIST dataset
mndata = MNIST('MNIST/')
# mndata.gz = False

Images, Labels = mndata.load_training()
Images = np.array(Images)
for i in range(len(Labels)):
    if Labels[i] in cats:
        images.append(np.floor((GrayLevels - Images[i].reshape(28, 28)) * tmax / GrayLevels).astype(int))
        labels.append(cats.index(Labels[i]))

Images, Labels = mndata.load_testing()
Images = np.array(Images)
Example #41
def gels(a, b):
    """Solves over/well/under-determined linear systems.

    Computes the least-squares solution to the equation ``ax = b`` by QR
    factorization using cusolverDn<t>geqrf().

    Args:
        a (cupy.ndarray): The matrix with dimension ``(M, N)``.
        b (cupy.ndarray): The matrix with dimension ``(M)`` or ``(M, K)``.

    Returns:
        cupy.ndarray:
            The matrix with dimension ``(N)`` or ``(N, K)``.
    """
    if a.ndim != 2:
        raise ValueError('a.ndim must be 2 (actual: {})'.format(a.ndim))
    if b.ndim == 1:
        nrhs = 1
    elif b.ndim == 2:
        nrhs = b.shape[1]
    else:
        raise ValueError('b.ndim must be 1 or 2 (actual: {})'.format(b.ndim))
    if a.shape[0] != b.shape[0]:
        raise ValueError('shape mismatch (a: {}, b: {}).'.
                         format(a.shape, b.shape))
    if a.dtype != b.dtype:
        raise ValueError('dtype mismatch (a: {}, b: {}).'.
                         format(a.dtype, b.dtype))

    dtype = a.dtype
    if dtype == 'f':
        t = 's'
    elif dtype == 'd':
        t = 'd'
    elif dtype == 'F':
        t = 'c'
    elif dtype == 'D':
        t = 'z'
    else:
        raise ValueError('unsupported dtype (actual: {})'.format(dtype))

    geqrf_helper = getattr(_cusolver, t + 'geqrf_bufferSize')
    geqrf = getattr(_cusolver, t + 'geqrf')
    trsm = getattr(_cublas, t + 'trsm')
    if t in 'sd':
        ormqr_helper = getattr(_cusolver, t + 'ormqr_bufferSize')
        ormqr = getattr(_cusolver, t + 'ormqr')
    else:
        ormqr_helper = getattr(_cusolver, t + 'unmqr_bufferSize')
        ormqr = getattr(_cusolver, t + 'unmqr')

    no_trans = _cublas.CUBLAS_OP_N
    if dtype.char in 'fd':
        trans = _cublas.CUBLAS_OP_T
    else:
        trans = _cublas.CUBLAS_OP_C

    m, n = a.shape
    mn_min = min(m, n)
    dev_info = _cupy.empty(1, dtype=_numpy.int32)
    tau = _cupy.empty(mn_min, dtype=dtype)
    cusolver_handle = _device.get_cusolver_handle()
    cublas_handle = _device.get_cublas_handle()

    if m >= n:  # over/well-determined systems
        a = a.copy(order='F')
        b = b.copy(order='F')

        # geqrf (QR decomposition, A = Q * R)
        ws_size = geqrf_helper(cusolver_handle, m, n, a.data.ptr, m)
        workspace = _cupy.empty(ws_size, dtype=dtype)
        geqrf(cusolver_handle, m, n, a.data.ptr, m, tau.data.ptr,
              workspace.data.ptr, ws_size, dev_info.data.ptr)
        _cupy.linalg._util._check_cusolver_dev_info_if_synchronization_allowed(
            geqrf, dev_info)

        # ormqr (Computes Q^T * B)
        ws_size = ormqr_helper(
            cusolver_handle, _cublas.CUBLAS_SIDE_LEFT, trans, m, nrhs, mn_min,
            a.data.ptr, m, tau.data.ptr, b.data.ptr, m)
        workspace = _cupy.empty(ws_size, dtype=dtype)
        ormqr(cusolver_handle, _cublas.CUBLAS_SIDE_LEFT, trans, m, nrhs,
              mn_min, a.data.ptr, m, tau.data.ptr, b.data.ptr, m,
              workspace.data.ptr, ws_size, dev_info.data.ptr)
        _cupy.linalg._util._check_cusolver_dev_info_if_synchronization_allowed(
            ormqr, dev_info)

        # trsm (Solves R * X = (Q^T * B))
        trsm(cublas_handle, _cublas.CUBLAS_SIDE_LEFT,
             _cublas.CUBLAS_FILL_MODE_UPPER, no_trans,
             _cublas.CUBLAS_DIAG_NON_UNIT, mn_min, nrhs, 1, a.data.ptr, m,
             b.data.ptr, m)

        return b[:n]

    else:  # under-determined systems
        a = a.conj().T.copy(order='F')
        bb = b
        out_shape = (n,) if b.ndim == 1 else (n, nrhs)
        b = _cupy.zeros(out_shape, dtype=dtype, order='F')
        b[:m] = bb

        # geqrf (QR decomposition, A^T = Q * R)
        ws_size = geqrf_helper(cusolver_handle, n, m, a.data.ptr, n)
        workspace = _cupy.empty(ws_size, dtype=dtype)
        geqrf(cusolver_handle, n, m, a.data.ptr, n, tau.data.ptr,
              workspace.data.ptr, ws_size, dev_info.data.ptr)
        _cupy.linalg._util._check_cusolver_dev_info_if_synchronization_allowed(
            geqrf, dev_info)

        # trsm (Solves R^T * Z = B)
        trsm(cublas_handle, _cublas.CUBLAS_SIDE_LEFT,
             _cublas.CUBLAS_FILL_MODE_UPPER, trans,
             _cublas.CUBLAS_DIAG_NON_UNIT, m, nrhs, 1, a.data.ptr, n,
             b.data.ptr, n)

        # ormqr (Computes Q * Z)
        ws_size = ormqr_helper(
            cusolver_handle, _cublas.CUBLAS_SIDE_LEFT, no_trans, n, nrhs,
            mn_min, a.data.ptr, n, tau.data.ptr, b.data.ptr, n)
        workspace = _cupy.empty(ws_size, dtype=dtype)
        ormqr(cusolver_handle, _cublas.CUBLAS_SIDE_LEFT, no_trans, n, nrhs,
              mn_min, a.data.ptr, n, tau.data.ptr, b.data.ptr, n,
              workspace.data.ptr, ws_size, dev_info.data.ptr)
        _cupy.linalg._util._check_cusolver_dev_info_if_synchronization_allowed(
            ormqr, dev_info)

        return b
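A usage sketch checking gels against cupy.linalg.lstsq on a random over-determined system:

import cupy

a = cupy.random.rand(6, 3)   # float64, so the 'd' code path is taken
b = cupy.random.rand(6)
x = gels(a, b)
x_ref = cupy.linalg.lstsq(a, b, rcond=None)[0]
assert x.shape == (3,) and cupy.allclose(x, x_ref)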
Example #42
def test_moments_normalized_invalid():
    with pytest.raises(ValueError):
        moments_normalized(cp.zeros((3, 3)), 3)
    with pytest.raises(ValueError):
        moments_normalized(cp.zeros((3, 3)), 4)
Example #43
def convolutional_barycenter_gpu(Hv,
                                 reg,
                                 alpha,
                                 stabThresh=1e-30,
                                 niter=1500,
                                 tol=1e-9,
                                 sharpening=False,
                                 verbose=False):
    """Main function solving wasserstein barycenter problem using gpu

    Arguments:
        Hv {Set of distributions (cparray)} -- 
        reg {regularization term "gamma"} -- float superior to 0, generally equals size of space/40
        alpha {list} -- set of weights

    Keyword Arguments:
        stabThresh {float} -- Stabilization threshold to prevent division by 0 (default: {1e-30})
        niter {int} -- Maximum number of loop iteration (default: {1500})
        tol {float} -- convergence tolerance at which point iterations stop (default: {1e-9})
        sharpening {bool} -- Whether or not entropic sharpening is used (default: {False})
        verbose {bool} --  verbose option

    Returns:
        cparray -- solution of weighted wassertein barycenter problem
    """
    def K(x):
        return cp.array(gaussian_filter(cp.asnumpy(x), sigma=reg))

    def to_find_root(barycenter, H0, beta):
        return entropy(barycenter**beta) - H0

    alpha = cp.array(alpha)
    alpha = alpha / alpha.sum()
    Hv = cp.array(Hv)
    mean_weights = (Hv[0].sum() + Hv[1].sum()) / 2.
    #print('mean weights', mean_weights)
    for i in range(len(Hv)):
        Hv[i] = Hv[i] / Hv[i].sum()
    v = cp.ones(Hv.shape)
    Kw = cp.ones(Hv.shape)

    entropy_max = max_entropy(Hv)
    barycenter = cp.zeros(Hv[0].shape)

    change = 1
    for j in range(niter):
        t0 = time.time()
        barycenterOld = barycenter

        barycenter = cp.zeros_like(Hv[0, :, :])
        for i in range(Hv.shape[0]):

            Kw[i, :, :] = K(Hv[i, :, :] /
                            cp.maximum(stabThresh, K(v[i, :, :])))
            barycenter += alpha[i] * cp.log(
                cp.maximum(stabThresh, v[i, :, :] * Kw[i, :, :]))

        barycenter = cp.exp(barycenter)
        change = cp.sum(cp.abs(barycenter - barycenterOld))
        if sharpening:
            if (entropy(barycenter)) > (entropy_max):

                beta = newton(
                    lambda beta: to_find_root(barycenter, entropy_max, beta),
                    1,
                    tol=1e-6)
                if beta < 0:
                    beta = 1

            else:
                beta = 1
            barycenter = barycenter**beta

        for i in range(Hv.shape[0]):
            v[i, :, :] = barycenter / cp.maximum(stabThresh, Kw[i, :, :])

        if verbose:
            #sys.stdout('output.log','a')
            print("iter : ", j, "change : ", change, 'time :',
                  time.time() - t0)
        if change < tol:
            break

    return cp.asnumpy(barycenter)
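A usage sketch with two Gaussian blobs and equal weights (it assumes the helpers the function uses, gaussian_filter, max_entropy and entropy, are importable from the same module; the parameters are illustrative):

import numpy as np

def gaussian_blob(n, cy, cx, s=3.0):
    # normalized isotropic Gaussian on an n x n grid, centered at (cy, cx)
    yy, xx = np.mgrid[:n, :n]
    g = np.exp(-((yy - cy) ** 2 + (xx - cx) ** 2) / (2 * s ** 2))
    return g / g.sum()

Hv = np.stack([gaussian_blob(64, 20, 20), gaussian_blob(64, 44, 44)])
bary = convolutional_barycenter_gpu(Hv, reg=64 / 40, alpha=[0.5, 0.5])
# the barycenter mass should sit between the two inputs
iy, ix = np.unravel_index(bary.argmax(), bary.shape)
assert 20 <= iy <= 44 and 20 <= ix <= 44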
Example #44
    line = f.readline()
f.close()

# Load the original vector data (base)
add_vector = []
f = codecs.open('vectors.txt', 'r', 'utf8')
line = f.readline()
while line:
    v = cp.array(line.split(','), dtype=cp.float32)
    add_vector.append(v)
    line = f.readline()
f.close()

delta = 0.01  # interpolation coefficient
# Build the target vectors
tgt_vector = cp.zeros((100, 1, 100), dtype=cp.float32)
for i in range(100):
    tgt_vector[i] = (add_vector[0] * (1 - (i * delta))) + (org_vector[0] *
                                                           (i * delta))

print(tgt_vector.shape)

# Generate images from the target vectors
for i in range(100):
    with chainer.using_config('train', False):
        result = model(tgt_vector[i])
    data = np.zeros((128, 128, 3), dtype=np.uint8)
    dst = result.data[0] * 255.0
    data[:, :, 0] = dst[0]
    data[:, :, 1] = dst[1]
    data[:, :, 2] = dst[2]
Example #45
def rollout(rollout_arg_tuple):
    try:
        global initial_z_t
        generation, mutation_idx, trial, args, vision, model, gpu, W_c, b_c, max_timesteps, with_frames = rollout_arg_tuple

        random_rollouts_dir = os.path.join(args.data_dir, args.game,
                                           args.experiment_name,
                                           'random_rollouts')

        if args.in_dream:
            log(
                ID,
                "Loading random rollouts for initial frames for dream training"
            )
            initial_z_t = ModelDataset(dir=random_rollouts_dir,
                                       load_batch_size=args.initial_z_size,
                                       verbose=False)

        # The same starting seed gets passed in multiprocessing, need to reset it for each process:
        np.random.seed()

        if not with_frames:
            log(
                ID, ">>> Starting generation #" + str(generation) +
                ", mutation #" + str(mutation_idx + 1) + ", trial #" +
                str(trial + 1))
        else:
            frames_array = []
        start_time = time.time()

        model.reset_state()

        if args.in_dream:
            z_t, _, _, _, _ = initial_z_t[np.random.randint(len(initial_z_t))]
            z_t = z_t[0]
            if gpu is not None:
                z_t = cuda.to_gpu(z_t)
            if with_frames:
                observation = vision.decode(z_t).data
                if gpu is not None:
                    observation = cp.asnumpy(observation)
                observation = post_process_image_tensor(observation)[0]
            else:
                # free up precious GPU memory:
                if gpu is not None:
                    vision.to_cpu()
                vision = None
            if args.initial_z_noise > 0.:
                if gpu is not None:
                    z_t += cp.random.normal(0., args.initial_z_noise,
                                            z_t.shape).astype(cp.float32)
                else:
                    z_t += np.random.normal(0., args.initial_z_noise,
                                            z_t.shape).astype(np.float32)
        else:
            if args.game in DOOM_GAMES:
                env = ViZDoomWrapper(args.game)
            else:
                env = gym.make(args.game)
            observation = env.reset()
        if with_frames:
            frames_array.append(observation)

        if gpu is not None:
            h_t = cp.zeros(args.hidden_dim).astype(cp.float32)
            c_t = cp.zeros(args.hidden_dim).astype(cp.float32)
        else:
            h_t = np.zeros(args.hidden_dim).astype(np.float32)
            c_t = np.zeros(args.hidden_dim).astype(np.float32)

        done = False
        cumulative_reward = 0
        t = 0
        while not done:
            if not args.in_dream:
                observation = imresize(observation,
                                       (args.frame_resize, args.frame_resize))
                observation = pre_process_image_tensor(
                    np.expand_dims(observation, 0))

                if gpu is not None:
                    observation = cuda.to_gpu(observation)
                z_t = vision.encode(observation, return_z=True).data[0]

            a_t = action(args, W_c, b_c, z_t, h_t, c_t, gpu)

            if args.in_dream:
                z_t, done = model(z_t, a_t, temperature=args.temperature)
                done = done.data[0]
                if with_frames:
                    observation = post_process_image_tensor(
                        vision.decode(z_t).data)[0]
                reward = 1
                if done >= args.done_threshold:
                    done = True
                else:
                    done = False
            else:
                observation, reward, done, _ = env.step(
                    a_t if gpu is None else cp.asnumpy(a_t))
                model(z_t, a_t, temperature=args.temperature)
            if with_frames:
                frames_array.append(observation)

            cumulative_reward += reward

            h_t = model.get_h().data[0]
            c_t = model.get_c().data[0]

            t += 1
            if max_timesteps is not None and t == max_timesteps:
                break
            elif args.in_dream and t == args.dream_max_len:
                log(
                    ID,
                    ">>> generation #{}, mutation #{}, trial #{}: maximum length of {} timesteps reached in dream!"
                    .format(generation, str(mutation_idx + 1), str(trial + 1),
                            t))
                break

        if not args.in_dream:
            env.close()

        if not with_frames:
            log(
                ID,
                ">>> Finished generation #{}, mutation #{}, trial #{} in {} timesteps in {:.2f}s with cumulative reward {:.2f}"
                .format(generation, str(mutation_idx + 1), str(trial + 1), t,
                        (time.time() - start_time), cumulative_reward))
            return cumulative_reward
        else:
            frames_array = np.asarray(frames_array)
            if args.game in DOOM_GAMES and not args.in_dream:
                frames_array = post_process_image_tensor(frames_array)
            return cumulative_reward, np.asarray(frames_array)
    except Exception:
        print(traceback.format_exc())
        return 0.
Example #46
def calc_pgh(ispec, wavelengths, psfparams):
    '''
    Calculate the pixelated Gauss Hermite for all wavelengths of a single spectrum

    ispec : integer spectrum number
    wavelengths : array of wavelengths to evaluate
    psfparams : dictionary of PSF parameters returned by evalcoeffs

    returns pGHx, pGHy

    where pGHx[ghdeg+1, nwave, nbinsx] contains the pixel-integrated Gauss-Hermite polynomial
    for all degrees at all wavelengths across nbinsx bins spanning the PSF spot, and similarly
    for pGHy.  The core PSF will then be evaluated as

    PSFcore = sum_ij c_ij outer(pGHy[j], pGHx[i])
    '''

    #- shorthand
    p = psfparams

    #- spot size (ny,nx)
    nx = p['HSIZEX']
    ny = p['HSIZEY']
    nwave = len(wavelengths)
    p['X'], p['Y'], p['GHSIGX'], p['GHSIGY'] = \
    cp.array(p['X']), cp.array(p['Y']), cp.array(p['GHSIGX']), cp.array(p['GHSIGY'])
    xedges = cp.repeat(cp.arange(nx + 1) - nx // 2,
                       nwave).reshape(nx + 1, nwave)
    yedges = cp.repeat(cp.arange(ny + 1) - ny // 2,
                       nwave).reshape(ny + 1, nwave)

    #- Shift to be relative to the PSF center at 0 and normalize
    #- by the PSF sigma (GHSIGX, GHSIGY)
    #- xedges[nx+1, nwave]
    #- yedges[ny+1, nwave]
    xedges = (xedges - p['X'][ispec] % 1) / p['GHSIGX'][ispec]
    yedges = (yedges - p['Y'][ispec] % 1) / p['GHSIGY'][ispec]

    #- Degree of the Gauss-Hermite polynomials
    ghdegx = p['GHDEGX']
    ghdegy = p['GHDEGY']

    #- Evaluate the Hermite polynomials at the pixel edges
    #- HVx[ghdegx+1, nwave, nx+1]
    #- HVy[ghdegy+1, nwave, ny+1]
    HVx = hermevander_wrapper(xedges, ghdegx).T
    HVy = hermevander_wrapper(yedges, ghdegy).T

    #- Evaluate the Gaussians at the pixel edges
    #- Gx[nwave, nx+1]
    #- Gy[nwave, ny+1]
    Gx = cp.exp(-0.5 * xedges**2).T / cp.sqrt(2. * cp.pi)
    Gy = cp.exp(-0.5 * yedges**2).T / cp.sqrt(2. * cp.pi)

    #- Combine into Gauss*Hermite
    GHx = HVx * Gx
    GHy = HVy * Gy

    #- Integrate over the pixels using the relationship
    #  Integral{ H_k(x) exp(-0.5 x^2) dx} = -H_{k-1}(x) exp(-0.5 x^2) + const

    #- pGHx[ghdegx+1, nwave, nx]
    #- pGHy[ghdegy+1, nwave, ny]
    pGHx = cp.zeros((ghdegx + 1, nwave, nx))
    pGHy = cp.zeros((ghdegy + 1, nwave, ny))
    pGHx[0] = 0.5 * cp.diff(cupyx.scipy.special.erf(xedges / cp.sqrt(2.)).T)
    pGHy[0] = 0.5 * cp.diff(cupyx.scipy.special.erf(yedges / cp.sqrt(2.)).T)
    pGHx[1:] = GHx[:ghdegx, :, 0:nx] - GHx[:ghdegx, :, 1:nx + 1]
    pGHy[1:] = GHy[:ghdegy, :, 0:ny] - GHy[:ghdegy, :, 1:ny + 1]

    return pGHx, pGHy
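
A minimal sketch of assembling the core PSF from the outputs above, following the
docstring formula PSFcore = sum_ij c_ij outer(pGHy[j], pGHx[i]); the coefficient
array c, its index order, and the wavelength index iwave are hypothetical
stand-ins, not part of the original code:

import cupy as cp

def psf_core_at_wavelength(pGHx, pGHy, c, iwave):
    # pGHx[ghdegx+1, nwave, nx] and pGHy[ghdegy+1, nwave, ny] as documented;
    # c is an assumed (ghdegx+1, ghdegy+1) coefficient array.
    core = cp.zeros((pGHy.shape[2], pGHx.shape[2]))
    for i in range(pGHx.shape[0]):
        for j in range(pGHy.shape[0]):
            core += c[i, j] * cp.outer(pGHy[j, iwave], pGHx[i, iwave])
    return core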
Example #47
0
def eigsh(a,
          k=6,
          *,
          which='LM',
          ncv=None,
          maxiter=None,
          tol=0,
          return_eigenvectors=True):
    """Finds ``k`` eigenvalues and eigenvectors of the real symmetric matrix.

    Solves ``Ax = wx``, the standard eigenvalue problem for ``w`` eigenvalues
    with corresponding eigenvectors ``x``.

    Args:
        a (cupy.ndarray or cupyx.scipy.sparse.csr_matrix): A symmetric square
            matrix with dimension ``(n, n)``.
        k (int): The number of eigenvalues and eigenvectors to compute. Must be
            ``1 <= k < n``.
        which (str): 'LM' or 'LA'. 'LM': finds ``k`` largest (in magnitude)
            eigenvalues. 'LA': finds ``k`` largest (algebraic) eigenvalues.
        ncv (int): The number of Lanczos vectors generated. Must be
            ``k + 1 < ncv < n``. If ``None``, default value is used.
        maxiter (int): Maximum number of Lanczos update iterations.
            If ``None``, default value is used.
        tol (float): Tolerance for residuals ``||Ax - wx||``. If ``0``, machine
            precision is used.
        return_eigenvectors (bool): If ``True``, returns eigenvectors in
            addition to eigenvalues.

    Returns:
        tuple:
            If ``return_eigenvectors is True``, it returns ``w`` and ``x``
            where ``w`` is eigenvalues and ``x`` is eigenvectors. Otherwise,
            it returns only ``w``.

    .. seealso:: :func:`scipy.sparse.linalg.eigsh`

    .. note::
        This function uses the thick-restart Lanczos methods
        (https://sdm.lbl.gov/~kewu/ps/trlan.html).

    """
    n = a.shape[0]
    if a.ndim != 2 or a.shape[0] != a.shape[1]:
        raise ValueError('expected square matrix (shape: {})'.format(a.shape))
    if a.dtype.char not in 'fdFD':
        raise TypeError('unsupported dtype (actual: {})'.format(a.dtype))
    if k <= 0:
        raise ValueError('k must be greater than 0 (actual: {})'.format(k))
    if k >= n:
        raise ValueError('k must be smaller than n (actual: {})'.format(k))
    if which not in ('LM', 'LA'):
        raise ValueError('which must be \'LM\' or \'LA\' (actual: {})'
                         ''.format(which))
    if ncv is None:
        ncv = min(max(2 * k, k + 32), n - 1)
    else:
        ncv = min(max(ncv, k + 2), n - 1)
    if maxiter is None:
        maxiter = 10 * n
    if tol == 0:
        tol = numpy.finfo(a.dtype).eps

    alpha = cupy.zeros((ncv, ), dtype=a.dtype)
    beta = cupy.zeros((ncv, ), dtype=a.dtype.char.lower())
    V = cupy.empty((ncv, n), dtype=a.dtype)

    # Set initial vector
    u = cupy.random.random((n, )).astype(a.dtype)
    V[0] = u / cublas.nrm2(u)

    # Choose Lanczos implementation, unconditionally use 'fast' for now
    update_impl = 'fast'
    if update_impl == 'fast':
        lanczos = _lanczos_fast(a, n, ncv)
    else:
        lanczos = _lanczos_asis

    # Lanczos iteration
    lanczos(a, V, u, alpha, beta, 0, ncv)

    iter = ncv
    w, s = _eigsh_solve_ritz(alpha, beta, None, k, which)
    x = V.T @ s

    # Compute residual
    beta_k = beta[-1] * s[-1, :]
    res = cublas.nrm2(beta_k)

    while res > tol and iter < maxiter:
        # Setup for thick-restart
        beta[:k] = 0
        alpha[:k] = w
        V[:k] = x.T

        u -= u.T @ V[:k].conj().T @ V[:k]
        V[k] = u / cublas.nrm2(u)

        u[...] = a @ V[k]
        cublas.dotc(V[k], u, out=alpha[k])
        u -= alpha[k] * V[k]
        u -= V[:k].T @ beta_k
        cublas.nrm2(u, out=beta[k])
        V[k + 1] = u / beta[k]

        # Lanczos iteration
        lanczos(a, V, u, alpha, beta, k + 1, ncv)

        iter += ncv - k
        w, s = _eigsh_solve_ritz(alpha, beta, beta_k, k, which)
        x = V.T @ s

        # Compute residual
        beta_k = beta[-1] * s[-1, :]
        res = cublas.nrm2(beta_k)

    if return_eigenvectors:
        idx = cupy.argsort(w)
        return w[idx], x[:, idx]
    else:
        return cupy.sort(w)
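
A quick usage sketch for the function above, checked against a dense reference on
a small random symmetric matrix (the tolerance is illustrative):

import cupy

n, k = 100, 5
A = cupy.random.randn(n, n)
A = (A + A.T) / 2                    # symmetrize

w, x = eigsh(A, k=k, which='LA')     # k largest algebraic eigenvalues
w_ref = cupy.linalg.eigvalsh(A)      # dense reference, ascending order
assert cupy.allclose(w, w_ref[-k:], atol=1e-4)
assert x.shape == (n, k)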
Example #48
0
        return self._buf != other._buf

    def __len__(self):
        return len(self._buf)

    def __getitem__(self, item):
        return self._buf[item]

    def __setitem__(self, item, value):
        self._buf[item] = value._buf


cupy_issue_2259 = False
if cupy is not None:
    cupy_issue_2259 = not isinstance(
        cupy.zeros((2, 2)).T.__cuda_array_interface__['strides'], tuple)

# ---


def Sendrecv(smsg, rmsg):
    MPI.COMM_SELF.Sendrecv(sendbuf=smsg,
                           dest=0,
                           sendtag=0,
                           recvbuf=rmsg,
                           source=0,
                           recvtag=0,
                           status=MPI.Status())


class TestMessageSimple(unittest.TestCase):
Example #49
0
 def testOrderFortran(self):
     sbuf = cupy.ones([3, 2]).T
     rbuf = cupy.zeros([3, 2]).T
     Sendrecv(sbuf, rbuf)
     self.assertTrue((sbuf == rbuf).all())
Example #50
0
 def test_ndarray_indices_false(self):
     nd_image = cp.zeros((5, 5, 5))
     nd_image[2, 2, 2] = 1
     peaks = peak.peak_local_max(nd_image, min_distance=1, indices=False)
     assert (peaks == nd_image.astype(bool)).all()
Example #51
0
 def testOrderC(self):
     sbuf = cupy.ones([3, 2])
     rbuf = cupy.zeros([3, 2])
     Sendrecv(sbuf, rbuf)
     self.assertTrue((sbuf == rbuf).all())
Example #52
0
def _select(
    input,
    labels=None,
    index=None,
    find_min=False,
    find_max=False,
    find_min_positions=False,
    find_max_positions=False,
    find_median=False,
):
    """Return one or more of: min, max, min position, max position, median.

    If neither `labels` nor `index` is provided, these are the global values
    in `input`. If `index` is None, but `labels` is provided, a global value
    across all non-zero labels is given. When both `labels` and `index` are
    provided, lists of values are provided for each labeled region specified
    in `index`. See further details in :func:`cupyx.scipy.ndimage.minimum`,
    etc.

    Used by minimum, maximum, minimum_position, maximum_position, extrema.
    """
    find_positions = find_min_positions or find_max_positions
    positions = None
    if find_positions:
        positions = cupy.arange(input.size).reshape(input.shape)

    def single_group(vals, positions):
        result = []
        if find_min:
            result += [vals.min()]
        if find_min_positions:
            result += [positions[vals == vals.min()][0]]
        if find_max:
            result += [vals.max()]
        if find_max_positions:
            result += [positions[vals == vals.max()][0]]
        if find_median:
            result += [cupy.median(vals)]
        return result

    if labels is None:
        return single_group(input, positions)

    # ensure input and labels match sizes
    input, labels = cupy.broadcast_arrays(input, labels)

    if index is None:
        mask = labels > 0
        masked_positions = None
        if find_positions:
            masked_positions = positions[mask]
        return single_group(input[mask], masked_positions)

    if cupy.isscalar(index):
        mask = labels == index
        masked_positions = None
        if find_positions:
            masked_positions = positions[mask]
        return single_group(input[mask], masked_positions)

    index = cupy.asarray(index)

    safe_int = _safely_castable_to_int(labels.dtype)
    min_label = labels.min()
    max_label = labels.max()

    # Remap labels to unique integers if necessary, or if the largest label is
    # larger than the number of values.
    if not safe_int or min_label < 0 or max_label > labels.size:
        # Remap labels, and indexes
        unique_labels, labels = cupy.unique(labels, return_inverse=True)
        idxs = cupy.searchsorted(unique_labels, index)

        # Make all of idxs valid
        idxs[idxs >= unique_labels.size] = 0
        found = unique_labels[idxs] == index
    else:
        # Labels are an integer type, and there aren't too many
        idxs = cupy.asanyarray(index, int).copy()
        found = (idxs >= 0) & (idxs <= max_label)

    idxs[~found] = max_label + 1

    input = input.ravel()
    labels = labels.ravel()
    if find_positions:
        positions = positions.ravel()

    if hasattr(cupy, "_core"):
        using_cub = (cupy._core._accelerator.ACCELERATOR_CUB
                     in cupy._core.get_routine_accelerators())
    else:
        using_cub = (cupy.core._accelerator.ACCELERATOR_CUB
                     in cupy.core.get_routine_accelerators())

    if using_cub:
        # Cutoff values below were determined empirically for relatively large
        # input arrays.
        if find_positions or find_median:
            n_label_cutoff = 15
        else:
            n_label_cutoff = 30
    else:
        n_label_cutoff = 0

    if n_label_cutoff and len(idxs) <= n_label_cutoff:
        return _select_via_looping(
            input,
            labels,
            idxs,
            positions,
            find_min,
            find_min_positions,
            find_max,
            find_max_positions,
            find_median,
        )

    order = cupy.lexsort(cupy.stack((input.ravel(), labels.ravel())))
    input = input[order]
    labels = labels[order]
    if find_positions:
        positions = positions[order]

    # Determine indices corresponding to the min or max value for each label
    label_change_index = cupy.searchsorted(labels,
                                           cupy.arange(1, max_label + 2))
    if find_min or find_min_positions or find_median:
        # index corresponding to the minimum value at each label
        min_index = label_change_index[:-1]
    if find_max or find_max_positions or find_median:
        # index corresponding to the maximum value at each label
        max_index = label_change_index[1:] - 1

    result = []
    # the order below matches the order expected by cupyx.scipy.ndimage.extrema
    if find_min:
        mins = cupy.zeros(int(labels.max()) + 2, input.dtype)
        mins[labels[min_index]] = input[min_index]
        result += [mins[idxs]]
    if find_min_positions:
        minpos = cupy.zeros(labels.max().item() + 2, int)
        minpos[labels[min_index]] = positions[min_index]
        result += [minpos[idxs]]
    if find_max:
        maxs = cupy.zeros(int(labels.max()) + 2, input.dtype)
        maxs[labels[max_index]] = input[max_index]
        result += [maxs[idxs]]
    if find_max_positions:
        maxpos = cupy.zeros(labels.max().item() + 2, int)
        maxpos[labels[max_index]] = positions[max_index]
        result += [maxpos[idxs]]
    if find_median:
        locs = cupy.arange(len(labels))
        lo = cupy.zeros(int(labels.max()) + 2, int)
        lo[labels[min_index]] = locs[min_index]
        hi = cupy.zeros(int(labels.max()) + 2, int)
        hi[labels[max_index]] = locs[max_index]
        lo = lo[idxs]
        hi = hi[idxs]
        # lo is an index to the lowest value in input for each label,
        # hi is an index to the largest value.
        # move them to be either the same ((hi - lo) % 2 == 0) or next
        # to each other ((hi - lo) % 2 == 1), then average.
        step = (hi - lo) // 2
        lo += step
        hi -= step
        if input.dtype.kind in "iub":
            # fix for https://github.com/scipy/scipy/issues/12836
            result += [
                (input[lo].astype(float) + input[hi].astype(float)) / 2.0
            ]
        else:
            result += [(input[lo] + input[hi]) / 2.0]

    return result
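
The sorted path above extracts every label's extremum from a single lexsort
instead of looping over labels; a standalone sketch of the same trick on toy
data:

import cupy

values = cupy.array([5., 2., 7., 1., 9., 3.])
labels = cupy.array([1, 1, 2, 2, 1, 2])

# Sort by label first, value second; the first element of each label's
# run is then that label's minimum.
order = cupy.lexsort(cupy.stack((values, labels)))
v, lab = values[order], labels[order]
starts = cupy.searchsorted(lab, cupy.arange(1, int(lab.max()) + 1))
print(v[starts])   # per-label minima: [2. 1.]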
Example #53
0
 def test_flat_peak(self):
     image = cp.zeros((5, 5), dtype=cp.uint8)
     image[1:3, 1:3] = 10
     peaks = peak.peak_local_max(image, min_distance=1)
     assert len(peaks) == 4
Example #54
0
    def fit(self, item_users, show_progress=True):
        """Factorizes the item_users matrix

        Parameters
        ----------
        item_users: coo_matrix
            Matrix of confidences for the liked items. This matrix should be a coo_matrix where
            the rows of the matrix are the items, and the columns are the users that liked that item.
            BPR ignores the weight value of the matrix right now - it treats non zero entries
            as a binary signal that the user liked the item.
        show_progress : bool, optional
            Whether to show a progress bar
        """
        rs = check_random_state(self.random_state)

        # for now, all we handle is float 32 values
        if item_users.dtype != np.float32:
            item_users = item_users.astype(np.float32)

        items, users = item_users.shape

        # We need efficient user lookup for case of removing own likes
        # TODO: might make more sense to just change inputs to be users by items instead
        # but that would be a major breaking API change
        user_items = item_users.T.tocsr()
        if not user_items.has_sorted_indices:
            user_items.sort_indices()

        # this basically calculates the 'row' attribute of a COO matrix
        # without requiring us to get the whole COO matrix
        user_counts = np.ediff1d(user_items.indptr)
        userids = np.repeat(np.arange(users),
                            user_counts).astype(user_items.indices.dtype)

        # create factors if not already created.
        # Note: the final dimension is for the item bias term - which is set to a 1 for all users
        # this simplifies interfacing with approximate nearest neighbours libraries etc
        if self.item_factors is None:
            self.item_factors = rs.rand(
                items, self.factors + 1, dtype=cp.float32) - 0.5
            self.item_factors /= self.factors

            # set factors to all zeros for items without any ratings
            item_counts = np.bincount(user_items.indices, minlength=items)
            self.item_factors[item_counts == 0] = cp.zeros(self.factors + 1)

        if self.user_factors is None:
            self.user_factors = rs.rand(
                users, self.factors + 1, dtype=cp.float32) - 0.5
            self.user_factors /= self.factors

            # set factors to all zeros for users without any ratings
            self.user_factors[user_counts == 0] = cp.zeros(self.factors + 1)

            self.user_factors[:, self.factors] = 1.0

        self._item_norms = self._user_norms = None

        userids = implicit.gpu.IntVector(userids)
        itemids = implicit.gpu.IntVector(user_items.indices)
        indptr = implicit.gpu.IntVector(user_items.indptr)

        X = implicit.gpu.Matrix(self.user_factors)
        Y = implicit.gpu.Matrix(self.item_factors)

        log.debug("Running %i BPR training epochs", self.iterations)
        with tqdm(total=self.iterations,
                  disable=not show_progress) as progress:
            for epoch in range(self.iterations):
                correct, skipped = implicit.gpu.bpr_update(
                    userids,
                    itemids,
                    indptr,
                    X,
                    Y,
                    self.learning_rate,
                    self.regularization,
                    rs.randint(2**31),
                    self.verify_negative_samples,
                )
                progress.update(1)
                total = len(user_items.data)
                if total != 0 and total != skipped:
                    progress.set_postfix({
                        "correct":
                        "%.2f%%" % (100.0 * correct / (total - skipped)),
                        "skipped":
                        "%.2f%%" % (100.0 * skipped / total),
                    })
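
A sketch of the input layout the docstring asks for; the model class name in the
commented lines is a hypothetical stand-in, since only fit() is shown above:

import numpy as np
from scipy.sparse import coo_matrix

# Toy interactions: rows are items, columns are users, and nonzero
# entries mean "this user liked this item".
rows = np.array([0, 0, 1, 2])         # item ids
cols = np.array([0, 1, 1, 2])         # user ids
data = np.ones(len(rows), dtype=np.float32)
item_users = coo_matrix((data, (rows, cols)), shape=(3, 3))

# model = BayesianPersonalizedRanking(factors=32)   # hypothetical name
# model.fit(item_users)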
Example #55
0
def convolutional_barycenter_gpu(Hv,
                                 reg: float,
                                 alpha: np.ndarray,
                                 stabThresh=1e-30,
                                 niter=1500,
                                 tol=1e-9,
                                 sharpening=False,
                                 verbose=False):
    """Main function solving wasserstein barycenter problem using gpu
    Parameters:
        Hv    {Set            of distributions (cparray)} --
        reg   {regularization term "gamma"              } -- float superior to 0, generally equals size of space/40
        alpha {list                                     } -- set of weights
    Keyword Parameters:
        stabThresh {float} -- Stabilization threshold to prevent division by 0 (default: {1e-30})
        niter      {int  } -- Maximum number of loop      iteration (default: {1500})
        tolerance        {float} -- convergence             tolerance at which point iterations stop (default: {1e-9})
        sharpening {bool } -- Whether or not entropic sharpening is used (default: {False})
        verbose    {bool } -- verbose option
    Returns:
        cparray -- solution of weighted wassertein barycenter problem
    """
    import cupy as cp
    from cupyx.scipy.ndimage import gaussian_filter as cupyx_gaussian_filter

    def K_cupyx(x):
        return cupyx_gaussian_filter(x, sigma=reg)

    def to_find_root(barycenter, H0, beta):
        return entropy(barycenter**beta) - H0

    alpha = cp.array(alpha)
    alpha = alpha / alpha.sum()
    Hv = cp.array(Hv)

    for i in range(len(Hv)):
        Hv[i] = Hv[i] / Hv[i].sum()

    v = cp.ones(Hv.shape)
    Kw = cp.ones(Hv.shape)
    entropy_max = max_entropy(Hv)
    barycenter = cp.zeros(Hv[0].shape)

    cumtime_agg = 0

    rolling_delta = []
    cumtime = []
    iterations = []

    change = 1
    for j in range(niter):
        print("For every iteration.. ")
        t0 = time.time()
        barycenterOld = barycenter
        barycenter = cp.zeros_like(Hv[0, :, :])

        print("Hv shape is", Hv.shape)
        for i in range(Hv.shape[0]):
            #* for each of two distributions(which are identical)
            #* distribution  *i* becomes Kernel of (dist1 over the Kernel of v(i))

            Kw[i, :, :] = K_cupyx(Hv[i, :, :] /
                                  cp.maximum(stabThresh, K_cupyx(v[i, :, :])))
            #* barycenter is barycenter plus weighted log of v(i)*Kw(i)
            barycenter += alpha[i] * cp.log(
                cp.maximum(stabThresh, v[i, :, :] * Kw[i, :, :]))

        barycenter = cp.exp(barycenter)
        change = cp.sum(cp.abs(barycenter - barycenterOld))

        if sharpening:
            if (entropy(barycenter)) > (entropy_max):
                beta = newton(
                    lambda beta: to_find_root(barycenter, entropy_max, beta),
                    1,
                    tol=1e-6)
                if beta < 0:
                    beta = 1
            else:
                beta = 1
            barycenter = barycenter**beta
        for i in range(Hv.shape[0]):
            # assign to v(i) barycenter normalized by Kw(i)'s largest
            v[i, :, :] = barycenter / cp.maximum(stabThresh, Kw[i, :, :])

        elapsed = np.around(time.time() - t0, 4)
        delta = np.around(change, 10)

        cumtime_agg += elapsed

        iterations.append(j)
        cumtime.append(cumtime_agg)
        rolling_delta.append(float(delta))

        print(f"Refinement iter {j} |  delta: {delta} | elapsed : {elapsed}")
        if change < tol:
            print(f"Exited. Change {change} under tolerance.")
            log = {
                "iterations": iterations,
                "cumtime": cumtime,
                "rolling_delta": rolling_delta,
                "exited_on": j,
                "exited_under_tolerance": True
            }
            # print(f"Exited with 0 on iter {j}")
            return [cp.asnumpy(barycenter), log]

    log = {
        "iterations": iterations,
        "cumtime": cumtime,
        "rolling_delta": rolling_delta,
        "exited_on": j,
        "exited_under_tolerance": False
    }
    # print(f"Exited with 0 on iter {j}")
    return [cp.asnumpy(barycenter), log]
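
A usage sketch following the docstring's guidance (reg roughly the grid size /
40), assuming the module's helpers such as max_entropy and entropy are
importable alongside the function above:

import numpy as np
from scipy.ndimage import gaussian_filter

# Two blobs on a 64x64 grid; their equal-weight barycenter should sit
# between them.
H1 = np.zeros((64, 64)); H1[16, 16] = 1.0
H2 = np.zeros((64, 64)); H2[48, 48] = 1.0
Hv = np.stack([gaussian_filter(H1, 2), gaussian_filter(H2, 2)])

bary, info = convolutional_barycenter_gpu(Hv, reg=64 / 40,
                                          alpha=[0.5, 0.5], niter=200)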
Example #56
0
def make_classification(n_samples=100,
                        n_features=20,
                        n_informative=2,
                        n_redundant=2,
                        n_repeated=0,
                        n_classes=2,
                        n_clusters_per_class=2,
                        weights=None,
                        flip_y=0.01,
                        class_sep=1.0,
                        hypercube=True,
                        shift=0.0,
                        scale=1.0,
                        shuffle=True,
                        random_state=None,
                        order='F',
                        dtype='float32',
                        _centroids=None,
                        _informative_covariance=None,
                        _redundant_covariance=None,
                        _repeated_indices=None):
    """
    Generate a random n-class classification problem.
    This initially creates clusters of points normally distributed (std=1)
    about vertices of an `n_informative`-dimensional hypercube with sides of
    length :py:`2*class_sep` and assigns an equal number of clusters to each
    class. It introduces interdependence between these features and adds
    various types of further noise to the data.
    Without shuffling, `X` horizontally stacks features in the following
    order: the primary `n_informative` features, followed by `n_redundant`
    linear combinations of the informative features, followed by `n_repeated`
    duplicates, drawn randomly with replacement from the informative and
    redundant features. The remaining features are filled with random noise.
    Thus, without shuffling, all useful features are contained in the columns
    :py:`X[:, :n_informative + n_redundant + n_repeated]`.

    Examples
    --------

    .. code-block:: python

        >>> from cuml.datasets.classification import make_classification

        >>> X, y = make_classification(n_samples=10, n_features=4,
        ...                            n_informative=2, n_classes=2,
        ...                            random_state=10)

        >>> print(X) # doctest: +SKIP
        [[-1.7974224   0.24425316  0.39062843 -0.38293394]
        [ 0.6358963   1.4161923   0.06970507 -0.16085647]
        [-0.22802866 -1.1827322   0.3525861   0.276615  ]
        [ 1.7308872   0.43080002  0.05048406  0.29837844]
        [-1.9465544   0.5704457  -0.8997551  -0.27898186]
        [ 1.0575483  -0.9171263   0.09529338  0.01173469]
        [ 0.7917619  -1.0638094  -0.17599393 -0.06420116]
        [-0.6686142  -0.13951421 -0.6074711   0.21645583]
        [-0.88968956 -0.914443    0.1302423   0.02924336]
        [-0.8817671  -0.84549576  0.1845096   0.02556021]]

        >>> print(y)
        [0 1 0 1 1 0 0 1 0 0]

    Parameters
    ----------
    n_samples : int, optional (default=100)
        The number of samples.
    n_features : int, optional (default=20)
        The total number of features. These comprise `n_informative`
        informative features, `n_redundant` redundant features,
        `n_repeated` duplicated features and
        :py:`n_features-n_informative-n_redundant-n_repeated` useless features
        drawn at random.
    n_informative : int, optional (default=2)
        The number of informative features. Each class is composed of a number
        of gaussian clusters each located around the vertices of a hypercube
        in a subspace of dimension `n_informative`. For each cluster,
        informative features are drawn independently from  N(0, 1) and then
        randomly linearly combined within each cluster in order to add
        covariance. The clusters are then placed on the vertices of the
        hypercube.
    n_redundant : int, optional (default=2)
        The number of redundant features. These features are generated as
        random linear combinations of the informative features.
    n_repeated : int, optional (default=0)
        The number of duplicated features, drawn randomly from the informative
        and the redundant features.
    n_classes : int, optional (default=2)
        The number of classes (or labels) of the classification problem.
    n_clusters_per_class : int, optional (default=2)
        The number of clusters per class.
    weights : array-like of shape (n_classes,) or (n_classes - 1,),\
              (default=None)
        The proportions of samples assigned to each class. If None, then
        classes are balanced. Note that if :py:`len(weights) == n_classes - 1`,
        then the last class weight is automatically inferred.
        More than `n_samples` samples may be returned if the sum of
        `weights` exceeds 1.
    flip_y : float, optional (default=0.01)
        The fraction of samples whose class is assigned randomly. Larger
        values introduce noise in the labels and make the classification
        task harder.
    class_sep : float, optional (default=1.0)
        The factor multiplying the hypercube size.  Larger values spread
        out the clusters/classes and make the classification task easier.
    hypercube : boolean, optional (default=True)
        If True, the clusters are put on the vertices of a hypercube. If
        False, the clusters are put on the vertices of a random polytope.
    shift : float, array of shape [n_features] or None, optional (default=0.0)
        Shift features by the specified value. If None, then features
        are shifted by a random value drawn in [-class_sep, class_sep].
    scale : float, array of shape [n_features] or None, optional (default=1.0)
        Multiply features by the specified value. If None, then features
        are scaled by a random value drawn in [1, 100]. Note that scaling
        happens after shifting.
    shuffle : boolean, optional (default=True)
        Shuffle the samples and the features.
    random_state : int, RandomState instance or None (default)
        Determines random number generation for dataset creation. Pass an int
        for reproducible output across multiple function calls.
        See :term:`Glossary <random_state>`.
    order: str, optional (default='F')
        The order of the generated samples
    dtype : str, optional (default='float32')
        Dtype of the generated samples
    _centroids: array of centroids of shape (n_clusters, n_informative)
    _informative_covariance: array for covariance between informative features
        of shape (n_clusters, n_informative, n_informative)
    _redundant_covariance: array for covariance between redundant features
        of shape (n_informative, n_redundant)
    _repeated_indices: array of indices for the repeated features
        of shape (n_repeated, )

    Returns
    -------
    X : device array of shape [n_samples, n_features]
        The generated samples.
    y : device array of shape [n_samples]
        The integer labels for class membership of each sample.

    Notes
    -----
    The algorithm is adapted from Guyon [1]_ and was designed to generate
    the "Madelon" dataset. How we optimized for GPUs:

        1. Firstly, we generate X from a standard univariate normal instead
           of zeros. This saves memory, as we don't need to generate
           univariates each time for each feature class (informative,
           repeated, etc.), while also providing the added speedup of
           generating a big matrix on GPU
        2. We generate X with :py:`order=F` by construction. We exploit the
           fact that X is generated from a univariate normal, and
           covariance is introduced with matrix multiplications. This means
           we can generate X as a 1D array and just reshape it to the
           desired order, which only updates the metadata and eliminates
           copies
        3. Lastly, we also shuffle by construction. Centroid indices are
           permuted for each sample, and then we construct the data for
           each centroid. This shuffle works for both :py:`order=C` and
           :py:`order=F` and eliminates any need for secondary copies

    References
    ----------
    .. [1] I. Guyon, "Design of experiments for the NIPS 2003 variable
           selection benchmark", 2003.

    """
    cuml.internals.set_api_output_type("cupy")

    generator = _create_rs_generator(random_state)
    np_seed = int(generator.randint(n_samples, size=1))
    np.random.seed(np_seed)

    # Count features, clusters and samples
    if n_informative + n_redundant + n_repeated > n_features:
        raise ValueError("Number of informative, redundant and repeated "
                         "features must sum to less than the number of total"
                         " features")
    # Use log2 to avoid overflow errors
    if n_informative < np.log2(n_classes * n_clusters_per_class):
        msg = "n_classes({}) * n_clusters_per_class({}) must be"
        msg += " smaller or equal 2**n_informative({})={}"
        raise ValueError(
            msg.format(n_classes, n_clusters_per_class, n_informative,
                       2**n_informative))

    if weights is not None:
        if len(weights) not in [n_classes, n_classes - 1]:
            raise ValueError("Weights specified but incompatible with number "
                             "of classes.")
        if len(weights) == n_classes - 1:
            if isinstance(weights, list):
                weights = weights + [1.0 - sum(weights)]
            else:
                weights = np.resize(weights, n_classes)
                weights[-1] = 1.0 - sum(weights[:-1])
    else:
        weights = [1.0 / n_classes] * n_classes

    n_clusters = n_classes * n_clusters_per_class

    # Distribute samples among clusters by weight
    n_samples_per_cluster = [
        int(n_samples * weights[k % n_classes] / n_clusters_per_class)
        for k in range(n_clusters)
    ]

    for i in range(n_samples - sum(n_samples_per_cluster)):
        n_samples_per_cluster[i % n_clusters] += 1

    # Initialize X and y
    X = generator.randn(n_samples * n_features, dtype=dtype)
    X = X.reshape((n_samples, n_features), order=order)
    y = cp.zeros(n_samples, dtype=np.int64)

    # Build the polytope whose vertices become cluster centroids
    if _centroids is None:
        centroids = cp.array(
            _generate_hypercube(n_clusters, n_informative,
                                generator)).astype(dtype, copy=False)
    else:
        centroids = _centroids
    centroids *= 2 * class_sep
    centroids -= class_sep
    if not hypercube:
        centroids *= generator.rand(n_clusters, 1, dtype=dtype)
        centroids *= generator.rand(1, n_informative, dtype=dtype)

    # Create redundant features
    if n_redundant > 0:
        if _redundant_covariance is None:
            B = 2 * generator.rand(n_informative, n_redundant, dtype=dtype) - 1
        else:
            B = _redundant_covariance

    # Create each cluster; a variant of make_blobs
    if shuffle:
        proba_samples_per_cluster = np.array(n_samples_per_cluster) / np.sum(
            n_samples_per_cluster)
        shuffled_sample_indices = cp.array(
            np.random.choice(n_clusters,
                             n_samples,
                             replace=True,
                             p=proba_samples_per_cluster))
        for k, centroid in enumerate(centroids):
            centroid_indices = cp.where(shuffled_sample_indices == k)
            y[centroid_indices[0]] = k % n_classes

            X_k = X[centroid_indices[0], :n_informative]

            if _informative_covariance is None:
                A = 2 * generator.rand(
                    n_informative, n_informative, dtype=dtype) - 1
            else:
                A = _informative_covariance[k]
            X_k = cp.dot(X_k, A)

            # NOTE: This could be done outside the loop, but a current
            # cupy bug does not allow that
            # https://github.com/cupy/cupy/issues/3284
            if n_redundant > 0:
                X[centroid_indices[0],
                  n_informative:n_informative + n_redundant] = cp.dot(X_k, B)

            X_k += centroid  # shift the cluster to a vertex
            X[centroid_indices[0], :n_informative] = X_k
    else:
        stop = 0
        for k, centroid in enumerate(centroids):
            start, stop = stop, stop + n_samples_per_cluster[k]
            y[start:stop] = k % n_classes  # assign labels
            X_k = X[start:stop, :n_informative]  # slice a view of the cluster

            if _informative_covariance is None:
                A = 2 * generator.rand(
                    n_informative, n_informative, dtype=dtype) - 1
            else:
                A = _informative_covariance[k]
            X_k = cp.dot(X_k, A)  # introduce random covariance

            if n_redundant > 0:
                X[start:stop, n_informative:n_informative + n_redundant] = \
                    cp.dot(X_k, B)

            X_k += centroid  # shift the cluster to a vertex
            X[start:stop, :n_informative] = X_k

    # Repeat some features
    if n_repeated > 0:
        n = n_informative + n_redundant
        if _repeated_indices is None:
            indices = ((n - 1) * generator.rand(n_repeated, dtype=dtype) +
                       0.5).astype(np.intp)
        else:
            indices = _repeated_indices
        X[:, n:n + n_repeated] = X[:, indices]

    # Randomly replace labels
    if flip_y >= 0.0:
        flip_mask = generator.rand(n_samples, dtype=dtype) < flip_y
        y[flip_mask] = generator.randint(n_classes, size=int(flip_mask.sum()))

    # Randomly shift and scale
    if shift is None:
        shift = (2 * generator.rand(n_features, dtype=dtype) - 1) * class_sep
    X += shift

    if scale is None:
        scale = 1 + 100 * generator.rand(n_features, dtype=dtype)
    X *= scale

    return X, y
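
A tiny check of the reshape trick from note 2 above: reshaping a freshly
generated 1D buffer into Fortran order only rewrites metadata, so no copy is
made (a sketch, not part of the original code):

import cupy as cp

buf = cp.random.randn(6 * 4)
X = buf.reshape((6, 4), order='F')

# Same allocation, new strides: the reshape was metadata-only.
assert X.data.ptr == buf.data.ptr
print(X.strides)   # column-major strides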
Example #57
0
    def fit(self, X, y=None) -> "KBinsDiscretizer":
        """
        Fit the estimator.

        Parameters
        ----------
        X : numeric array-like, shape (n_samples, n_features)
            Data to be discretized.

        y : None
            Ignored. This parameter exists only for compatibility with
            :class:`sklearn.pipeline.Pipeline`.

        Returns
        -------
        self
        """
        X = self._validate_data(X, dtype='numeric')

        valid_encode = ('onehot', 'onehot-dense', 'ordinal')
        if self.encode not in valid_encode:
            raise ValueError("Valid options for 'encode' are {}. "
                             "Got encode={!r} instead.".format(
                                 valid_encode, self.encode))
        valid_strategy = ('uniform', 'quantile', 'kmeans')
        if self.strategy not in valid_strategy:
            raise ValueError("Valid options for 'strategy' are {}. "
                             "Got strategy={!r} instead.".format(
                                 valid_strategy, self.strategy))

        n_features = X.shape[1]
        n_bins = self._validate_n_bins(n_features)
        n_bins = np.asnumpy(n_bins)

        bin_edges = cpu_np.zeros(n_features, dtype=object)
        for jj in range(n_features):
            column = X[:, jj]
            col_min, col_max = column.min(), column.max()

            if col_min == col_max:
                warnings.warn("Feature %d is constant and will be "
                              "replaced with 0." % jj)
                n_bins[jj] = 1
                bin_edges[jj] = np.array([-np.inf, np.inf])
                continue

            if self.strategy == 'uniform':
                bin_edges[jj] = np.linspace(col_min, col_max, n_bins[jj] + 1)

            elif self.strategy == 'quantile':
                quantiles = np.linspace(0, 100, n_bins[jj] + 1)
                bin_edges[jj] = np.asarray(np.percentile(column, quantiles))
                # Workaround for https://github.com/cupy/cupy/issues/4451
                # This should be removed as soon as a fix is available in cupy
                # in order to limit alterations in the included sklearn code
                bin_edges[jj][-1] = col_max

            elif self.strategy == 'kmeans':
                # Deterministic initialization with uniform spacing
                uniform_edges = np.linspace(col_min, col_max, n_bins[jj] + 1)
                init = (uniform_edges[1:] + uniform_edges[:-1])[:, None] * 0.5

                # 1D k-means procedure
                km = KMeans(n_clusters=n_bins[jj],
                            init=init,
                            n_init=1,
                            output_type='cupy')
                km = km.fit(column[:, None])
                with using_output_type('cupy'):
                    centers = km.cluster_centers_[:, 0]
                # Must sort, centers may be unsorted even with sorted init
                centers.sort()
                bin_edges[jj] = (centers[1:] + centers[:-1]) * 0.5
                bin_edges[jj] = np.r_[col_min, bin_edges[jj], col_max]

            # Remove bins whose widths are too small (i.e., <= 1e-8)
            if self.strategy in ('quantile', 'kmeans'):
                mask = np.diff(bin_edges[jj], prepend=-np.inf) > 1e-8
                bin_edges[jj] = bin_edges[jj][mask]
                if len(bin_edges[jj]) - 1 != n_bins[jj]:
                    warnings.warn('Bins whose widths are too small (i.e., <= '
                                  '1e-8) in feature %d are removed. Consider '
                                  'decreasing the number of bins.' % jj)
                    n_bins[jj] = len(bin_edges[jj]) - 1

        self.bin_edges_ = bin_edges
        self.n_bins_ = n_bins

        if 'onehot' in self.encode:
            self._encoder = OneHotEncoder(categories=np.array(
                [np.arange(i) for i in self.n_bins_]),
                                          sparse=self.encode == 'onehot',
                                          output_type='cupy')
            # Fit the OneHotEncoder with toy datasets
            # so that it's ready for use after the KBinsDiscretizer is fitted
            self._encoder.fit(np.zeros((1, len(self.n_bins_)), dtype=int))

        return self
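
A standalone sketch of the 'quantile' branch above in plain cupy, including the
workaround of pinning the last edge to the column maximum:

import cupy as cp

column = cp.random.rand(1000)
n_bins = 4

quantiles = cp.linspace(0, 100, n_bins + 1)
edges = cp.percentile(column, quantiles)
edges[-1] = column.max()   # mirrors the cupy issue #4451 workaround
print(edges)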
Example #58
0
    def test_cuDoubleComplex(self):
        N = 100
        block = 32
        grid = (N + block - 1) // block
        dtype = cupy.complex128

        mod = cupy.RawModule(
            code=_test_cuComplex,
            translate_cucomplex=True)
        a = cupy.random.random((N,)) + 1j*cupy.random.random((N,))
        a = a.astype(dtype)
        b = cupy.random.random((N,)) + 1j*cupy.random.random((N,))
        b = b.astype(dtype)
        c = cupy.random.random((N,)) + 1j*cupy.random.random((N,))
        c = c.astype(dtype)
        out = cupy.zeros((N,), dtype=dtype)
        out_float = cupy.zeros((N,), dtype=cupy.float64)
        out_down = cupy.zeros((N,), dtype=cupy.complex64)

        ker = mod.get_function('test_add')
        ker((grid,), (block,), (a, b, out))
        assert (out == a + b).all()

        ker = mod.get_function('test_sub')
        ker((grid,), (block,), (a, b, out))
        assert (out == a - b).all()

        ker = mod.get_function('test_mul')
        ker((grid,), (block,), (a, b, out))
        assert cupy.allclose(out, a * b)

        ker = mod.get_function('test_div')
        ker((grid,), (block,), (a, b, out))
        assert (out == a / b).all()

        ker = mod.get_function('test_conj')
        ker((grid,), (block,), (a, out))
        assert (out == cupy.conj(a)).all()

        ker = mod.get_function('test_abs')
        ker((grid,), (block,), (a, out_float))
        assert (out_float == cupy.abs(a)).all()

        ker = mod.get_function('test_fma')
        ker((grid,), (block,), (a, b, c, out))
        assert cupy.allclose(out, a * b + c)

        ker = mod.get_function('test_make')
        ker((grid,), (block,), (out,))
        assert (out == 1.8 - 1j * 8.7).all()

        ker = mod.get_function('test_downcast')
        ker((grid,), (block,), (a, out_down))
        assert (out_down == a.astype(cupy.complex64)).all()

        # NumPy scalars.
        b = cupy.complex128(2 + 3j)
        ker = mod.get_function('test_add_scalar')
        ker((grid,), (block,), (a, b, out))
        assert (out == a + b).all()

        # Python scalars.
        b = 2 + 3j
        ker = mod.get_function('test_add_scalar')
        ker((grid,), (block,), (a, b, out))
        assert (out == a + b).all()
Example #59
0
def einsum(*operands, **kwargs):
    """einsum(subscripts, *operands, dtype=False)

    Evaluates the Einstein summation convention on the operands.
    Using the Einstein summation convention, many common multi-dimensional
    array operations can be represented in a simple fashion. This function
    provides a way to compute such summations.

    .. note::
       Memory contiguity of calculation result is not always compatible with
       `numpy.einsum`.
       ``out``, ``order``, and ``casting`` options are not supported.

    Args:
        subscripts (str): Specifies the subscripts for summation.
        operands (sequence of arrays): These are the arrays for the operation.

    Returns:
        cupy.ndarray:
            The calculation based on the Einstein summation convention.

    .. seealso:: :func:`numpy.einsum`

    """

    input_subscripts, output_subscript, operands = \
        _parse_einsum_input(operands)
    assert isinstance(input_subscripts, list)
    assert isinstance(operands, list)

    dtype = kwargs.pop('dtype', None)

    # casting = kwargs.pop('casting', 'safe')
    casting_kwargs = {}  # casting is not supported yet in astype

    optimize = kwargs.pop('optimize', False)
    if optimize is True:
        optimize = 'greedy'
    if kwargs:
        raise TypeError('Did not understand the following kwargs: %s'
                        % list(kwargs.keys()))

    result_dtype = cupy.result_type(*operands) if dtype is None else dtype
    operands = [
        cupy.asanyarray(arr)
        for arr in operands
    ]

    input_subscripts = [
        _parse_ellipsis_subscript(sub, idx, ndim=arr.ndim)
        for idx, (sub, arr) in enumerate(zip(input_subscripts, operands))
    ]

    # Get length of each unique dimension and ensure all dimensions are correct
    dimension_dict = {}
    for idx, sub in enumerate(input_subscripts):
        sh = operands[idx].shape
        for axis, label in enumerate(sub):
            dim = sh[axis]
            if label in dimension_dict.keys():
                # For broadcasting cases we always want the largest dim size
                if dimension_dict[label] == 1:
                    dimension_dict[label] = dim
                elif dim not in (1, dimension_dict[label]):
                    dim_old = dimension_dict[label]
                    raise ValueError(
                        'Size of label \'%s\' for operand %d (%d) '
                        'does not match previous terms (%d).'
                        % (_chr(label), idx, dim, dim_old))
            else:
                dimension_dict[label] = dim

    if output_subscript is None:
        # Build output subscripts
        tmp_subscripts = list(itertools.chain.from_iterable(input_subscripts))
        output_subscript = [
            label
            for label in sorted(set(tmp_subscripts))
            if label < 0 or tmp_subscripts.count(label) == 1
        ]
    else:
        if not options['sum_ellipsis']:
            if '@' not in output_subscript and -1 in dimension_dict:
                raise ValueError(
                    'output has more dimensions than subscripts '
                    'given in einstein sum, but no \'...\' ellipsis '
                    'provided to broadcast the extra dimensions.')
        output_subscript = _parse_ellipsis_subscript(
            output_subscript, None,
            ellipsis_len=sum(label < 0 for label in dimension_dict.keys())
        )

        # Make sure output subscripts are in the input
        tmp_subscripts = set(itertools.chain.from_iterable(input_subscripts))
        for label in output_subscript:
            if label not in tmp_subscripts:
                raise ValueError(
                    'einstein sum subscripts string included output subscript '
                    '\'%s\' which never appeared in an input' % _chr(label))
        if len(output_subscript) != len(set(output_subscript)):
            for label in output_subscript:
                if output_subscript.count(label) >= 2:
                    raise ValueError(
                        'einstein sum subscripts string includes output '
                        'subscript \'%s\' multiple times' % _chr(label))

    _einsum_diagonals(input_subscripts, operands)

    # no more raises

    if len(operands) >= 2:
        if any(arr.size == 0 for arr in operands):
            return cupy.zeros(
                tuple(dimension_dict[label] for label in output_subscript),
                dtype=result_dtype
            )

        # Don't squeeze if unary, because this affects later (in trivial sum)
        # whether the return is a writeable view.
        for idx in six.moves.range(len(operands)):
            arr = operands[idx]
            if 1 in arr.shape:
                squeeze_indices = []
                sub = []
                for axis, label in enumerate(input_subscripts[idx]):
                    if arr.shape[axis] == 1:
                        squeeze_indices.append(axis)
                    else:
                        sub.append(label)
                input_subscripts[idx] = sub
                operands[idx] = cupy.squeeze(arr, axis=tuple(squeeze_indices))
                assert operands[idx].ndim == len(input_subscripts[idx])
            del arr

    # unary einsum without summation should return a (writeable) view
    returns_view = len(operands) == 1

    # unary sum
    for idx, sub in enumerate(input_subscripts):
        other_subscripts = copy.copy(input_subscripts)
        other_subscripts[idx] = output_subscript
        other_subscripts = set(itertools.chain.from_iterable(other_subscripts))
        sum_axes = tuple(
            axis
            for axis, label in enumerate(sub)
            if label not in other_subscripts
        )
        if sum_axes:
            returns_view = False
            input_subscripts[idx] = [
                label
                for axis, label in enumerate(sub)
                if axis not in sum_axes
            ]

            operands[idx] = operands[idx].sum(
                axis=sum_axes, dtype=result_dtype)

    if returns_view:
        operands = [a.view() for a in operands]
    else:
        operands = [
            a.astype(result_dtype, copy=False, **casting_kwargs)
            for a in operands
        ]

    # no more casts

    optimize_algorithms = {
        'greedy': _greedy_path,
        'optimal': _optimal_path,
    }
    if optimize is False:
        path = [tuple(six.moves.range(len(operands)))]
    elif len(optimize) and (optimize[0] == 'einsum_path'):
        path = optimize[1:]
    else:
        try:
            if len(optimize) == 2 and isinstance(optimize[1], (int, float)):
                algo = optimize_algorithms[optimize[0]]
                memory_limit = int(optimize[1])
            else:
                algo = optimize_algorithms[optimize]
                memory_limit = 2 ** 31  # TODO(kataoka): fix?
        except (TypeError, KeyError):  # unhashable type or not found
            raise TypeError('Did not understand the path (optimize): %s'
                            % str(optimize))
        input_sets = [set(sub) for sub in input_subscripts]
        output_set = set(output_subscript)
        path = algo(input_sets, output_set, dimension_dict, memory_limit)
        if any(len(indices) > 2 for indices in path):
            warnings.warn(
                'memory efficient einsum is not supported yet',
                util.PerformanceWarning)

    for idx0, idx1 in _iter_path_pairs(path):
        # "reduced" binary einsum
        arr0 = operands.pop(idx0)
        sub0 = input_subscripts.pop(idx0)
        arr1 = operands.pop(idx1)
        sub1 = input_subscripts.pop(idx1)
        sub_others = list(itertools.chain(
            output_subscript,
            itertools.chain.from_iterable(input_subscripts)))
        arr_out, sub_out = reduced_binary_einsum(
            arr0, sub0, arr1, sub1, sub_others)
        operands.append(arr_out)
        input_subscripts.append(sub_out)
        del arr0, arr1

    # unary einsum at last
    arr0, = operands
    sub0, = input_subscripts

    transpose_axes = []
    for label in output_subscript:
        if label in sub0:
            transpose_axes.append(sub0.index(label))

    arr_out = arr0.transpose(transpose_axes).reshape([
        dimension_dict[label]
        for label in output_subscript
    ])
    assert returns_view or arr_out.dtype == result_dtype
    return arr_out
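
Basic calls exercising the function above; these mirror numpy.einsum semantics:

import cupy

a = cupy.arange(6).reshape(2, 3)
b = cupy.arange(3)

print(einsum('ij,j->i', a, b))     # matrix-vector product
print(einsum('ij->ji', a))         # transpose (a writeable view)
print(einsum('ij->', a))           # sum over all elements
print(einsum('ij,kj->ik', a, a, optimize='greedy'))   # a @ a.T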
Example #60
0
    def _minor_reduce(self, ufunc, axis, nonzero):
        """Reduce nonzeros with a ufunc over the minor axis when non-empty

        Can be applied to a function of self.data by supplying data parameter.
        Warning: this does not call sum_duplicates()

        Args:
            ufunc (object): Function handle giving the operation to be
                conducted.
            axis (int): Matrix over which the reduction should be
                conducted.

        Returns:
            (cupy.ndarray): Reduce result for nonzeros in each
                major_index.

        """

        # Pick the output length and the minor-dimension length by axis
        if axis == 1:
            n_major, n_minor = self.shape[0], self.shape[1]
        else:
            n_major, n_minor = self.shape[1], self.shape[0]

        # Create the vector to hold output
        value = cupy.zeros(n_major).astype(cupy.float64)

        # Select the kernel matching the requested reduction
        kern = None
        if ufunc == cupy.amax:
            kern = (self._max_nonzero_reduction_kern if nonzero
                    else self._max_reduction_kern)
        elif ufunc == cupy.amin:
            kern = (self._min_nonzero_reduction_kern if nonzero
                    else self._min_reduction_kern)

        # Perform the calculation: one block per major-axis entry
        if kern is not None:
            kern((n_major, ), (1, ),
                 (self.data.astype(cupy.float64),
                  self.indptr[:len(self.indptr) - 1],
                  self.indptr[1:], cupy.int64(n_minor), value))

        return value
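
What the kernels above compute, sketched per indptr segment in plain Python (the
real kernels assign one thread block per row or column):

import cupy

data = cupy.array([3., 4., 1., 5., 2.])
indptr = cupy.array([0, 2, 2, 5])   # the middle row stores no values

row_max = cupy.zeros(len(indptr) - 1)
for i in range(len(indptr) - 1):
    lo, hi = int(indptr[i]), int(indptr[i + 1])
    if hi > lo:
        row_max[i] = data[lo:hi].max()
print(row_max)   # [4. 0. 5.]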