Ejemplo n.º 1
def main(model_dir, train_dir, dev_dir,
         nr_hidden=64, max_length=100, # Shape
         dropout=0.5, learn_rate=0.001, # General NN config
         nb_epoch=5, batch_size=32, nr_examples=-1):  # Training params
    """Evaluate a stored model or train a new one.

    When the flag ``is_runtime`` is true, the development set is scored with
    the model stored in ``model_dir`` and the accuracy is printed. Otherwise
    the training and development sets are loaded and handed to ``train``.

    Args:
        model_dir: Directory of the model passed to ``evaluate``.
        train_dir: Directory read for training examples.
        dev_dir: Directory read for development examples.
        nr_hidden: Hidden size forwarded to ``train``.
        max_length: Maximum length forwarded to ``evaluate`` / ``train``.
        dropout: Dropout rate forwarded to ``train``.
        learn_rate: Learning rate forwarded to ``train``.
        nb_epoch: Number of training epochs.
        batch_size: Training batch size.
        nr_examples: Limit passed to ``read_data`` for both data sets.

    NOTE(review): ``is_runtime`` is not a parameter of this function; it is
    read from the enclosing scope -- confirm where it is defined.
    """
    model_dir = pathlib.Path(model_dir)
    train_dir = pathlib.Path(train_dir)
    dev_dir = pathlib.Path(dev_dir)
    if is_runtime:
        dev_texts, dev_labels = read_data(dev_dir)
        acc = evaluate(model_dir, dev_texts, dev_labels, max_length=max_length)
        print(acc)
    else:
        print("Read data")
        train_texts, train_labels = read_data(train_dir, limit=nr_examples)
        dev_texts, dev_labels = read_data(dev_dir, limit=nr_examples)
        print("Using GPU 0")
        train_labels = xp.asarray(train_labels, dtype='i')
        dev_labels = xp.asarray(dev_labels, dtype='i')
        # Forward the caller's dropout instead of a hard-coded 0.5 so the
        # ``dropout`` argument actually takes effect.
        train(train_texts, train_labels, dev_texts, dev_labels,
              {'nr_hidden': nr_hidden, 'max_length': max_length, 'nr_class': 2,
               'nr_vector': 2000, 'nr_dim': 32},
              {'dropout': dropout, 'lr': learn_rate},
              nb_epoch=nb_epoch, batch_size=batch_size)
Ejemplo n.º 2
def _convert_array(xs, array_module):
    if array_module == 'all_numpy':
        return xs
    elif array_module == 'all_cupy':
        return cupy.asarray(xs)
        return [cupy.asarray(x) if numpy.random.random_integers(0, 1)
                else x for x in xs]
Ejemplo n.º 3
Archivo: cuda.py Proyecto: hvy/chainer
def _array_to_gpu(array, device, stream):
    """Send ``array`` to ``device`` as a CuPy array.

    Args:
        array: ``None``, a ``chainerx.ndarray``, a NumPy scalar, an
            ``intel64.mdarray``, an ``ndarray`` or a ``numpy.ndarray``.
        device: Target device; used as a context manager around the copy and
            compared with ``array.device``.
        stream: Optional stream; when not ``None`` it is entered as a context
            manager around the copy.

    Returns:
        ``None`` if ``array`` is ``None``; ``array`` itself when it already
        lives on ``device``; otherwise a CuPy array created under ``device``.

    Raises:
        TypeError: If ``array`` is none of the supported types.

    NOTE(review): the ``cupy.ndarray(...)`` argument list for the ChainerX
    CUDA case and the ``else`` branches were rebuilt from a truncated
    listing -- verify against the upstream implementation.
    """
    if array is None:
        return None

    if isinstance(array, chainerx.ndarray):
        # TODO(niboshi): Update this logic once both CuPy and ChainerX support
        # the array interface.
        if array.device.backend.name == 'cuda':
            # Convert to cupy.ndarray on the same device as source array
            array = cupy.ndarray(
                array.shape,
                array.dtype,
                cupy.cuda.MemoryPointer(
                    cupy.cuda.UnownedMemory(
                        array.data_ptr + array.offset,
                        array.data_size,
                        array,
                        array.device.index),
                    0),
                strides=array.strides)
        else:
            array = chainerx.to_numpy(array)
    elif isinstance(array, (numpy.number, numpy.bool_)):
        array = numpy.asarray(array)
    elif isinstance(array, intel64.mdarray):
        array = numpy.asarray(array)

    if isinstance(array, ndarray):
        if array.device == device:
            return array
        is_numpy = False
    elif isinstance(array, numpy.ndarray):
        is_numpy = True
    else:
        raise TypeError(
            'The array sent to gpu must be an array or a NumPy scalar.'
            '\nActual type: {0}.'.format(type(array)))

    if stream is not None:
        with device:
            with stream:
                if is_numpy:
                    return cupy.asarray(array)
                # Need to make a copy when an array is copied to another device
                return cupy.array(array, copy=True)

    with device:
        if is_numpy:
            return cupy.asarray(array)
        # Need to make a copy when an array is copied to another device
        return cupy.array(array, copy=True)
Ejemplo n.º 4
def _fftconv(a, b, axes=(0, 1)):
    """Patched version of :func:`sporco.linalg.fftconv`.

    Multiplies the FFTs of ``a`` and ``b`` over ``axes`` and transforms the
    product back. Real transforms are used when both inputs are real,
    complex transforms otherwise.

    Args:
        a: First input array.
        b: Second input array.
        axes: Axes over which the transforms are taken.

    Returns:
        The inverse transform of the product of the two spectra.
    """

    if cp.isrealobj(a) and cp.isrealobj(b):
        fft = cp.fft.rfftn
        ifft = cp.fft.irfftn
    else:
        fft = cp.fft.fftn
        ifft = cp.fft.ifftn
    # Shapes are plain Python ints, so the per-axis maximum can be taken on
    # the host without building device arrays.
    dims = [max(a.shape[i], b.shape[i]) for i in axes]
    af = fft(a, dims, axes)
    bf = fft(b, dims, axes)
    return ifft(af * bf, dims, axes)
Ejemplo n.º 5
def to_gpu(array, device=None, stream=None):
    """Copies the given CPU array to specified device.

    Args:
        array: Array to be sent to GPU.
        device: Device specifier.
        stream (cupy.cuda.Stream): CUDA stream. Must be ``None``.

    Returns:
        cupy.ndarray: Array on GPU.

        If ``array`` is already on GPU, then this function just returns
        ``array`` without performing any copy. Note that this function does not
        copy :class:`cupy.ndarray` into specified device.

    """
    assert stream is None  # TODO(beam2d): FIX IT
    with get_device(device):
        dev_id = int(get_device(array))
        if dev_id != -1 and dev_id != cupy.cuda.device.get_device_id():
            # Need to make a copy when an array is copied to another device
            return cupy.array(array, copy=True)
        else:
            return cupy.asarray(array)
Ejemplo n.º 6
 def setup_method(self, method):
     """Create the ``U``, ``V`` and ``D`` fixture arrays for a test."""
     side = 32
     cube = (side, side, side)
     half = old_div(side, 2)
     # Array of ones with the first half of axis 1 set to -1.
     base = cp.ones(cube)
     base[:, 0:half, :] = -1
     self.U = base
     # Scaled random perturbation moved to the ``cp`` array type.
     self.V = 1e-1 * cp.asarray(np.random.randn(*cube))
     self.D = self.U + self.V
Ejemplo n.º 7
def _list2array(lst):
    """Convert a list to a ``cp`` array.

    A non-empty list whose first element is a ``cp.ndarray`` is stacked with
    ``cp.hstack``; any other list is converted with ``cp.asarray``.
    """

    if lst and isinstance(lst[0], cp.ndarray):
        return cp.hstack(lst)
    else:
        return cp.asarray(lst)
Ejemplo n.º 8
    def check(self, func, n, gen, *args):
        """Check ``func`` against its wrapper ``f`` on NumPy and CuPy inputs.

        Args:
            func: Function under test; called with ``n`` arrays.
            n: Number of input arrays to generate.
            gen: Either one generator called as ``gen(*args)`` for every
                input, or a tuple of generators paired element-wise with
                the argument tuples in ``args``.
            *args: Arguments for the generator(s).
        """

        # NOTE(review): ``f`` is a plain pass-through as written; a decorator
        # may have been lost from this listing -- confirm.
        def f(*x):
            return func(*x)

        if isinstance(gen, tuple):
            ndata = [g(*a) for _, g, a in zip(range(n), gen, args)]
        else:
            ndata = [gen(*args) for _ in range(n)]
        nret = func(*ndata)
        fnret = f(*ndata)
        nret = list(nret) if isinstance(nret, tuple) else [nret]
        fnret = list(fnret) if isinstance(fnret, tuple) else [fnret]
        # Distinct loop names so the parameter ``n`` is not shadowed.
        for expected, wrapped in zip(nret, fnret):
            numpy.testing.assert_array_almost_equal(expected, wrapped)

        cdata = [cupy.asarray(_) for _ in ndata]
        cret = func(*cdata)
        fcret = f(*cdata)
        cret = list(cret) if isinstance(cret, tuple) else [cret]
        fcret = list(fcret) if isinstance(fcret, tuple) else [fcret]
        for expected, c, fc in zip(nret, cret, fcret):
            numpy.testing.assert_array_almost_equal(expected, c.get())
            numpy.testing.assert_array_almost_equal(expected, fc.get())
Ejemplo n.º 9
def to_cupy(array):  # pragma: no cover
    """Return ``array`` as a CuPy array if it is a NumPy array.

    Any other object is handed back unchanged. CuPy is imported on every
    call, before the type check.
    """
    import cupy

    is_host_array = isinstance(array, np.ndarray)
    return cupy.asarray(array) if is_host_array else array
Ejemplo n.º 10
 def _get_labelled_sentences(self, docs, doc_labels):
     """Flatten documents into sentences tagged with their document label.

     Args:
         docs: Iterable of documents exposing a ``sents`` iterable.
         doc_labels: One label per document, paired with ``docs`` in order.

     Returns:
         A ``(sentences, labels)`` pair: the list of all sentences and an
         ``xp`` array of dtype ``'i'`` with one label per sentence.
     """
     labels = []
     sentences = []
     for doc, y in izip(docs, doc_labels):
         for sent in doc.sents:
             # Every sentence inherits the label of its document.
             sentences.append(sent)
             labels.append(y)
     return sentences, xp.asarray(labels, dtype='i')
Ejemplo n.º 11
def to_gpu(array, device=None, stream=None):
    """Copies the given CPU array to specified device.

    Args:
        array: Array to be sent to GPU.
        device: Device specifier.
        stream (cupy.cuda.Stream): CUDA stream. If not ``None``, the copy runs
            asynchronously. Deprecated: passing a stream emits a
            ``DeprecationWarning``.

    Returns:
        cupy.ndarray: Array on GPU.

        If ``array`` is already on GPU, then this function just returns
        ``array`` without performing any copy. Note that this function does not
        copy :class:`cupy.ndarray` into specified device.

    NOTE(review): the ``warnings.warn`` call, the pinned-memory watch-list
    call, ``stream.wait_event`` and the async device copy were rebuilt from
    a truncated listing -- verify against the upstream implementation.
    """
    import warnings

    with _get_device(device):
        array_dev = get_device_from_array(array)
        if array_dev.id == cupy.cuda.device.get_device_id():
            return array

        if stream is not None:
            warnings.warn(
                'The stream option is deprecated in chainer.cuda.to_gpu. '
                'Please remove it.', DeprecationWarning)
            if stream.ptr != 0:
                ret = cupy.empty_like(array)
                if array_dev.id == -1:
                    # cpu to gpu
                    mem = cupy.cuda.alloc_pinned_memory(array.nbytes)
                    src = numpy.frombuffer(
                        mem, array.dtype, array.size).reshape(array.shape)
                    src[...] = array
                    ret.set(src, stream)
                    # Keep the pinned buffer alive until the copy finishes.
                    cupy.cuda.pinned_memory._add_to_watch_list(
                        stream.record(), mem)
                else:
                    # gpu to gpu
                    with array_dev:
                        src = array.copy()
                        event = Stream.null.record()
                    stream.wait_event(event)
                    ret.data.copy_from_device_async(
                        src.data, src.nbytes, stream)

                    # to hold a reference until the end of the asynchronous
                    # memcpy
                    stream.add_callback(lambda *x: None, (src, ret))
                return ret

        if array_dev.id == -1:
            return cupy.asarray(array)

        # Need to make a copy when an array is copied to another device
        return cupy.array(array, copy=True)
Ejemplo n.º 12
def asanyarray(a, dtype=None):
    """Converts an object to array.

    This is equivalent to cupy.asarray.

    Args:
        a: The source object.
        dtype: Data type specifier, forwarded to :func:`cupy.asarray`.

    Returns:
        The result of ``cupy.asarray(a, dtype)``.

    .. seealso:: :func:`cupy.asarray`, :func:`numpy.asanyarray`

    """
    return cupy.asarray(a, dtype)
Ejemplo n.º 13
 def check_usv(self, array, dtype):
     """Check that ``cupy.linalg.svd`` matches ``numpy.linalg.svd``.

     Both libraries decompose the same input with
     ``full_matrices=self.full_matrices``; the factors are compared by
     absolute value with ``atol=1e-4``.

     Args:
         array: Input data for the decomposition.
         dtype: Data type used to build the CPU and GPU arrays.
     """
     a_cpu = numpy.asarray(array, dtype=dtype)
     a_gpu = cupy.asarray(array, dtype=dtype)
     result_cpu = numpy.linalg.svd(a_cpu, full_matrices=self.full_matrices)
     result_gpu = cupy.linalg.svd(a_gpu, full_matrices=self.full_matrices)
     self.assertEqual(len(result_cpu), len(result_gpu))
     for b_cpu, b_gpu in zip(result_cpu, result_gpu):
         # Use abs to support an inverse vector
         cupy.testing.assert_allclose(
             numpy.abs(b_cpu), cupy.abs(b_gpu), atol=1e-4)
Ejemplo n.º 14
    def check_reduce(self, func, n, reduce_f, gen, *args):
        """Check a fused reduction of ``func`` on NumPy versus CuPy inputs.

        ``func`` is wrapped with ``cupy.fuse`` using ``n`` inputs and
        ``reduce_f`` as the reduction. The wrapper is called on ``n``
        generated arrays and on their CuPy copies, and the two results
        must be almost equal.
        """

        @cupy.fuse(input_num=n, reduce=reduce_f)
        def f(*operands):
            return func(*operands)

        host_inputs = [gen(*args) for _ in range(n)]
        expected = f(*host_inputs)
        device_inputs = [cupy.asarray(item) for item in host_inputs]
        actual = f(*device_inputs)
        numpy.testing.assert_array_almost_equal(expected, actual.get())
Ejemplo n.º 15
 def check_mode(self, array, mode, dtype):
     """Check that ``cupy.linalg.qr`` matches ``numpy.linalg.qr``.

     Args:
         array: Input data for the decomposition.
         mode: ``mode`` argument forwarded to both ``qr`` implementations.
         dtype: Data type used to build the CPU and GPU arrays.

     A tuple result is compared factor by factor; a single-array result is
     compared directly. Dtypes must match and values agree to ``atol=1e-4``.
     """
     a_cpu = numpy.asarray(array, dtype=dtype)
     a_gpu = cupy.asarray(array, dtype=dtype)
     result_cpu = numpy.linalg.qr(a_cpu, mode=mode)
     result_gpu = cupy.linalg.qr(a_gpu, mode=mode)
     if isinstance(result_cpu, tuple):
         for b_cpu, b_gpu in six.moves.zip(result_cpu, result_gpu):
             self.assertEqual(b_cpu.dtype, b_gpu.dtype)
             cupy.testing.assert_allclose(b_cpu, b_gpu, atol=1e-4)
     else:
         # A tuple has no ``dtype``, so this check is only for array results.
         self.assertEqual(result_cpu.dtype, result_gpu.dtype)
         cupy.testing.assert_allclose(result_cpu, result_gpu, atol=1e-4)
Ejemplo n.º 16
def asanyarray(a, dtype=None):
    """Converts an object to array.

    This is currently equivalent to :func:`~cupy.asarray`, since there is no
    subclass of ndarray in CuPy. Note that the original
    :func:`numpy.asanyarray` returns the input array as is if it is an instance
    of a subtype of numpy.ndarray.

    Args:
        a: The source object.
        dtype: Data type specifier, forwarded to :func:`cupy.asarray`.

    Returns:
        The result of ``cupy.asarray(a, dtype)``.

    .. seealso:: :func:`cupy.asarray`, :func:`numpy.asanyarray`

    """
    return cupy.asarray(a, dtype)
Ejemplo n.º 17
Archivo: cuda.py Proyecto: tkng/chainer
def _array_to_gpu(array, device, stream):
    """Copy ``array`` to the current CUDA device.

    Args:
        array: ``None``, a NumPy scalar, an ``intel64.mdarray``, a
            ``numpy.ndarray`` or a ``cupy.ndarray``.
        device: ``DummyDevice`` or a ``Device`` instance (asserted only).
        stream: Optional stream. When it is not ``None`` and its ``ptr`` is
            non-zero, the copy is issued asynchronously on it.

    Returns:
        ``None`` if ``array`` is ``None``; ``array`` itself when it already
        lives on the current device; otherwise a new CuPy array.

    Raises:
        TypeError: If ``array`` is none of the supported types.

    NOTE(review): the pinned-memory watch-list call, ``stream.wait_event``
    and the async device copy were rebuilt from a truncated listing --
    verify against the upstream implementation.
    """
    assert device is DummyDevice or isinstance(device, Device)
    if array is None:
        return None

    if isinstance(array, (numpy.number, numpy.bool_)):
        array = numpy.asarray(array)
    elif isinstance(array, intel64.mdarray):
        array = numpy.asarray(array)

    if not isinstance(array, (cupy.ndarray, numpy.ndarray)):
        raise TypeError(
            'The array sent to gpu must be an array or a NumPy scalar.'
            '\nActual type: {0}.'.format(type(array)))

    array_dev = get_device_from_array(array)
    if array_dev.id == cupy.cuda.device.get_device_id():
        return array

    if stream is not None and stream.ptr != 0:
        ret = cupy.empty_like(array)
        if array_dev.id == -1:
            # cpu to gpu
            mem = cupy.cuda.alloc_pinned_memory(array.nbytes)
            src = numpy.frombuffer(
                mem, array.dtype, array.size).reshape(array.shape)
            src[...] = array
            ret.set(src, stream)
            # Keep the pinned buffer alive until the copy finishes.
            cupy.cuda.pinned_memory._add_to_watch_list(
                stream.record(), mem)
        else:
            # gpu to gpu
            with array_dev:
                src = array.copy()
                event = Stream.null.record()
            stream.wait_event(event)
            ret.data.copy_from_device_async(
                src.data, src.nbytes, stream)

            # to hold a reference until the end of the asynchronous
            # memcpy
            stream.add_callback(lambda *x: None, (src, ret))
        return ret

    if array_dev.id == -1:
        return cupy.asarray(array)

    # Need to make a copy when an array is copied to another device
    return cupy.array(array, copy=True)
Ejemplo n.º 18
def ix_(*args):
    """Construct an open mesh from multiple sequences.

    This function takes N 1-D sequences and returns N outputs with N
    dimensions each, such that the shape is 1 in all but one dimension
    and the dimension with the non-unit shape value cycles through all
    N dimensions.

    Using `ix_` one can quickly construct index arrays that will index
    the cross product. ``a[cupy.ix_([1,3],[2,5])]`` returns the array
    ``[[a[1,2] a[1,5]], [a[3,2] a[3,5]]]``.

    Args:
        *args: 1-D sequences

    Returns:
        tuple of ndarrays:
        N arrays with N dimensions each, with N the number of input sequences.
        Together these arrays form an open mesh.

    Raises:
        ValueError: If any input is not one-dimensional.

    Examples
    --------
    >>> a = cupy.arange(10).reshape(2, 5)
    >>> a
    array([[0, 1, 2, 3, 4],
           [5, 6, 7, 8, 9]])
    >>> ixgrid = cupy.ix_([0,1], [2,4])
    >>> ixgrid
    (array([[0],
           [1]]), array([[2, 4]]))

    .. seealso:: :func:`numpy.ix_`

    """
    out = []
    nd = len(args)
    for k, new in enumerate(args):
        new = cupy.asarray(new)
        if new.ndim != 1:
            raise ValueError("Cross index must be 1 dimensional")
        if new.size == 0:
            # Explicitly type empty arrays to avoid float default
            new = new.astype(numpy.intp)
        if cupy.issubdtype(new.dtype, cupy.bool_):
            new, = new.nonzero()
        # Put the sequence on axis k and give every other axis length 1.
        new = new.reshape((1,) * k + (new.size,) + (1,) * (nd - k - 1))
        out.append(new)
    return tuple(out)
Ejemplo n.º 19
def to_gpu(array, device=None, stream=None):
    """Copies the given CPU array to specified device.

    Args:
        array: Array to be sent to GPU.
        device: Device specifier.
        stream (cupy.cuda.Stream): CUDA stream. If not ``None``, the copy runs
            asynchronously.

    Returns:
        cupy.ndarray: Array on GPU.

        If ``array`` is already on GPU, then this function just returns
        ``array`` without performing any copy. Note that this function does not
        copy :class:`cupy.ndarray` into specified device.

    """
    with get_device(device):
        array_dev = get_device(array)
        if array_dev.id == cupy.cuda.device.get_device_id():
            return array

        if stream is not None:
            ret = cupy.empty_like(array)
            if array_dev.id == -1:
                # cpu to gpu
                src = array.copy(order='C')
                ret.set(src, stream)
            else:
                # gpu to gpu
                with array_dev:
                    src = array.copy()
                ret.data.copy_from_device_async(src.data, src.nbytes, stream)

            # to hold a reference until the end of the asynchronous memcpy
            stream.add_callback(lambda *x: None, (src, ret))
            return ret

        if array_dev.id == -1:
            return cupy.asarray(array)

        # Need to make a copy when an array is copied to another device
        return cupy.array(array, copy=True)
Ejemplo n.º 20
def to_gpu(array, device=None, stream=None):
    """Copies the given CPU array to specified device.

    Args:
        array: Array to be sent to GPU.
        device: Device specifier.
        stream (cupy.cuda.Stream): CUDA stream. Must be ``None``.

    Returns:
        cupy.ndarray: Array on GPU.

        If ``array`` is already on GPU, then this function just returns
        ``array`` without performing any copy. Note that this function does not
        copy cupy.ndarray into specified device.

    """
    assert stream is None  # TODO(beam2d): FIX IT
    with get_device(device):
        return cupy.asarray(array)
Ejemplo n.º 21
def affine(volume: np.ndarray,
           transform_m: np.ndarray,
           interpolation: str = 'linear',
           reshape: bool = False,
           profile: bool = False,
           device: str = 'cpu'):
    """Apply the affine transform ``transform_m`` to ``volume`` on CPU or GPU.

    NOTE(review): this listing appears to have lost lines. Several calls are
    left unterminated, alternative assignments directly overwrite each other,
    and ``output`` is read without being a parameter or a local. The comments
    below only describe what the visible code shows.

    Args:
        volume: Input volume.
        transform_m: Transformation matrix.
        interpolation: Interpolation mode name; ``'linear'`` and names
            starting with ``'filt_bspline'`` are tested for explicitly.
        reshape: When true, padding and output shape are computed with
            ``utils.compute_post_transform_dimensions``.
        profile: When true, the elapsed time is printed.
        device: Must be in ``AVAILABLE_DEVICES``; ``'cpu'`` and names
            starting with ``'gpu'`` are handled.

    Raises:
        ValueError: If ``device`` is not in ``AVAILABLE_DEVICES``.
    """

    if device not in AVAILABLE_DEVICES:
        raise ValueError(
            f'Unknown device ({device}), must be one of {AVAILABLE_DEVICES}')

    if device == 'cpu':

        if profile:
            t_start = time.time()

        # set parameters for scipy affine transform
        # NOTE(review): as written ``order = 3`` always overrides
        # ``order = 1``; an ``else:`` seems to be missing -- confirm.
        if interpolation == 'linear':
            order = 1
            order = 3

        # NOTE(review): same pattern -- ``prefilter = True`` always wins.
        if not interpolation.startswith('filt_bspline'):
            prefilter = False
            prefilter = True

        if reshape:
            pad_before, pad_after, output_shape = utils.compute_post_transform_dimensions(
                volume.shape, transform_m)

            # scipy will take care of padding in this case
            # but we need to apply pad_before offset to transform_m get full volume
            transform_m = np.dot(
                transform_m, translation_matrix(pad_before, transform_m.dtype))

            # NOTE(review): this overwrites the computed ``output_shape``;
            # probably belonged to a lost ``else:`` branch -- confirm.
            output_shape = volume.shape

        # run affine transformation
        # NOTE(review): the argument list of this call is missing.
        output_vol = affine_transform(volume,

        if profile:
            t_end = time.time()
            time_took = (t_end - t_start) * 1000
            print(f'transform finished in {time_took:.3f}ms')

        # NOTE(review): ``output`` is not defined in the visible signature.
        if output is not None:
            return output
            return output_vol

    elif device.startswith('gpu'):

        if profile:
            # CUDA events recorded on the null stream bracket the GPU work.
            stream = cp.cuda.Stream.null
            t_start = stream.record()

        if reshape:
            pad_before, pad_after, output_shape = utils.compute_post_transform_dimensions(
                volume.shape, transform_m)

            # manually pad volume
            # NOTE(review): the remaining ``np.pad`` arguments are missing.
            volume = np.pad(volume,
                            list(zip(pad_before, pad_after)),

            # include pad_before offset: first apply offset, then apply negative offset
            transform_m = translation_matrix(
                -1 * pad_before) @ transform_m @ translation_matrix(pad_before)

        volume = cp.asarray(volume)
        volume_shape = volume.shape

        # texture setup
        ch = cp.cuda.texture.ChannelFormatDescriptor(
            32, 0, 0, 0, cp.cuda.runtime.cudaChannelFormatKindFloat)
        arr = cp.cuda.texture.CUDAarray(
            ch, *volume_shape[::-1]
        )  # CUDAArray: last dimension=fastest changing dimension
        res = cp.cuda.texture.ResourceDescriptor(
            cp.cuda.runtime.cudaResourceTypeArray, cuArr=arr)
        # NOTE(review): the ``TextureDescriptor`` arguments are missing.
        tex = cp.cuda.texture.TextureDescriptor(
        texobj = cp.cuda.texture.TextureObject(res, tex)

        # prefilter if required and upload to texture
        # NOTE(review): no upload of ``volume`` into ``arr`` is visible here.
        if interpolation.startswith('filt_bspline'):
            volume = _bspline_prefilter(volume)

        # kernel setup
        kernel = _get_transform_kernel(interpolation)
        dims = cp.asarray(volume_shape, dtype=cp.uint32)
        xform = cp.asarray(transform_m)
        # NOTE(review): the arguments of this call are missing.
        dim_grid, dim_blocks = utils.compute_elementwise_launch_dims(

        if output is None:
            volume.fill(0.0)  # reuse input array
            volume = output

        kernel(dim_grid, dim_blocks, (volume, texobj, xform, dims))

        if profile:
            t_end = stream.record()

            time_took = cp.cuda.get_elapsed_time(t_start, t_end)
            print(f'transform finished in {time_took:.3f}ms')

        if output is None:
            del texobj, xform, dims
            return volume.get()
            del texobj, xform, dims
            return None

        raise ValueError(f'No instructions for {device}.')
Ejemplo n.º 22
    def test_compare_xp_gpu(self):
        # Compares the CPU ``ex2d_patch`` results with ``xp_ex2d_patch`` run
        # on CuPy copies of the fixtures, once per decorrelation method.
        # NOTE(review): this listing appears to have lost lines -- the
        # extraction calls have no closing arguments and the assertion
        # statements are missing their opening lines. Restore them before
        # relying on this test.
        noisyimg_gpu = cp.asarray(self.noisyimg)
        imgivar_gpu = cp.asarray(self.imgivar)
        A4_gpu = cp.asarray(self.A4)

        # Compare the "signal" decorrelation method
        flux0, ivar0, R0 = ex2d_patch(self.noisyimg,
        flux1_gpu, ivar1_gpu, R1_gpu = xp_ex2d_patch(noisyimg_gpu,

        # Bring the GPU results back to host memory for the comparisons.
        flux1 = cp.asnumpy(flux1_gpu)
        ivar1 = cp.asnumpy(ivar1_gpu)
        R1 = cp.asnumpy(R1_gpu)

        # Tolerances below are multiples of float64 machine epsilon.
        eps_double = np.finfo(np.float64).eps

        # Indices where the two flux results disagree (used in ``err_msg``).
        where = np.where(
            ~np.isclose(flux0, flux1, rtol=1e5 * eps_double, atol=0))
                                   rtol=1e5 * eps_double,
                                   err_msg=f"where: {where}")
            np.allclose(ivar0, ivar1, rtol=1e3 * eps_double, atol=0))
                        rtol=1e2 * eps_double,
                        atol=1e3 * eps_double))
                np.abs(flux0 - flux1) / np.sqrt(1. / ivar0 + 1. / ivar1),

        # Compare the "noise" decorrelation method
        flux0, ivar0, R0 = ex2d_patch(self.noisyimg,
        flux1_gpu, ivar1_gpu, R1_gpu = xp_ex2d_patch(noisyimg_gpu,

        flux1 = cp.asnumpy(flux1_gpu)
        ivar1 = cp.asnumpy(ivar1_gpu)
        R1 = cp.asnumpy(R1_gpu)

            np.allclose(flux0, flux1, rtol=1e5 * eps_double, atol=0))
            np.allclose(ivar0, ivar1, rtol=1e3 * eps_double, atol=0))
                        rtol=1e2 * eps_double,
                np.abs(flux0 - flux1) / np.sqrt(1. / ivar0 + 1. / ivar1),
Ejemplo n.º 23
    def evaluate(self):
        # Returns a float: the sum of ``predictions * weights_source *
        # weights_z`` divided by ``self.N_inj``, where the weights are ratios
        # of population-model to fiducial probability densities.
        # NOTE(review): this listing appears to have lost lines (an ``else:``
        # for the import fallback, the bodies of the two spin list
        # comprehensions, dict braces, and the arguments of
        # ``NotLensedSourceRedshiftProbDist``). Comments describe the visible
        # code only.

        # Perform MC reweighting
        # NOTE(review): as written numpy always replaces cupy as ``xp``.
        if _GPU_ENABLED:
            import cupy as xp
            import numpy as xp

        m1_src = self.fiducial_binaries["mass_1_source"]
        m2_src = self.fiducial_binaries["mass_2_source"]
        spin_1x, spin_1y, spin_1z = [
            for k in ["spin_1x", "spin_1y", "spin_1z"]
        spin_2x, spin_2y, spin_2z = [
            for k in ["spin_2x", "spin_2y", "spin_2z"]
        pdf_mass_fiducial = self.pdf_mass_fiducial
        pdf_spin_fiducial = self.pdf_spin_fiducial

        # Move data to GPU if needed
        m1_src = xp.asarray(m1_src)
        m2_src = xp.asarray(m2_src)
        spin_1x = xp.asarray(spin_1x)
        spin_1y = xp.asarray(spin_1y)
        spin_1z = xp.asarray(spin_1z)
        spin_2x = xp.asarray(spin_2x)
        spin_2y = xp.asarray(spin_2y)
        spin_2z = xp.asarray(spin_2z)
        pdf_mass_fiducial = xp.asarray(pdf_mass_fiducial)
        pdf_spin_fiducial = xp.asarray(pdf_spin_fiducial)

        # Mass weights: population density over fiducial density.
        pdf_mass_pop = self.mass_src_pop_model.prob(
                "mass_1_source": m1_src,
                "mass_2_source": m2_src
            }, axis=0)
        weights_mass = pdf_mass_pop / pdf_mass_fiducial
        # Spin weights, built the same way from the six spin components.
        pdf_spin_pop = self.spin_src_pop_model.prob({
            "spin_1x": spin_1x,
            "spin_1y": spin_1y,
            "spin_1z": spin_1z,
            "spin_2x": spin_2x,
            "spin_2y": spin_2y,
            "spin_2z": spin_2z,
        weights_spin = pdf_spin_pop / pdf_spin_fiducial

        weights_source = weights_mass * weights_spin

        # Redshift weights.
        z = self.fiducial_z
        pz = NotLensedSourceRedshiftProbDist(
        pdf_z_fiducial = self.pdf_z_fiducial
        pdf_z_pop = pz.prob(z)  # NOTE p_z still uses CPU-only code
        weights_z = xp.asarray(pdf_z_pop / pdf_z_fiducial)

        predictions = xp.asarray(self.predictions)
        alpha = xp.sum(predictions * weights_source *
                       weights_z).astype(float) / (float(self.N_inj))

        # NOTE If using numpy, alpha is a scalar but if using cupy, alpha is a 0-d array
        return float(alpha)
Ejemplo n.º 24
Archivo: svd.py Proyecto: carlgogo/VIP
def svd_wrapper(matrix, mode, ncomp, verbose, full_output=False,
                random_state=None, to_numpy=True):
    """ Wrapper for different SVD libraries (CPU and GPU).

    Parameters
    ----------
    matrix : numpy ndarray, 2d
        2d input matrix.
    mode : {'lapack', 'arpack', 'eigen', 'randsvd', 'cupy', 'eigencupy',
        'randcupy', 'pytorch', 'eigenpytorch', 'randpytorch'}, str optional
        Switch for the SVD method/library to be used.

        ``lapack``: uses the LAPACK linear algebra library through Numpy
        and it is the most conventional way of computing the SVD
        (deterministic result computed on CPU).

        ``arpack``: uses the ARPACK Fortran libraries accessible through
        Scipy (computation on CPU).

        ``eigen``: computes the singular vectors through the
        eigendecomposition of the covariance M.M' (computation on CPU).

        ``randsvd``: uses the randomized_svd algorithm implemented in
        Sklearn (computation on CPU).

        ``cupy``: uses the Cupy library for GPU computation of the SVD as in
        the LAPACK version.

        ``eigencupy``: offers the same method as with the ``eigen`` option
        but on GPU (through Cupy).

        ``randcupy``: is an adaptation of the randomized_svd algorithm,
        where all the computations are done on a GPU (through Cupy).

        ``pytorch``: uses the Pytorch library for GPU computation of the SVD.

        ``eigenpytorch``: offers the same method as with the ``eigen``
        option but on GPU (through Pytorch).

        ``randpytorch``: is an adaptation of the randomized_svd algorithm,
        where all the linear algebra computations are done on a GPU
        (through Pytorch).

    ncomp : int
        Number of singular vectors to be obtained. In the cases when the full
        SVD is computed (LAPACK, ARPACK, EIGEN, CUPY), the matrix of singular
        vectors is truncated.
    verbose: bool
        If True intermediate information is printed out.
    full_output : bool optional
        If True the 3 terms of the SVD factorization are returned. If ``mode``
        is eigen then only S and V are returned.
    random_state : int, RandomState instance or None, optional
        If int, random_state is the seed used by the random number generator.
        If RandomState instance, random_state is the random number generator.
        If None, the random number generator is the RandomState instance used
        by np.random. Used for ``randsvd`` mode.
    to_numpy : bool, optional
        If True (by default) the arrays computed in GPU are transferred from
        VRAM and converted to numpy ndarrays.

    Returns
    -------
    V : numpy ndarray
        The right singular vectors of the input matrix. If ``full_output`` is
        True it returns the left and right singular vectors and the singular
        values of the input matrix. If ``mode`` is set to eigen then only S and
        V are returned.

    Raises
    ------
    TypeError
        If ``matrix`` is not 2d.
    RuntimeError
        If ``ncomp`` exceeds the smaller matrix dimension, or the library
        required by ``mode`` is not installed.
    ValueError
        If ``mode`` is not recognized.

    Notes
    -----
    * For ``randsvd`` SVD mode see:
        Finding structure with randomness: Stochastic algorithms for constructing
        approximate matrix decompositions
        Halko, et al., 2009 http://arxiv.org/abs/arXiv:0909.4061

    """
    if matrix.ndim != 2:
        raise TypeError('Input matrix is not a 2d array')

    if ncomp > min(matrix.shape[0], matrix.shape[1]):
        msg = '{} PCs cannot be obtained from a matrix with size [{},{}].'
        msg += ' Increase the size of the patches or request less PCs'
        raise RuntimeError(msg.format(ncomp, matrix.shape[0], matrix.shape[1]))

    if mode == 'eigen':
        # building C as np.dot(matrix.T,matrix) is slower and takes more memory
        C = np.dot(matrix, matrix.T)    # covariance matrix
        e, EV = linalg.eigh(C)          # EVals and EVs
        pc = np.dot(EV.T, matrix)       # PCs using a compact trick when cov is MM'
        V = pc[::-1]                    # reverse since we need the last EVs
        S = np.sqrt(np.abs(e))          # SVals = sqrt(EVals)
        S = S[::-1]                     # reverse since EVals go in increasing order
        # scaling EVs by the square root of EVals (row j divided by S[j])
        V /= S[:, None]
        V = V[:ncomp]
        if verbose:
            print('Done PCA with numpy linalg eigh functions')

    elif mode == 'lapack':
        # n_frames is usually smaller than n_pixels. In this setting taking
        # the SVD of M' and keeping the left (transposed) SVs is faster than
        # taking the SVD of M (right SVs)
        U, S, V = linalg.svd(matrix.T, full_matrices=False)
        V = V[:ncomp]       # we cut projection matrix according to the # of PCs
        U = U[:, :ncomp]
        S = S[:ncomp]
        if verbose:
            print('Done SVD/PCA with numpy SVD (LAPACK)')

    elif mode == 'arpack':
        U, S, V = svds(matrix, k=ncomp)
        if verbose:
            print('Done SVD/PCA with scipy sparse SVD (ARPACK)')

    elif mode == 'randsvd':
        U, S, V = randomized_svd(matrix, n_components=ncomp, n_iter=2,
                                 transpose='auto', random_state=random_state)
        if verbose:
            print('Done SVD/PCA with randomized SVD')

    elif mode == 'cupy':
        if no_cupy:
            raise RuntimeError('Cupy is not installed')
        a_gpu = cupy.array(matrix)
        a_gpu = cupy.asarray(a_gpu)  # move the data to the current device
        u_gpu, s_gpu, vh_gpu = cupy.linalg.svd(a_gpu, full_matrices=True,
                                               compute_uv=True)
        V = vh_gpu[:ncomp]
        if to_numpy:
            V = cupy.asnumpy(V)
        if full_output:
            S = s_gpu[:ncomp]
            if to_numpy:
                S = cupy.asnumpy(S)
            U = u_gpu[:, :ncomp]
            if to_numpy:
                U = cupy.asnumpy(U)
        if verbose:
            print('Done SVD/PCA with cupy (GPU)')

    elif mode == 'randcupy':
        if no_cupy:
            raise RuntimeError('Cupy is not installed')
        U, S, V = randomized_svd_gpu(matrix, ncomp, n_iter=2, lib='cupy')
        if to_numpy:
            V = cupy.asnumpy(V)
            S = cupy.asnumpy(S)
            U = cupy.asnumpy(U)
        if verbose:
            print('Done randomized SVD/PCA with cupy (GPU)')

    elif mode == 'eigencupy':
        if no_cupy:
            raise RuntimeError('Cupy is not installed')
        a_gpu = cupy.array(matrix)
        a_gpu = cupy.asarray(a_gpu)     # move the data to the current device
        C = cupy.dot(a_gpu, a_gpu.T)    # covariance matrix
        e, EV = cupy.linalg.eigh(C)     # eigenvalues and eigenvectors
        pc = cupy.dot(EV.T, a_gpu)      # using a compact trick when cov is MM'
        V = pc[::-1]                    # reverse to get last eigenvectors
        # abs() as in the ``eigen`` mode: round-off can make tiny eigenvalues
        # negative, which would turn the square root into NaN.
        S = cupy.sqrt(cupy.abs(e))[::-1]    # EVals go in increasing order
        V /= S[:, None]                 # scaling by the square root of eigvals
        V = V[:ncomp]
        if to_numpy:
            V = cupy.asnumpy(V)
        if verbose:
            print('Done PCA with cupy eigh function (GPU)')

    elif mode == 'pytorch':
        if no_torch:
            raise RuntimeError('Pytorch is not installed')
        a_gpu = torch.Tensor.cuda(torch.from_numpy(matrix.astype('float32').T))
        u_gpu, s_gpu, vh_gpu = torch.svd(a_gpu)
        V = vh_gpu[:ncomp]
        S = s_gpu[:ncomp]
        U = torch.transpose(u_gpu, 0, 1)[:ncomp]
        if to_numpy:
            V = np.array(V)
            S = np.array(S)
            U = np.array(U)
        if verbose:
            print('Done SVD/PCA with pytorch (GPU)')

    elif mode == 'eigenpytorch':
        if no_torch:
            raise RuntimeError('Pytorch is not installed')
        a_gpu = torch.Tensor.cuda(torch.from_numpy(matrix.astype('float32')))
        C = torch.mm(a_gpu, torch.transpose(a_gpu, 0, 1))
        e, EV = torch.eig(C, eigenvectors=True)
        V = torch.mm(torch.transpose(EV, 0, 1), a_gpu)
        S = torch.sqrt(e[:, 0])
        V /= S[:, None]
        V = V[:ncomp]
        if to_numpy:
            V = np.array(V)
        if verbose:
            print('Done PCA with pytorch eig function')

    elif mode == 'randpytorch':
        if no_torch:
            raise RuntimeError('Pytorch is not installed')
        U, S, V = randomized_svd_gpu(matrix, ncomp, n_iter=2, lib='pytorch')
        if to_numpy:
            V = np.array(V)
            S = np.array(S)
            U = np.array(U)
        if verbose:
            print('Done randomized SVD/PCA with randomized pytorch (GPU)')

    else:
        raise ValueError('The SVD `mode` is not recognized')

    if full_output:
        if mode == 'lapack':
            return V.T, S, U.T
        elif mode == 'pytorch':
            if to_numpy:
                return V.T, S, U.T
            else:
                return torch.transpose(V, 0, 1), S, torch.transpose(U, 0, 1)
        elif mode in ('eigen', 'eigencupy', 'eigenpytorch'):
            return S, V
        else:
            return U, S, V
    else:
        # The transposed-input modes keep the wanted vectors in ``U``.
        if mode == 'lapack':
            return U.T
        elif mode == 'pytorch':
            return U
        else:
            return V
Ejemplo n.º 25
def preprocess(ctx):
    """Filter, whiten and convert the raw recording batch by batch on the GPU.

    For each of ``ir.Nbatch`` batches a window of up to ``params.NTbuff``
    samples is sliced from ``ctx.raw_data``, moved to the GPU as float32,
    passed through ``gpufilter``, trimmed to at most ``params.NT`` rows,
    multiplied by the whitening matrix ``ir.Wrot`` and cast to int16 on the
    host.  ``ir.proc_path`` is opened in binary write mode around the loop.

    NOTE(review): several statements below look truncated (a missing
    ``else:``, unclosed ``np.concatenate(`` / ``gpufilter(`` calls and no
    visible write of ``datcpu`` to ``fw``).  The function does not parse as
    shown -- confirm against the upstream source before relying on it.

    Args:
        ctx: Object exposing the ``params``, ``probe``, ``raw_data`` and
            ``intermediate`` attributes read below.
    """
    # function rez = preprocessDataSub(ops)
    # this function takes an ops struct, which contains all the Kilosort2 settings and file paths
    # and creates a new binary file of preprocessed data, logging new variables into rez.
    # The following steps are applied:
    # 1) conversion to float32
    # 2) common median subtraction
    # 3) bandpass filtering
    # 4) channel whitening
    # 5) scaling to int16 values

    params = ctx.params
    # NOTE(review): `probe` is not used in the visible code; presumably it was
    # an argument of the truncated `gpufilter(` call below -- confirm.
    probe = ctx.probe
    raw_data = ctx.raw_data
    ir = ctx.intermediate

    fs = params.fs
    fshigh = params.fshigh
    fslow = params.fslow
    Nbatch = ir.Nbatch
    NT = params.NT
    NTbuff = params.NTbuff

    # Whitening matrix, copied to the GPU once and reused for every batch.
    Wrot = cp.asarray(ir.Wrot)

    logger.info("Loading raw data and applying filters.")

    with open(ir.proc_path, 'wb') as fw:  # open for writing processed data
        for ibatch in tqdm(range(Nbatch), desc="Preprocessing"):
            # we'll create a binary file of batches of NT samples, which overlap consecutively
            # on params.ntbuff samples
            # in addition to that, we'll read another params.ntbuff samples from before and after,
            # to have as buffers for filtering

            # number of samples to start reading at.
            i = max(0, (NT - params.ntbuff) * ibatch - 2 * params.ntbuff)
            if ibatch == 0:
                # The very first batch has no pre-buffer, and has to be treated separately
                # NOTE(review): an `else:` appears to be missing between the two
                # assignments; as written the second one always overwrites the first.
                ioffset = 0
                ioffset = params.ntbuff

            buff = raw_data[:, i:i + NTbuff]
            if buff.size == 0:
                logger.error("Loaded buffer has an empty size!")
                break  # this shouldn't really happen, unless we counted data batches wrong

            nsampcurr = buff.shape[
                1]  # how many time samples the current batch has
            if nsampcurr < NTbuff:
                # Short (last) batch: the tiled block repeats the final sample column.
                # NOTE(review): the `np.concatenate(` call is truncated -- its first
                # operand and closing arguments are missing here.
                buff = np.concatenate(
                     np.tile(buff[:, nsampcurr - 1][:, np.newaxis],
                             (1, NTbuff))),

            # apply filters and median subtraction
            buff = cp.asarray(buff, dtype=np.float32)

            # NOTE(review): the `gpufilter(` call is truncated -- the remaining
            # arguments and the closing parenthesis are missing here.
            datr = gpufilter(buff,

            datr = datr[ioffset:ioffset +
                        NT, :]  # remove timepoints used as buffers
            datr = cp.dot(
                datr, Wrot)  # whiten the data and scale by 200 for int16 range

            # convert to int16, and gather on the CPU side
            datcpu = cp.asnumpy(datr).astype(np.int16)

            # write this batch to binary file
            # NOTE(review): no statement writing `datcpu` to `fw` is visible here.
Ejemplo n.º 26
def to_gpu(x):
    """Return ``x`` as a CuPy array.

    Args:
        x: A ``cupy.ndarray`` or any object accepted by ``cupy.asarray``.

    Returns:
        ``x`` itself when it is already a ``cupy.ndarray`` (subclasses
        included); otherwise the result of ``cupy.asarray(x)``.
    """
    # Local import: CuPy is only required when this function is called.
    import cupy
    # isinstance (rather than an exact type comparison) also recognises
    # subclasses of cupy.ndarray as already being on the GPU.
    if isinstance(x, cupy.ndarray):
        return x
    return cupy.asarray(x)
Ejemplo n.º 27
    y = np.arange(0, int(col), 1)
    z = np.arange(0, int(sta), 1)
    X, Y, Z = np.meshgrid(x, y, z)
    OSS_alpha = float(row)
    OSS_alpha_step = (float(row) - 1.0 / float(row)) / (
        float(iteration) / float(OSS_interval) - 1.0)


# Coordinate index vectors for the three axes, created as CuPy arrays.
cx = cp.arange(0, int(row), 1)
cy = cp.arange(0, int(col), 1)
cz = cp.arange(0, int(sta), 1)

# Convert the NumPy arrays to CuPy arrays.
# Note: cp_dens is converted without an explicit dtype, unlike the others.

cp_diff_amp = cp.asarray(np_diff_amp, dtype="float32")
cp_sup = cp.asarray(np_sup, dtype="float32")
cp_initial_dens = cp.asarray(np_initial_dens, dtype="float32")
cp_dens = cp.asarray(np_dens)

# Print the column header and append the same header to the log file.
# NOTE(review): the header is written without a trailing newline -- confirm
# that whatever is appended to the log next starts a new line itself.
print("iteration scale_factor Rfactor OS_ratio gamma")
with open(log_path, mode='a') as log:
    log.write("iteration scale_factor Rfactor OS_ratio gamma")

for i in range(int(iteration) + int(additional_iteration)):

    cp_structure_factor = cp.fft.fftn(cp_dens, axes=(0, 1, 2),
                                      norm="ortho")  # Fourier transform
    cp_structure_factor = cp.fft.fftshift(
        cp_structure_factor)  # shift the spectrum with fftshift
    cp_amp = cp.absolute(cp_structure_factor)  # take the absolute value
Ejemplo n.º 28
def svd_wrapper(matrix, mode, ncomp, debug, verbose, usv=False,
                random_state=None, to_numpy=True):
    """ Wrapper for different SVD libraries (CPU and GPU). 
    matrix : array_like, 2d
        2d input matrix.
    mode : {'lapack', 'arpack', 'eigen', 'randsvd', 'cupy', 'eigencupy',
            'randcupy', 'pytorch', 'eigenpytorch', 'randpytorch'}, str optional
        Switch for the SVD method/library to be used. ``lapack`` uses the LAPACK 
        linear algebra library through Numpy and it is the most conventional way 
        of computing the SVD (deterministic result computed on CPU). ``arpack`` 
        uses the ARPACK Fortran libraries accessible through Scipy (computation
        on CPU). ``eigen`` computes the singular vectors through the 
        eigendecomposition of the covariance M.M' (computation on CPU).
        ``randsvd`` uses the randomized_svd algorithm implemented in Sklearn 
        (computation on CPU). ``cupy`` uses the Cupy library for GPU computation
        of the SVD as in the LAPACK version. ``eigencupy`` offers the same 
        method as with the ``eigen`` option but on GPU (through Cupy). 
        ``randcupy`` is an adaptation of the randomized_svd algorithm, where all
        the computations are done on a GPU (through Cupy). ``pytorch`` uses the
        Pytorch library for GPU computation of the SVD. ``eigenpytorch`` offers
        the same method as with the ``eigen`` option but on GPU (through
        Pytorch). ``randpytorch`` is an adaptation of the randomized_svd
        algorithm, where all the linear algebra computations are done on a GPU
        (through Pytorch).
    ncomp : int
        Number of singular vectors to be obtained. In the cases when the full
        SVD is computed (LAPACK, ARPACK, EIGEN, CUPY), the matrix of singular 
        vectors is truncated. 
    debug : bool
        If True the explained variance ratio is computed and displayed.
    verbose: bool
        If True intermediate information is printed out.
    usv : bool optional
        If True the 3 terms of the SVD factorization are returned.
    random_state : int, RandomState instance or None, optional
        If int, random_state is the seed used by the random number generator.
        If RandomState instance, random_state is the random number generator.
        If None, the random number generator is the RandomState instance used
        by np.random. Used for ``randsvd`` mode.
    to_numpy : bool, optional
        If True (by default) the arrays computed in GPU are transferred from
        VRAM and converted to numpy ndarrays.

    V : array_like
        The right singular vectors of the input matrix. If ``usv`` is True it
        returns the left and right singular vectors and the singular values of
        the input matrix.
    * For ``lapack`` SVD mode see:
    * For ``eigen`` mode see:
    * For ``arpack`` SVD mode see:
    * For ``randsvd`` SVD mode see:
        Finding structure with randomness: Stochastic algorithms for constructing
        approximate matrix decompositions
        Halko, et al., 2009 http://arxiv.org/abs/arXiv:0909.4061
    * For ``cupy`` SVD mode see:
    * For ``eigencupy`` mode see:
    * For ``pytorch`` SVD mode see:
    * For ``eigenpytorch`` mode see:


    def reconstruction(ncomp, U, S, V, var=1):
        if mode == 'lapack':
            rec_matrix = np.dot(U[:, :ncomp],
                                np.dot(np.diag(S[:ncomp]), V[:ncomp]))
            rec_matrix = rec_matrix.T
            print('  Matrix reconstruction with {} PCs:'.format(ncomp))
            print('  Mean Absolute Error =', MAE(matrix, rec_matrix))
            print('  Mean Squared Error =', MSE(matrix, rec_matrix))

            # see https://github.com/scikit-learn/scikit-learn/blob/c3980bcbabd9d2527548820581725df2904e4a0d/sklearn/decomposition/pca.py
            exp_var = (S ** 2) / (S.shape[0] - 1)
            full_var = np.sum(exp_var)
            explained_variance_ratio = exp_var / full_var   # % of variance explained by each PC
            ratio_cumsum = np.cumsum(explained_variance_ratio)
        elif mode == 'eigen':
            exp_var = (S ** 2) / (S.shape[0] - 1)
            full_var = np.sum(exp_var)
            explained_variance_ratio = exp_var / full_var   # % of variance explained by each PC
            ratio_cumsum = np.cumsum(explained_variance_ratio)
            rec_matrix = np.dot(U, np.dot(np.diag(S), V))
            print('  Matrix reconstruction MAE =', MAE(matrix, rec_matrix))
            exp_var = (S ** 2) / (S.shape[0] - 1)
            full_var = np.var(matrix, axis=0).sum()
            explained_variance_ratio = exp_var / full_var   # % of variance explained by each PC
            if var == 1:
                explained_variance_ratio = explained_variance_ratio[::-1]
            ratio_cumsum = np.cumsum(explained_variance_ratio)
            msg = '  This info makes sense when the matrix is mean centered '
            msg += '(temp-mean scaling)'

        lw = 2; alpha = 0.4
        fig = plt.figure(figsize=vip_figsize)
        ax1 = plt.subplot2grid((1, 3), (0, 0), colspan=2)
                 explained_variance_ratio, alpha=alpha, where='mid',
                 label='Individual EVR', lw=lw)
        ax1.plot(ratio_cumsum, '.-', alpha=alpha,
                 label='Cumulative EVR', lw=lw)
        ax1.legend(loc='best', frameon=False, fontsize='medium')
        ax1.set_ylabel('Explained variance ratio (EVR)')
        ax1.set_xlabel('Principal components')
        ax1.grid(linestyle='solid', alpha=0.2)
        ax1.set_xlim(-10, explained_variance_ratio.shape[0] + 10)
        ax1.set_ylim(0, 1)

        trunc = 20
        ax2 = plt.subplot2grid((1, 3), (0, 2), colspan=1)
        # plt.setp(ax2.get_yticklabels(), visible=False)
        ax2.step(range(trunc), explained_variance_ratio[:trunc], alpha=alpha,
                 where='mid', lw=lw)
        ax2.plot(ratio_cumsum[:trunc], '.-', alpha=alpha, lw=lw)
        ax2.set_xlabel('Principal components')
        ax2.grid(linestyle='solid', alpha=0.2)
        ax2.set_xlim(-2, trunc + 2)
        ax2.set_ylim(0, 1)

        msg = '  Cumulative explained variance ratio for {} PCs = {:.5f}'
        # plt.savefig('figure.pdf', dpi=300, bbox_inches='tight')
        print(msg.format(ncomp, ratio_cumsum[ncomp - 1]))

    # --------------------------------------------------------------------------

    if matrix.ndim != 2:
        raise TypeError('Input matrix is not a 2d array')

    if usv:
        if mode not in ('lapack', 'arpack', 'randsvd', 'cupy', 'randcupy',
                        'pytorch', 'randpytorch'):
            msg = "Returning USV is supported with modes lapack, arpack, "
            msg += "randsvd, cupy, randcupy, pytorch or randpytorch"
            raise ValueError(msg)

    if ncomp > min(matrix.shape[0], matrix.shape[1]):
        msg = '{} PCs cannot be obtained from a matrix with size [{},{}].'
        msg += ' Increase the size of the patches or request less PCs'
        raise RuntimeError(msg.format(ncomp, matrix.shape[0], matrix.shape[1]))

    if mode == 'eigen':
        # building C as np.dot(matrix.T,matrix) is slower and takes more memory
        C = np.dot(matrix, matrix.T)        # covariance matrix
        e, EV = linalg.eigh(C)              # EVals and EVs
        pc = np.dot(EV.T, matrix)           # PCs using a compact trick when cov is MM'
        V = pc[::-1]                        # reverse since we need the last EVs
        S = np.sqrt(np.abs(e))              # SVals = sqrt(EVals)
        S = S[::-1]                         # reverse since EVals go in increasing order
        if debug:
            reconstruction(ncomp, None, S, None)
        for i in range(V.shape[1]):
            V[:, i] /= S                    # scaling EVs by the square root of EVals
        V = V[:ncomp]
        if verbose:
            print('Done PCA with numpy linalg eigh functions')

    elif mode == 'lapack':
        # n_frames is usually smaller than n_pixels. In this setting taking the SVD of M'
        # and keeping the left (transposed) SVs is faster than taking the SVD of M (right SVs)
        U, S, V = linalg.svd(matrix.T, full_matrices=False)
        if debug:
            reconstruction(ncomp, U, S, V)
        V = V[:ncomp]                       # we cut projection matrix according to the # of PCs
        U = U[:, :ncomp]
        S = S[:ncomp]
        if verbose:
            print('Done SVD/PCA with numpy SVD (LAPACK)')

    elif mode == 'arpack':
        U, S, V = svds(matrix, k=ncomp)
        if debug:
            reconstruction(ncomp, U, S, V, -1)
        if verbose:
            print('Done SVD/PCA with scipy sparse SVD (ARPACK)')

    elif mode == 'randsvd':
        U, S, V = randomized_svd(matrix, n_components=ncomp, n_iter=2,
                                 transpose='auto', random_state=random_state)
        if debug:
            reconstruction(ncomp, U, S, V)
        if verbose:
            print('Done SVD/PCA with randomized SVD')

    elif mode == 'cupy':
        if no_cupy:
            raise RuntimeError('Cupy is not installed')
        a_gpu = cupy.array(matrix)
        a_gpu = cupy.asarray(a_gpu)  # move the data to the current device
        u_gpu, s_gpu, vh_gpu = cupy.linalg.svd(a_gpu, full_matrices=True,
        V = vh_gpu[:ncomp]
        if to_numpy:
            V = cupy.asnumpy(V)
        if usv:
            S = s_gpu[:ncomp]
            if to_numpy:
                S = cupy.asnumpy(S)
            U = u_gpu[:, :ncomp]
            if to_numpy:
                U = cupy.asnumpy(U)
        if verbose:
            print('Done SVD/PCA with cupy (GPU)')

    elif mode == 'randcupy':
        if no_cupy:
            raise RuntimeError('Cupy is not installed')
        U, S, V = randomized_svd_gpu(matrix, ncomp, n_iter=2, lib='cupy')
        if to_numpy:
            V = cupy.asnumpy(V)
            S = cupy.asnumpy(S)
            U = cupy.asnumpy(U)
        if debug:
            reconstruction(ncomp, U, S, V)
        if verbose:
            print('Done randomized SVD/PCA with cupy (GPU)')

    elif mode == 'eigencupy':
        if no_cupy:
            raise RuntimeError('Cupy is not installed')
        a_gpu = cupy.array(matrix)
        a_gpu = cupy.asarray(a_gpu)         # move the data to the current device
        C = cupy.dot(a_gpu, a_gpu.T)        # covariance matrix
        e, EV = cupy.linalg.eigh(C)         # eigenvalues and eigenvectors
        pc = cupy.dot(EV.T, a_gpu)          # PCs using a compact trick when cov is MM'
        V = pc[::-1]                        # reverse since last eigenvectors are the ones we want
        S = cupy.sqrt(e)[::-1]              # reverse since eigenvalues are in increasing order
        if debug:
            reconstruction(ncomp, None, S, None)
        for i in range(V.shape[1]):
            V[:, i] /= S                    # scaling by the square root of eigenvalues
        V = V[:ncomp]
        if to_numpy:
            V = cupy.asnumpy(V)
        if verbose:
            print('Done PCA with cupy eigh function (GPU)')

    elif mode == 'pytorch':
        if no_torch:
            raise RuntimeError('Pytorch is not installed')
        a_gpu = torch.Tensor.cuda(torch.from_numpy(matrix.astype('float32').T))
        u_gpu, s_gpu, vh_gpu = torch.svd(a_gpu)
        V = vh_gpu[:ncomp]
        S = s_gpu[:ncomp]
        U = torch.transpose(u_gpu, 0, 1)[:ncomp]
        if to_numpy:
            V = np.array(V)
            S = np.array(S)
            U = np.array(U)
        if verbose:
            print('Done SVD/PCA with pytorch (GPU)')

    elif mode == 'eigenpytorch':
        if no_torch:
            raise RuntimeError('Pytorch is not installed')
        a_gpu = torch.Tensor.cuda(torch.from_numpy(matrix.astype('float32')))
        C = torch.mm(a_gpu, torch.transpose(a_gpu, 0, 1))
        e, EV = torch.eig(C, eigenvectors=True)
        V = torch.mm(torch.transpose(EV, 0, 1), a_gpu)
        S = torch.sqrt(e[:, 0])
        if debug:
            reconstruction(ncomp, None, S, None)
        for i in range(V.shape[1]):
            V[:, i] /= S
        V = V[:ncomp]
        if to_numpy:
            V = np.array(V)
        if verbose:
            print('Done PCA with pytorch eig function')

    elif mode == 'randpytorch':
        if no_torch:
            raise RuntimeError('Pytorch is not installed')
        U, S, V = randomized_svd_gpu(matrix, ncomp, n_iter=2, lib='pytorch')
        if to_numpy:
            V = np.array(V)
            S = np.array(S)
            U = np.array(U)
        if debug:
            reconstruction(ncomp, U, S, V)
        if verbose:
            print('Done randomized SVD/PCA with randomized pytorch (GPU)')

        raise ValueError('The SVD mode is not available')

    if usv:
        if mode == 'lapack':
            return V.T, S, U.T
        elif mode == 'pytorch':
            if to_numpy:
                return V.T, S, U.T
                return torch.transpose(V, 0, 1), S, torch.transpose(U, 0, 1)
            return U, S, V
        if mode == 'lapack':
            return U.T
        elif mode == 'pytorch':
            return U
            return V
Ejemplo n.º 29
def unique(ar, return_index=False, return_inverse=False,
           return_counts=False, axis=None, *, equal_nan=True):
    """Find the unique elements of an array.

    Returns the sorted unique elements of an array. There are three optional
    outputs in addition to the unique elements:

    * the indices of the input array that give the unique values
    * the indices of the unique array that reconstruct the input array
    * the number of times each unique value comes up in the input array

    Args:
        ar(array_like): Input array. This will be flattened if it is not
            already 1-D.
        return_index(bool, optional): If True, also return the indices of `ar`
            (along the specified axis, if provided, or in the flattened array)
            that result in the unique array.
        return_inverse(bool, optional): If True, also return the indices of the
            unique array (for the specified axis, if provided) that can be used
            to reconstruct `ar`.
        return_counts(bool, optional): If True, also return the number of times
            each unique item appears in `ar`.
        axis(int or None, optional): Not supported yet.
        equal_nan(bool, optional): If True, collapse multiple NaN values in the
            return array into one.

    Returns:
        cupy.ndarray or tuple:
            If there are no optional outputs, it returns the
            :class:`cupy.ndarray` of the sorted unique values. Otherwise, it
            returns the tuple which contains the sorted unique values and
            followings.

            * The indices of the first occurrences of the unique values in the
              original array. Only provided if `return_index` is True.
            * The indices to reconstruct the original array from the
              unique array. Only provided if `return_inverse` is True.
            * The number of times each of the unique values comes up in the
              original array. Only provided if `return_counts` is True.

    Raises:
        NotImplementedError: If `axis` is not None.

    .. warning::

        This function may synchronize the device.

    .. seealso:: :func:`numpy.unique`
    """
    if axis is not None:
        raise NotImplementedError('axis option is not supported yet.')

    # ``flatten`` returns a copy, so the in-place sort below never modifies
    # the caller's array.
    ar = cupy.asarray(ar).flatten()

    if return_index or return_inverse:
        # The permutation is needed later to map sorted positions back to
        # positions in the original (flattened) input.
        perm = ar.argsort()
        aux = ar[perm]
    else:
        # No index output requested: a plain sort is enough.  The adjacent
        # comparison below only detects duplicates in sorted data.
        ar.sort()
        aux = ar

    # mask[k] is True where a new run of equal values starts in ``aux``.
    mask = cupy.empty(aux.shape, dtype=cupy.bool_)
    mask[:1] = True
    mask[1:] = aux[1:] != aux[:-1]
    if equal_nan:
        _unique_update_mask_equal_nan(mask[1:], aux[:-1])

    ret = aux[mask]
    if not return_index and not return_inverse and not return_counts:
        return ret

    ret = (ret,)
    if return_index:
        ret += (perm[mask],)
    if return_inverse:
        # Running count of run starts gives, for every sorted element, the
        # index of its unique value; scatter it back through ``perm``.
        imask = cupy.cumsum(mask) - 1
        inv_idx = cupy.empty(mask.shape, dtype=cupy.intp)
        inv_idx[perm] = imask
        ret += (inv_idx,)
    if return_counts:
        # Run-start positions plus a sentinel at the end; consecutive
        # differences are the run lengths.
        nonzero = cupy.nonzero(mask)[0]  # may synchronize
        idx = cupy.empty((nonzero.size + 1,), nonzero.dtype)
        idx[:-1] = nonzero
        idx[-1] = mask.size
        ret += (idx[1:] - idx[:-1],)
    return ret
Ejemplo n.º 30
        out_h, out_w = h - kh + 1 + ph * 2, w - kw + 1 + pw * 2  # TODO
    elif mode == 'valid':
        ph, pw = 0, 0
        out_h, out_w = h - kh + 1, w - kw + 1  # TODO
        raise NotImplementedError

    y = cp.empty((n, out_c, out_h, out_w), dtype=in1.dtype)

    col = im2col_gpu(in1, kh, kw, 1, 1, ph, pw)
    y = cp.tensordot(col, in2, ((1, 2, 3), (1, 2, 3))).astype(in1.dtype,
    y = cp.rollaxis(y, 3, 1)
    return y.transpose(2, 3, 0, 1)

if __name__ == '__main__':
    # Self-check: compare the GPU convolution against SciPy's CPU reference
    # on small random complex inputs.
    import cupy as cp
    import numpy as np
    from scipy.signal import convolve

    a = np.random.randn(5, 5, 5, 1) + 1j * np.random.randn(5, 5, 5, 1)
    b = np.random.randn(3, 3, 1, 1) + 1j * np.random.randn(3, 3, 1, 1)
    y_cpu = convolve(a, b, 'valid')

    x = cp.asarray(a)
    w = cp.asarray(b)
    y_gpu = convolve2d(x, w, 'valid')

    # The comparison result used to be computed and thrown away, so the
    # check could never report a mismatch; report it explicitly instead.
    matches = np.allclose(y_gpu.get().squeeze(), y_cpu.squeeze(), atol=1e-6)
    print('GPU result matches CPU reference:', matches)
Ejemplo n.º 31
    volume = np.repeat(volume, args.slice, axis=2)

    w, h, z = volume.shape

    # convert to a tensor
    b = c = 1
    x = volume.reshape(b, c, w, h, z).astype(np.float32)

    # reshape the tensor: [b(=1),c(=1),w,h,z] -> [b*z(=z),c(=1),w,h]
    x = x.transpose(0, 4, 1, 2, 3)
    x = x.reshape(b * z, c, w, h)

    # to gpu
    if args.gpu >= 0:
        import cupy as xp
        x = xp.asarray(x)
    x = chainer.Variable(x)

    # do
    radon = Radon(theta=np.linspace(0, 180, args.angle))

    if args.gpu >= 0:

    import tqdm
    for _ in tqdm.tqdm(range(args.trial)):
        ret = radon(x)
Ejemplo n.º 32
def histogramdd(sample, bins=10, range=None, weights=None, density=False):
    """
    Compute the multidimensional histogram of some data.

    Parameters
    ----------
    sample : (N, D) array, or (D, N) array_like
        The data to be histogrammed.

        Note the unusual interpretation of sample when an array_like:

        * When an array, each row is a coordinate in a D-dimensional space -
          such as ``histogramdd(cupy.array([p1, p2, p3]))``.
        * When an array_like, each element is the list of values for single
          coordinate - such as ``histogramdd((X, Y, Z))``.

        The first form should be preferred.

    bins : sequence or int, optional
        The bin specification:

        * A sequence of arrays describing the monotonically increasing bin
          edges along each dimension.
        * The number of bins for each dimension (nx, ny, ... =bins)
        * The number of bins for all dimensions (nx=ny=...=bins).

    range : sequence, optional
        A sequence of length D, each an optional (lower, upper) tuple giving
        the outer bin edges to be used if the edges are not given explicitly in
        `bins`.
        An entry of None in the sequence results in the minimum and maximum
        values being used for the corresponding dimension.
        The default, None, is equivalent to passing a tuple of D None values.
    weights : (N,) array_like, optional
        An array of values `w_i` weighing each sample `(x_i, y_i, z_i, ...)`.
        The values of the returned histogram are equal to the sum of the
        weights belonging to the samples falling into each bin.
    density : bool, optional
        If False, the default, returns the number of samples in each bin.
        If True, returns the probability *density* function at the bin,
        ``bin_count / sample_count / bin_volume``.

    Returns
    -------
    H : ndarray
        The multidimensional histogram of sample x. See density and weights
        for the different possible semantics.
    edges : list
        A list of D arrays describing the bin edges for each dimension.

    Raises
    ------
    ValueError
        If `bins` or `range` does not have one entry per dimension, if an
        integer bin count is smaller than 1, if an array of bin edges is
        not monotonically increasing, or if a bin specification is neither
        a scalar nor a 1-D array.
    RuntimeError
        If the final histogram shape does not match the computed bin counts.

    See Also
    --------
    histogram: 1-D histogram
    histogram2d: 2-D histogram

    Examples
    --------
    >>> r = cupy.random.randn(100,3)
    >>> H, edges = cupy.histogramdd(r, bins = (5, 8, 4))
    >>> H.shape, edges[0].size, edges[1].size, edges[2].size
    ((5, 8, 4), 6, 9, 5)

    """
    if isinstance(sample, cupy.ndarray):
        # Sample is an ND-array.
        if sample.ndim == 1:
            sample = sample[:, cupy.newaxis]
        nsamples, ndim = sample.shape
    else:
        # Sample is a sequence of 1-D arrays, one per dimension.
        sample = cupy.stack(sample, axis=-1)
        nsamples, ndim = sample.shape

    nbin = numpy.empty(ndim, int)
    edges = ndim * [None]
    dedges = ndim * [None]
    if weights is not None:
        weights = cupy.asarray(weights)

    try:
        nbins = len(bins)
        if nbins != ndim:
            raise ValueError(
                "The dimension of bins must be equal to the dimension of the "
                " sample x."
            )
    except TypeError:
        # bins is an integer
        bins = ndim * [bins]

    # normalize the range argument
    if range is None:
        range = (None,) * ndim
    elif len(range) != ndim:
        raise ValueError("range argument must have one entry per dimension")

    # Create edge arrays
    # (`_range` is used because the `range` parameter shadows the builtin.)
    for i in _range(ndim):
        if cnp.ndim(bins[i]) == 0:
            if bins[i] < 1:
                raise ValueError(
                    "`bins[{}]` must be positive, when an integer".format(i)
                )
            smin, smax = _get_outer_edges(sample[:, i], range[i])
            num = int(bins[i] + 1)  # synchronize!
            edges[i] = cupy.linspace(smin, smax, num)
        elif cnp.ndim(bins[i]) == 1:
            edges[i] = cupy.asarray(bins[i])
            if (edges[i][:-1] > edges[i][1:]).any():
                raise ValueError(
                    "`bins[{}]` must be monotonically increasing, when an array".format(
                        i
                    )
                )
        else:
            raise ValueError(
                "`bins[{}]` must be a scalar or 1d array".format(i)
            )

        nbin[i] = len(edges[i]) + 1  # includes an outlier on each end
        dedges[i] = cupy.diff(edges[i])

    # Compute the bin number each sample falls into.
    ncount = tuple(
        # avoid cupy.digitize to work around gh-11022
        cupy.searchsorted(edges[i], sample[:, i], side="right")
        for i in _range(ndim)
    )

    # Using digitize, values that fall on an edge are put in the right bin.
    # For the rightmost bin, we want values equal to the right edge to be
    # counted in the last bin, and not as an outlier.
    for i in _range(ndim):
        # Find which points are on the rightmost edge.
        on_edge = sample[:, i] == edges[i][-1]
        # Shift these points one bin to the left.
        ncount[i][on_edge] -= 1

    # Compute the sample indices in the flattened histogram matrix.
    # This raises an error if the array is too large.
    xy = cnp.ravel_multi_index(ncount, nbin)

    # Compute the number of repetitions in xy and assign it to the
    # flattened histmat.
    hist = cupy.bincount(xy, weights, minlength=numpy.prod(nbin))

    # Shape into a proper matrix
    hist = hist.reshape(nbin)

    # This preserves the (bad) behavior observed in gh-7845, for now.
    hist = hist.astype(float)  # Note: NumPy uses casting='safe' here too

    # Remove outliers (indices 0 and -1 for each dimension).
    core = ndim * (slice(1, -1),)
    hist = hist[core]

    if density:
        # calculate the probability density function
        s = hist.sum()
        for i in _range(ndim):
            # Broadcast the bin widths of dimension i along that axis only.
            shape = [1] * ndim
            shape[i] = nbin[i] - 2
            hist = hist / dedges[i].reshape(shape)
        hist /= s

    if any(hist.shape != numpy.asarray(nbin) - 2):
        raise RuntimeError("Internal Shape Error")
    return hist, edges
Ejemplo n.º 33
def _get_bin_edges(a, bins, range):
    Computes the bins used internally by `histogram`.

        a (ndarray): Ravelled data array
        bins (int or ndarray): Forwarded argument from `histogram`.
        range (None or tuple): Forwarded argument from `histogram`.

        bin_edges (ndarray): Array of bin edges
        uniform_bins (Number, Number, int): The upper bound, lowerbound, and
        number of bins, used in the implementation of `histogram` that works on
        uniform bins.
    # parse the overloaded bins argument
    n_equal_bins = None
    bin_edges = None

    # if isinstance(bins, cupy.ndarray) and bins.ndim == 0:
    #     # allow uint8 array, etc
    #     if bins.dtype not in 'bui':
    #         raise TypeError(
    #             "`bins` must be an integer, a string, or an array")
    #     bins = int(bins)  # synchronize

    if isinstance(bins, int):  # will not allow 0-dimensional cupy array
        # if cupy.ndim(bins) == 0:
            n_equal_bins = operator.index(bins)
        except TypeError:
            raise TypeError("`bins` must be an integer, a string, or an array")
        if n_equal_bins < 1:
            raise ValueError("`bins` must be positive, when an integer")

        first_edge, last_edge = _get_outer_edges(a, range)

    elif isinstance(bins, cupy.ndarray):
        if bins.ndim == 1:  # cupy.ndim(bins) == 0:
            bin_edges = cupy.asarray(bins)
            if (bin_edges[:-1] > bin_edges[1:]).any():  # synchronize!
                raise ValueError(
                    "`bins` must increase monotonically, when an array"

    elif isinstance(bins, str):
        raise NotImplementedError("only integer and array bins are implemented")

    if n_equal_bins is not None:
        # numpy's gh-10322 means that type resolution rules are dependent on
        # array shapes. To avoid this causing problems, we pick a type now and
        # stick with it throughout.
        bin_type = cupy.result_type(first_edge, last_edge, a)
        if cupy.issubdtype(bin_type, cupy.integer):
            bin_type = cupy.result_type(bin_type, float)

        # bin edges must be computed
        bin_edges = cupy.linspace(
            n_equal_bins + 1,
        return bin_edges, (first_edge, last_edge, n_equal_bins)
        return bin_edges, None
Ejemplo n.º 34
def evolve(model):
    # Train `model` for up to `n_epoch` epochs with patience-based early
    # stopping, evaluate on the test set after every epoch, save accuracy
    # plots plus the model/optimizer state, and return the test accuracy of
    # the last epoch that ran.
    #
    # Reads names defined outside this function: N_train, N_validate, N_test,
    # batchsize, n_epoch, x_train, y_train, x_test, y_test, optimizer, version,
    # version_acc, all_train_acc, all_test_acc and starttime.
    #
    # NOTE(review): this is Python 2 code (print statements, xrange).
    # Plot init
    train_loss = []
    train_acc  = []
    test_loss = []
    test_acc  = []

    # Minibatches per epoch (Python 2 `/`: floor division when both are ints).
    n_train_batches = N_train / batchsize
    # early stopping
    # `patience` is the minibatch count after which training stops; it is
    # extended by `patience_increase` when the validation loss improves by
    # more than the `improvement_threshold` factor (see the validation step).
    patience = 5000
    patience_increase = 2
    improvement_threshold = 0.995
    # Validation runs every `validation_frequency` minibatches.
    validation_frequency = min(n_train_batches, patience / 2)
    best_validation_loss = np.inf
    # NOTE(review): `test_score` is not referenced again in this function.
    test_score = 0
    done_looping = False

    # Learning loop
    epoch = 0
    while (epoch < n_epoch) and (not done_looping):
        epoch = epoch + 1
        print 'epoch {}'.format(epoch)

        # training
        # A fresh random permutation gives shuffled minibatches each epoch.
        perm = np.random.permutation(N_train)
        sum_train_accuracy = 0
        sum_train_loss = 0
        for i in xrange(0, N_train, batchsize):
            x = chainer.Variable(cp.asarray(x_train[perm[i:i + batchsize]]))
            t = chainer.Variable(cp.asarray(y_train[perm[i:i + batchsize]]))

            # Pass the loss function (Classifier defines it) and its arguments
            optimizer.update(model, x, t)
            # Weight by batch length so the epoch totals can be divided by
            # N_train even when the last batch is shorter.
            sum_train_loss += float(model.loss.data) * len(t.data)
            sum_train_accuracy += float(model.accuracy.data) * len(t.data)
            # generate network graph
            # if epoch == 1 and i == 0:
                # with open('netgraph.dot', 'w') as o:
                    # g = computational_graph.build_computational_graph(
                        # (model.loss, ), remove_split=True)
                    # o.write(g.dump())
                # print 'net graph generated'
            # validation
            batch_index = (i / batchsize)
            # Global minibatch counter across epochs (shadows builtin `iter`).
            iter = (epoch - 1) * n_train_batches + batch_index
            if (iter + 1) % validation_frequency == 0:
                sum_validate_accuracy = 0
                sum_validate_loss = 0
                # This inner loop reuses `i`, `x` and `t` from the training
                # loop; `batch_index` and `iter` were computed beforehand.
                # NOTE(review): batches are sliced from x_test/y_test but
                # bounded and averaged by N_validate - confirm intended.
                for i in xrange(0, N_validate, batchsize):
                    # NOTE(review): the next two calls are unclosed in this
                    # excerpt (trailing comma, no closing parenthesis); the
                    # remaining argument appears to be missing.
                    x = chainer.Variable(cp.asarray(x_test[i:i + batchsize]),
                    t = chainer.Variable(cp.asarray(y_test[i:i + batchsize]),
                    loss = model(x, t)
                    sum_validate_loss += float(loss.data) * len(t.data)
                    sum_validate_accuracy += float(model.accuracy.data) * len(t.data)

                this_validate_loss = sum_validate_loss / N_validate
                this_validate_accuracy = sum_validate_accuracy / N_validate
                print 'validation epoch{}, minibatch{}/{}'.format(epoch, batch_index + 1, n_train_batches)
                print '      mean loss={}, accuracy={}'.format(
                    this_validate_loss, sum_validate_accuracy / N_validate)
                # Any improvement updates the best loss; only an improvement
                # beyond the threshold factor extends the patience.
                if this_validate_loss < best_validation_loss:
                    if this_validate_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iter * patience_increase)
                        print " iter {} / patience {}".format(iter+1, patience)
                    best_validation_loss = this_validate_loss

            # Only sets the flag: the rest of this epoch's minibatches still
            # run, and the while loop exits before the next epoch.
            if patience <= iter:
                done_looping = True

        train_loss.append(sum_train_loss / N_train)
        train_acc.append(sum_train_accuracy / N_train)
        print 'train mean loss={}, accuracy={}'.format(
            sum_train_loss / N_train, sum_train_accuracy / N_train)

        # evaluation
        sum_test_accuracy = 0
        sum_test_loss = 0
        for i in xrange(0, N_test, batchsize):
            # NOTE(review): same unclosed calls as in the validation loop.
            x = chainer.Variable(cp.asarray(x_test[i:i + batchsize]),
            t = chainer.Variable(cp.asarray(y_test[i:i + batchsize]),
            loss = model(x, t)
            sum_test_loss += float(loss.data) * len(t.data)
            sum_test_accuracy += float(model.accuracy.data) * len(t.data)

        test_loss.append(sum_test_loss / N_test)
        test_acc.append(sum_test_accuracy / N_test)
        print 'test  mean loss={}, accuracy={}'.format(
            sum_test_loss / N_test, sum_test_accuracy / N_test)

    print 'train finish'
    print 'draw graph'
    # draw graph
    # NOTE(review): no figure is cleared between the savefig calls below, so
    # later images presumably also contain the earlier curves - confirm.
    # NOTE(review): "%5d" pads the version number with spaces to width 5, so
    # the generated file names can contain spaces.
    # Progression within this version
    # plt.xlim([0, epoch])
    # plt.ylim([0.95, 1.0])
    plt.plot(xrange(1,len(train_acc)+1), train_acc)
    plt.plot(xrange(1,len(test_acc)+1), test_acc)
    plt.title("Accuracy of digit recognition.")
    plt.savefig("graph_v%5d.png" % (version))
    # Progression within this version, range [0.95, 1.0]
    plt.xlim([0, epoch])
    plt.ylim([0.95, 1.0])
    plt.plot(xrange(1,len(train_acc)+1), train_acc)
    plt.plot(xrange(1,len(test_acc)+1), test_acc)
    plt.title("Accuracy of digit recognition. range [0.95, 1.0]")
    plt.savefig("graph_095_v%5d.png" % (version))
    # Accuracy change across versions
    # plt.ylim([0.50, 1.0])
    plt.plot(xrange(1,len(version_acc)+1), version_acc)
    plt.title("Accuracy of digit recognition. (version)")
    plt.savefig("graph_version_v%5d.png" % (version))
    # Progression over everything so far (x axis: epoch)
    # plt.ylim([0.95, 1.0])
    plt.plot(xrange(1,len(all_train_acc)+1), all_train_acc)
    plt.plot(xrange(1,len(all_test_acc)+1), all_test_acc)
    plt.title("Accuracy of digit recognition.")
    plt.savefig("graph_allepoch_v%5d.png" % (version))
    plt.ylim([0.95, 1.0])
    plt.plot(xrange(1,len(all_train_acc)+1), all_train_acc)
    plt.plot(xrange(1,len(all_test_acc)+1), all_test_acc)
    plt.title("Accuracy of digit recognition. range [0.95, 1.0]")
    plt.savefig("graph_allepoch095_v%5d.png" % (version))
    # Save the model and the optimizer
    print 'save the model'
    serializers.save_hdf5("v%5d.model" % (version), model)
    print 'save the optimizer'
    serializers.save_hdf5('v%5d.state' % (version), optimizer)

    finishtime = time.time()
    print 'execute time = {}'.format(finishtime - starttime)

    # plt.show()
    # `sum_test_accuracy` is only assigned inside the while loop, so this
    # raises NameError if no epoch ran (n_epoch <= 0).
    return sum_test_accuracy / N_test
Ejemplo n.º 35
    def Allreduce_mean(self, x, **kwargs):
        """Average ``x`` over the local pool, then over all MPI processes.

        ``self.pool.reduce_mean`` produces the local mean, which is copied to
        the host, combined with ``self.mpi.Allreduce`` and divided by
        ``self.mpi.size``. The result is returned as a CuPy array.
        """
        local_mean_host = cp.asnumpy(self.pool.reduce_mean(x, **kwargs))
        global_mean = self.mpi.Allreduce(local_mean_host) / self.mpi.size
        return cp.asarray(global_mean)
Ejemplo n.º 36
 def walsh_transform(self,keys=None):
     # For each requested key, apply that key's operator to the four stored
     # Walsh-matrix blocks and accumulate block products on the GPU.
     #
     # NOTE(review): this excerpt is missing lines (`try:` / `else:`
     # statements, the bodies of the `except KeyError:` clauses, the closing
     # brace of `matvec_op`, and whatever writes `res` back to the file), so
     # the comments describe only what is visible.
     if keys is None:
         keys = ['kernel'] + list(self.constraints.keys())
         # NOTE(review): presumably the body of a lost `else:`; as shown it is
         # a no-op self-assignment inside the `if`.
         keys = keys
     # Per-key flag: True when the result already exists in the HDF5 file.
     is_stored = dict()
     for key in keys:
         is_stored[key] = False
     if os.path.exists(self.fname):
         with h5py.File(self.fname,mode='r') as f:
             for key in keys:
                     # A key counts as stored once its group has a '3' entry
                     # ('3' is the last name in `blocks` below).
                     if '3' in f[key].keys():
                         is_stored[key] = True
                     if key == 'depth':
                         # Relative difference between the stored and the
                         # current depth constraint; above 1e-3 the stored
                         # result is treated as stale.
                         res = f['depth']['constraint'][:] - self.constraints['depth']
                         res = np.linalg.norm(res)/np.linalg.norm(self.constraints['depth'])
                         if res > 1.0e-3:
                             is_stored[key] = False
                 # NOTE(review): the matching `try:` and the handler body are
                 # not visible.
                 except KeyError:
     # Smallest power-of-two exponent covering nx*ny*nz, and 1/sqrt(2)**logn.
     logn = int(np.ceil(np.log2(self._nx*self._ny*self._nz)))
     norm_walsh = 1./(np.sqrt(2)**logn)
     blocks = ['0','1','2','3']
     # Operator applied to a Walsh block for each key.
     # NOTE(review): the closing brace of this dict is missing.
     matvec_op = {'kernel':self.kernel_op.gtoep.matvec,
               'dx': lambda x: self._dxyzvec(x,key='dx'),
               'dy': lambda x: self._dxyzvec(x,key='dy'),
               'dz': lambda x: self._dxyzvec(x,key='dz'),
               'refer': lambda x: self._diagvec(x,diag=self.constraints['refer']),
               'depth': lambda x: self._diagvec(x,diag=np.sqrt(self.constraints['depth']))
     # 'refer' is always flagged as already stored.
     is_stored['refer'] = True
     for key in keys:
         if is_stored[key]:
             print('walsh transformation of {} already exists.'.format(key))
         # NOTE(review): as shown, stored keys are still processed below; a
         # `continue` after the message above was presumably lost.
         print('performing walsh transformation on {}.'.format(key))
         # NOTE(review): uses self.nx/ny/nz here but self._nx/_ny/_nz above.
         step = self.nx*self.ny*self.nz // 4
         if key == 'depth':
             step = self._nz
         with h5py.File(self.fname,mode='a') as f:
                 # Drop any previous group for this key before recreating it
                 # (the enclosing `try:` is not visible).
                 del f[key]
             except KeyError:
             dxyz_group = f.create_group(key)
             walsh_group = f['walsh_matrix']
             for i in range(4):
                 # Progress is printed with the 0-based block index.
                 print("\t progress {}/4".format(i))
                 part_walsh = walsh_group[blocks[i]][:]
                 if key == 'depth':
                     # Only the first `_nz` rows are used for 'depth'.
                     part_walsh = walsh_group[blocks[i]][:self._nz]
                 part_walsh = matvec_op[key](part_walsh)
                 # GPU device index 2 is hard-coded.
                 with cp.cuda.Device(2):
                     res = cp.zeros((step,step))
                     j = 0
                     # Accumulate chunk @ chunk.T over column chunks of width
                     # `step`, so only one chunk is on the GPU at a time.
                     while j*step < part_walsh.shape[1]:
                         tmp_block_gpu = cp.asarray(part_walsh[:,j*step:(j+1)*step])
                         res += tmp_block_gpu @ tmp_block_gpu.T
                         j += 1
                     res = cp.asnumpy(res)
                     if key in self._smooth_components:
                         # Zero entries with magnitude below 0.1 * norm_walsh.
                         res[np.abs(res)<1.0e-1*norm_walsh] = 0.
                     # Drop the reference to the last GPU chunk.
                     tmp_block_gpu = None
                     # NOTE(review): the pools are fetched but nothing visible
                     # uses them; the freeing calls were presumably lost.
                     mempool = cp.get_default_memory_pool()
                     pinned_mempool = cp.get_default_pinned_memory_pool()
     if ('depth' in keys) and (not is_stored['depth']):
         with h5py.File(self.fname,mode='a') as f:
                 del f['depth_constraint']
             except KeyError:
             # NOTE(review): the excerpt ends here; what is written into the
             # 'depth' group is not visible.
             dxyz_group = f['depth']
Ejemplo n.º 37
import cupy as cp
import numpy as np
from cupy import testing

from skimage import data
from cupyimg.skimage import color
from cupyimg.skimage.util import img_as_bool
from cupyimg.skimage.morphology import binary, grey, selem
from cupyimg.scipy import ndimage as ndi

import pytest

# Module-level fixtures: the astronaut sample image converted to a CuPy array
# and then to grayscale, and a boolean image of the pixels above 100/255.
img = color.rgb2gray(cp.asarray(data.astronaut()))
bw_img = img > 100 / 255.0

def test_non_square_image():
    """Binary and grey erosion must agree on a non-square 100x200 crop."""
    footprint = selem.square(3)
    crop = bw_img[:100, :200]
    from_binary = binary.binary_erosion(crop, footprint)
    from_grey = img_as_bool(grey.erosion(crop, footprint))
    testing.assert_array_equal(from_binary, from_grey)

def test_binary_erosion():
    """Binary erosion of the full image must match bool-converted grey erosion."""
    footprint = selem.square(3)
    from_binary = binary.binary_erosion(bw_img, footprint)
    from_grey = img_as_bool(grey.erosion(bw_img, footprint))
    testing.assert_array_equal(from_binary, from_grey)

def test_binary_dilation():
Ejemplo n.º 38
 def to_gpu(self, arr):
     """Return ``arr`` converted with ``cupy.asarray``."""
     device_array = cupy.asarray(arr)
     return device_array
Ejemplo n.º 39
def to_gpu(*args):
    """Upload NumPy arrays to the GPU and return them.

    Args:
        *args: One or more array-likes accepted by ``cp.asarray``.

    Returns:
        The converted array when exactly one argument is given. With several
        arguments, a generator yielding one converted array per argument, in
        order, so the result can be unpacked: ``a, b = to_gpu(x, y)``.

    Raises:
        IndexError: If called without arguments.
    """
    if len(args) > 1:
        return (cp.asarray(x) for x in args)
    # Single-argument case. This return used to sit after the return above,
    # inside the same branch, so it was unreachable and a one-argument call
    # returned None.
    return cp.asarray(args[0])
Ejemplo n.º 40
def corner_peaks(
    """Find peaks in corner measure response image.

    This differs from `skimage.feature.peak_local_max` in that it suppresses
    multiple connected peaks with the same accumulator value.

    image : ndarray
        Input image.
    min_distance : int, optional
        The minimal allowed distance separating peaks.
    * : *
        See :py:meth:`skimage.feature.peak_local_max`.
    p_norm : float
        Which Minkowski p-norm to use. Should be in the range [1, inf].
        A finite large p may cause a ValueError if overflow can occur.
        ``inf`` corresponds to the Chebyshev distance and 2 to the
        Euclidean distance.

    output : ndarray or ndarray of bools

        * If `indices = True`  : (row, column, ...) coordinates of peaks.
        * If `indices = False` : Boolean array shaped like `image`, with peaks
          represented by True values.

    See also

    .. versionchanged:: 0.18
        The default value of `threshold_rel` has changed to None, which
        corresponds to letting `skimage.feature.peak_local_max` decide on the
        default. This is equivalent to `threshold_rel=0`.

    The `num_peaks` limit is applied before suppression of connected peaks.
    To limit the number of peaks after suppression, set `num_peaks=np.inf` and
    post-process the output of this function.

    >>> from cupyimg.skimage.feature import peak_local_max
    >>> response = cp.zeros((5, 5))
    >>> response[2:4, 2:4] = 1
    >>> response
    array([[0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0.],
           [0., 0., 1., 1., 0.],
           [0., 0., 1., 1., 0.],
           [0., 0., 0., 0., 0.]])
    >>> peak_local_max(response)
    array([[2, 2],
           [2, 3],
           [3, 2],
           [3, 3]])
    >>> corner_peaks(response)
    array([[2, 2]])

    # NOTE(review): the parameter list, the docstring section headers and the
    # closing docstring quotes are missing from this excerpt, and several
    # calls below are cut off after their first argument.

    # An infinite `num_peaks` is replaced by None before the call below.
    if cp.isinf(num_peaks):
        num_peaks = None

    # Get the coordinates of the detected peaks
    # NOTE(review): the arguments of this call are not visible.
    coords = peak_local_max(

    if len(coords):
        # TODO: modify to do KDTree on the GPU (cuSpatial?)
        # The KD-tree step runs on the host, so copy the coordinates to NumPy.
        coords = cp.asnumpy(coords)

        # Use KDtree to find the peaks that are too close to each other
        tree = spatial.cKDTree(coords)

        # Peaks are visited in order; a peak already rejected is skipped.
        rejected_peaks_indices = set()
        for idx, point in enumerate(coords):
            if idx not in rejected_peaks_indices:
                # NOTE(review): the rest of this call and the code that fills
                # `rejected_peaks_indices` are not visible.
                candidates = tree.query_ball_point(point,

        # Remove the peaks that are too close to each other
        # NOTE(review): this call is cut off after its second argument.
        coords = np.delete(coords, tuple(rejected_peaks_indices),
        # Move the surviving coordinates back to a CuPy array.
        coords = cp.asarray(coords)

    if indices:
        return coords

    # Otherwise return a boolean mask shaped like `image`, True at each peak.
    peaks = cp.zeros_like(image, dtype=bool)
    peaks[tuple(coords.T)] = True

    return peaks
Ejemplo n.º 41
    def deformation(self, prm):
            Apply 2D Gaussian and Planar deformation.
            Computation is parallelized on GPU using cupy.
        # NOTE(review): the docstring quotes around the two lines above and
        # the triple-quoted kernel sources below (including the closing
        # parentheses of both ElementwiseKernel calls) are missing from this
        # excerpt.
        import cupy as cp
        # Copy the coordinates, the deformation parameters and the z values
        # into CuPy arrays.
        xy_cp = cp.asarray(prm.xy)
        a_cp = cp.asarray(self.a)
        b_cp = cp.asarray(self.b)
        c_cp = cp.asarray(self.c)
        d_cp = cp.asarray(self.d)
        sigma_cp = cp.asarray(self.sigma)
        e_cp = cp.asarray(self.e)
        f_cp = cp.asarray(self.f)
        g_cp = cp.asarray(self.g)
        z_cp = cp.asarray(prm.z)

        # Elementwise plane: z = e + f*x + g*y.
        func_planar = cp.ElementwiseKernel(
            in_params='T x, T y, T e, T f, T g',
            out_params='T z',
            operation= \
                z = e + f*x + g*y;

        # Elementwise 2D Gaussian with amplitude b, centre (c, d) and width
        # sigma.
        func_gauss2d = cp.ElementwiseKernel(
            in_params='T x, T y, T b, T c, T d, T sigma',
            out_params='T z',
            operation= \
                z = b*expf(-(powf(x-c,2) + powf(y-d,2))/(2*powf(sigma,2)));

        # Sum one Gaussian per entry of self.b, evaluated at every xy point.
        gauss_2d_cp = cp.zeros_like(xy_cp[:, 0])
        for i in range(len(self.b)):
            gauss_2d_cp += func_gauss2d(xy_cp[:, 0], xy_cp[:, 1], b_cp[i], c_cp[i], d_cp[i], sigma_cp[i])
        # s1: outer product of the Gaussian term with z, scaled by 1.5 / z and
        # shifted by a. s2: planar term, one value per xy point.
        s1_cp = a_cp + (1.5 / z_cp) * cp.outer(cp.transpose(gauss_2d_cp), z_cp)
        s2_cp = func_planar(xy_cp[:, 0], xy_cp[:, 1], e_cp, f_cp, g_cp)

        refl_cp = cp.asarray(self.refl)
        for i in range(prm.nxy_tr):
            # Row i of refl is multiplied by sinc(z[k] - s[j]); this looks
            # like sinc interpolation onto the shifted positions s - confirm.
            s = s1_cp[i, :] + s2_cp[i] + z_cp
            mat = cp.tile(z_cp, (len(s), 1)) - cp.tile(cp.expand_dims(s, 1), (1, len(z_cp)))
            refl_cp[i, :] = cp.dot(refl_cp[i, :], cp.sinc(mat))

        # Return a NumPy array reshaped to [prm.nxy_tr, prm.nz_tr].
        return np.reshape(cp.asnumpy(refl_cp), [prm.nxy_tr, prm.nz_tr])
Ejemplo n.º 42
def get_good_channels(raw_data=None, probe=None, params=None):
    of the channels indicated by the user as good (chanMap)
    further subset those that have a mean firing rate above a certain value
    (default is ops.minfr_goodchannels = 0.1Hz)
    needs the same filtering parameters in ops as usual
    also needs to know where to start processing batches (twind)
    and how many channels there are in total (NchanTOT)
    # NOTE(review): the docstring quotes around the lines above are missing
    # from this excerpt, and several statements below are cut off.
    fs = params.fs
    fshigh = params.fshigh
    # NOTE(review): `fslow` is not used in the visible code; it may belong to
    # the truncated gpufilter call below.
    fslow = params.fslow
    Nbatch = get_Nbatch(raw_data, params)
    NT = params.NT
    spkTh = params.spkTh
    nt0 = params.nt0
    minfr_goodchannels = params.minfr_goodchannels

    chanMap = probe.chanMap
    # Nchan = probe.Nchan
    NchanTOT = len(chanMap)

    # ich: channel indices of detected spikes; k: spike count; ttime: total
    # duration of the processed data.
    ich = []
    k = 0
    ttime = 0

    # skip every 100 batches
    # The stride ceil(Nbatch / 100) visits at most about 100 batches.
    for ibatch in tqdm(range(0, Nbatch, int(ceil(Nbatch / 100))),
                       desc="Finding good channels"):
        i = NT * ibatch
        buff = raw_data[:, i:i + NT]
        assert np.isfortran(buff)
        # NOTE(review): the body of this `if` is missing.
        if buff.size == 0:

        # Put on GPU.
        buff = cp.asarray(buff, dtype=np.float32)
        assert cp.isfortran(buff)
        # NOTE(review): the remaining arguments of this call are not visible.
        datr = gpufilter(buff,

        # very basic threshold crossings calculation
        s = cp.std(datr, axis=0)
        datr = datr / s  # standardize each channel ( but don't whiten)
        mdat = my_min(
            datr, 30,
            0)  # get local minima as min value in +/- 30-sample range

        # take local minima that cross the negative threshold
        xi, xj = cp.nonzero((datr < mdat + 1e-3) & (datr < spkTh))

        # filtering may create transients at beginning or end. Remove those.
        xj = xj[(xi >= nt0) & (xi <= NT - nt0)]

        # collect the channel identities for the detected spikes
        # NOTE(review): nothing visible appends `xj` to `ich`; that statement
        # was presumably lost, and `cp.concatenate(ich)` below needs it.
        k += xj.size

        # keep track of total time where we took spikes from
        ttime += datr.shape[0] / fs

    ich = cp.concatenate(ich)

    # count how many spikes each channel got
    nc, _ = cp.histogram(ich, cp.arange(NchanTOT + 1))

    # divide by total time to get firing rate
    nc = nc / ttime

    # keep only those channels above the preset mean firing rate
    # The mask is copied to the host as a NumPy boolean array.
    igood = cp.asnumpy(nc >= minfr_goodchannels)

    logger.info('Found %d threshold crossings in %2.2f seconds of data.' %
                (k, ttime))
    logger.info('Found %d/%d bad channels.' % (np.sum(~igood), len(igood)))

    return igood
Ejemplo n.º 43
 def test_with_strides(self, dtype):
     """``cupy.asarray`` keeps the strides and byte size of a transposed source."""
     a = testing.shaped_arange((2, 3, 4), cupy, dtype).T
     wrapped = DummyObjectWithCudaArrayInterface(a, self.ver, self.strides)
     b = cupy.asarray(wrapped)
     assert a.strides == b.strides
     assert a.nbytes == b.data.mem.size
Ejemplo n.º 44
def get_whitening_matrix(raw_data=None, probe=None, params=None):
    based on a subset of the data, compute a channel whitening matrix
    this requires temporal filtering first (gpufilter)
    # NOTE(review): the docstring quotes around the two lines above are
    # missing from this excerpt, and some statements below are cut off.
    Nbatch = get_Nbatch(raw_data, params)
    ntbuff = params.ntbuff
    NTbuff = params.NTbuff
    whiteningRange = params.whiteningRange
    scaleproc = params.scaleproc
    NT = params.NT
    fs = params.fs
    fshigh = params.fshigh
    nSkipCov = params.nSkipCov

    xc = probe.xc
    yc = probe.yc
    chanMap = probe.chanMap
    Nchan = probe.Nchan
    # NOTE(review): duplicate of the `chanMap` assignment two lines above.
    chanMap = probe.chanMap

    # Nchan is obtained after the bad channels have been removed
    # Accumulator for the channel covariance.
    CC = cp.zeros((Nchan, Nchan))

    # Only every nSkipCov-th batch contributes to the estimate.
    for ibatch in tqdm(range(0, Nbatch, nSkipCov),
                       desc="Computing the whitening matrix"):
        # WARNING: we use Fortran order, so raw_data is NchanTOT x nsamples
        i = max(0, (NT - ntbuff) * ibatch - 2 * ntbuff)
        buff = raw_data[:, i:i + NT - ntbuff]

        nsampcurr = buff.shape[1]
        if nsampcurr < NTbuff:
            # Short batches are extended with copies of the last sample.
            # NOTE(review): the opening of the concatenated tuple and the end
            # of this call are missing.
            buff = np.concatenate(
                 np.tile(buff[:, nsampcurr - 1][:, np.newaxis], (1, NTbuff))),

        buff_g = cp.asarray(buff, dtype=np.float32)

        # apply filters and median subtraction
        datr = gpufilter(buff_g, fs=fs, fshigh=fshigh, chanMap=chanMap)

        CC = CC + cp.dot(datr.T, datr) / NT  # sample covariance

    # Normalise the accumulated covariance by ceil((Nbatch - 1) / nSkipCov).
    CC = CC / ceil((Nbatch - 1) / nSkipCov)

    if whiteningRange < np.inf:
        #  if there are too many channels, a finite whiteningRange is more robust to noise
        # in the estimation of the covariance
        whiteningRange = min(whiteningRange, Nchan)
        # this function performs the same matrix inversions as below, just on subsets of
        # channels around each channel
        Wrot = whiteningLocal(CC, yc, xc, whiteningRange)
        # NOTE(review): an `else:` before the next line appears to be missing;
        # as shown it overwrites the whiteningLocal result.
        Wrot = whiteningFromCovariance(CC)

    Wrot = Wrot * scaleproc

    logger.info("Computed the whitening matrix.")

    return Wrot
Ejemplo n.º 45
 def test_not_copied(self, dtype):
     """``cupy.asarray`` on a CUDA-array-interface wrapper yields equal contents."""
     a = testing.shaped_arange((2, 3, 4), cupy, dtype)
     wrapped = DummyObjectWithCudaArrayInterface(a, self.ver, self.strides)
     b = cupy.asarray(wrapped)
     testing.assert_array_equal(a, b)
Ejemplo n.º 46
    def evaluate(self):
        # Estimate `beta` as the mean over `self.N_z` sampled source
        # redshifts of a per-redshift weighted sum over the fiducial binaries.
        #
        # NOTE(review): this excerpt is missing lines: the list-comprehension
        # heads and closing brackets for the spins, an `else:` before the
        # numpy import, the end of the `prob({...})` call, the arguments of
        # LensedSourceRedshiftProbDist, and the body of the final loop.
        m1 = self.fiducial_binaries["mass_1"]
        m2 = self.fiducial_binaries["mass_2"]
        # Note that spins are redshift independent
        spin_1x, spin_1y, spin_1z = [
            for k in ["spin_1x", "spin_1y", "spin_1z"]
        spin_2x, spin_2y, spin_2z = [
            for k in ["spin_2x", "spin_2y", "spin_2z"]

        # `xp` is the array module: cupy when the GPU is enabled, presumably
        # numpy otherwise (as shown, the second import always wins).
        if _GPU_ENABLED:
            import cupy as xp
            import numpy as xp
        m1 = xp.asarray(m1)
        m2 = xp.asarray(m2)
        spin_1x = xp.asarray(spin_1x)
        spin_1y = xp.asarray(spin_1y)
        spin_1z = xp.asarray(spin_1z)
        spin_2x = xp.asarray(spin_2x)
        spin_2y = xp.asarray(spin_2y)
        spin_2z = xp.asarray(spin_2z)
        pdf_spin_fiducial = xp.asarray(self.pdf_spin_fiducial)
        pdf_mass_fiducial = xp.asarray(self.pdf_mass_fiducial)

        # Spin weights (population pdf over fiducial pdf) are computed once,
        # outside the redshift loop.
        pdf_spin_pop = self.spin_src_pop_model.prob({
            "spin_1x": spin_1x,
            "spin_1y": spin_1y,
            "spin_1z": spin_1z,
            "spin_2x": spin_2x,
            "spin_2y": spin_2y,
            "spin_2z": spin_2z,
        weights_spin = pdf_spin_pop / pdf_spin_fiducial

        # Convert the per-image arrays in place; this overwrites the entries
        # stored on `self`.
        for img in range(self.N_img):
            self.predictions[img] = xp.asarray(self.predictions[img])
            self.pdf_dLs_fiducial[img] = xp.asarray(self.pdf_dLs_fiducial[img])
            self.apparent_dLs[img] = xp.asarray(self.apparent_dLs[img])

        def epsilon(z_src):
            # Weighted sum for one source redshift: mass and spin weights
            # times, for each image, prediction * distance weight, summed and
            # divided by self.N_inj.
            det_mass_pop_dist = DetectorFrameComponentMassesFromSourceFrame(
                self.mass_src_pop_model, z_src)
            pdf_mass_pop = det_mass_pop_dist.prob({"mass_1": m1, "mass_2": m2})
            weights_mass = pdf_mass_pop / pdf_mass_fiducial
            weights_source = weights_mass * weights_spin
            integrand = weights_source

            for img in range(self.N_img):
                pdf_dL_fiducial = self.pdf_dLs_fiducial[img]
                dL_pop_dist = LuminosityDistancePriorFromAbsoluteMagnificationRedshift(
                    self.abs_magn_dist[img], z_src)
                pdf_dL_pop = dL_pop_dist.prob(self.apparent_dLs[img])
                weights_dL = pdf_dL_pop / pdf_dL_fiducial
                integrand *= self.predictions[img] * weights_dL

            return float(xp.sum(integrand) / float(self.N_inj))

        logger = logging.getLogger(__prog__)
        logger.info("Integrating over source redshift")
        z_dist = LensedSourceRedshiftProbDist(
        zs = z_dist.sample(size=self.N_z)
        if _GPU_ENABLED:
            # Bring the sampled redshifts back to the host before iterating.
            import cupy as cp
            zs = cp.asnumpy(zs)
        epsilons = []
        # NOTE(review): the loop body is missing; presumably it appends
        # epsilon(z) to `epsilons`.
        for z in tqdm.tqdm(zs):

        beta = np.sum(epsilons).astype(float) / self.N_z

        return beta
Ejemplo n.º 47
 def calc_log_prior_total_det(self):
     # Compute, block by block, the eigenvalues of the prior matrix and of
     # prior + observation matrix, accumulate the sums of their logarithms,
     # store the eigenvalues in `self.eigs`, and return both log sums.
     #
     # NOTE(review): this excerpt is missing lines (`try:` statements, bodies
     # of `except KeyError:` clauses, and the tails of the reshape/einsum
     # calls and of the indexed `+=` target).
     self.log_prior_det_val = 0
     self.log_total_det_val = 0
     blocks = ['0','1','2','3']
     prior_eigs = np.zeros(self._nx*self._ny*self._nz)
     total_eigs = np.zeros(self._nx*self._ny*self._nz)
     # Each of the four blocks covers a quarter of the nx*ny*nz unknowns.
     step = self._nx*self._ny*self._nz//4
         # The depth weight defaults to 1 when no 'depth' weight is set (the
         # enclosing `try:` is not visible).
         depth_weight = self._weights['depth']
     except KeyError:
         depth_weight = 1.
     with h5py.File(self.fname,mode='r') as f:
         if 'depth' in self._weights.keys():
             depth_walsh = f['depth']['0'][:]
         for i_b,block in enumerate(blocks):
             # Host-side accumulator for this block of the prior matrix.
             tmp_block = np.zeros((step,step))
             for dxyz_name in self._smooth_components:
                     # NOTE(review): the three calls below are cut off.
                     dxyz_walsh = f[dxyz_name][block][:].reshape(step//self._nz,
                     ein_path = np.einsum_path('mi,xiyj,jn->xmyn',
                     tmp_multi = np.einsum('mi,xiyj,jn->xmyn',
                     tmp_block += depth_weight*self._weights[dxyz_name]*tmp_multi.reshape(step,step)
                 except KeyError:
             if 'refer' in self._weights.keys():
                 # NOTE(review): `depth_walsh` is only assigned when 'depth'
                 # is among the weights; this branch assumes that it is.
                 tmp_multi_small = depth_walsh.T@depth_walsh
                 for i in range(step//self._nz):
                               i*self._nz:(i+1)*self._nz] += depth_weight*self._weights['refer']*tmp_multi_small
             # GPU device index 2 is hard-coded. The block is cast to float32
             # before the eigenvalue computation.
             with cp.cuda.Device(2):
                 tmp_block_gpu = cp.asarray(tmp_block,dtype=np.float32)
                 eigs = cp.linalg.eigvalsh(tmp_block_gpu)
                 prior_eigs[i_b*step:(i_b+1)*step] = cp.asnumpy(eigs)
                 # Sum of log eigenvalues = log-determinant of this block.
                 self.log_prior_det_val += cp.asnumpy(cp.sum(cp.log(eigs)))
                 # Drop the references to the GPU arrays.
                 tmp_block_gpu = None
                 eigs = None
             # Add the weighted observation ('kernel') block to get the total.
             tmp_block += self._weights['obs']*f['kernel'][block][:]
             with cp.cuda.Device(2):
                 tmp_block_gpu = cp.asarray(tmp_block,dtype=np.float32)
                 eigs = cp.linalg.eigvalsh(tmp_block_gpu)
                 total_eigs[i_b*step:(i_b+1)*step] = cp.asnumpy(eigs)
                 self.log_total_det_val += cp.asnumpy(cp.sum(cp.log(eigs)))
                 tmp_block_gpu = None
                 eigs = None
     # The accumulated values were already converted per block; asnumpy is
     # applied once more here.
     self.log_prior_det_val = cp.asnumpy(self.log_prior_det_val)
     self.log_total_det_val = cp.asnumpy(self.log_total_det_val)
     self.eigs = {'prior':prior_eigs,'total':total_eigs}
     return self.log_prior_det_val,self.log_total_det_val
Ejemplo n.º 48
def to_sp_dask_array(cudf_or_array, client=None):
    Converts an array or cuDF to a sparse Dask array backed by sparse CuPy.
    CSR matrices. Unfortunately, due to current limitations in Dask, there is
    no direct path to convert a cupy.sparse.spmatrix into a CuPy backed
    dask.Array without copying to host.

    NOTE: Until https://github.com/cupy/cupy/issues/2655 and
    https://github.com/dask/dask/issues/5604 are implemented, compute()
    will not be able to be called on a Dask.array that is backed with
    sparse CuPy arrays because they lack the necessary functionality
    to be stacked into a single array. The array returned from this
    utility will, however, still be able to be passed into functions
    that can make use of sparse CuPy-backed Dask.Array (eg. Distributed
    Naive Bayes).

    Relevant cuML issue: https://github.com/rapidsai/cuml/issues/1387

    cudf_or_array : cuDF Dataframe, array-like sparse / dense array, or
                    Dask DataFrame/Array
    client : dask.distributed.Client (optional) Dask client

    dtype : output dtype

    dask_array : dask.Array backed by cupy.sparse.csr_matrix
    # NOTE(review): the docstring quotes and section headers above are
    # missing from this excerpt, as are several `else:` lines, closing
    # brackets and branch bodies below.
    client = default_client() if client is None else client

    # Makes sure the MatDescriptor workaround for CuPy sparse arrays
    # is loaded (since Dask lazy-loaded serialization in cuML is only
    # executed when object from the cuML package needs serialization.
    # This can go away once the MatDescriptor pickling bug is fixed
    # in CuPy.
    # Ref: https://github.com/cupy/cupy/issues/3061
    from cuml.comm import serialize  # NOQA

    shape = cudf_or_array.shape
    # DataFrames must hold a single dtype; other inputs expose `.dtype`.
    if isinstance(cudf_or_array, dask.dataframe.DataFrame) or \
       isinstance(cudf_or_array, cudf.DataFrame):
        dtypes = np.unique(cudf_or_array.dtypes)

        if len(dtypes) > 1:
            raise ValueError("DataFrame should contain only a single dtype")

        dtype = dtypes[0]
        # NOTE(review): an `else:` before the next line appears to be missing.
        dtype = cudf_or_array.dtype

    # Sparse CSR placeholder passed as Dask `meta` below.
    meta = cupyx.scipy.sparse.csr_matrix(rmm_cupy_ary(cp.zeros, 1))

    if isinstance(cudf_or_array, dask.array.Array):
        # At the time of developing this, using map_blocks will not work
        # to convert a Dask.Array to CuPy sparse arrays underneath.

        # Each partition is converted to a DataFrame on its own worker, and
        # the parts are then reassembled into a Dask cuDF.
        parts = client.sync(_extract_partitions, cudf_or_array)
        cudf_or_array = [
            client.submit(_conv_np_to_df, part, workers=[w])
            for w, part in parts

        cudf_or_array = to_dask_cudf(cudf_or_array)

    if isinstance(cudf_or_array, dask.dataframe.DataFrame):
        Dask.Dataframe needs special attention since it has multiple dtypes.
        Just use the first (and assume all the rest are the same)
        cudf_or_array = cudf_or_array.map_partitions(
            _conv_df_to_sp, meta=dask.array.from_array(meta))

        # This will also handle the input of dask.array.Array
        return cudf_or_array

        # NOTE(review): the branches below presumably sit under a lost
        # `else:`; two of them have no visible body.
        if scipy.sparse.isspmatrix(cudf_or_array):
            cudf_or_array = \
        elif cupyx.scipy.sparse.isspmatrix(cudf_or_array):
        elif isinstance(cudf_or_array, cudf.DataFrame):
            cupy_ary = cp.asarray(cudf_or_array.as_gpu_matrix(), dtype)
            cudf_or_array = cupyx.scipy.sparse.csr_matrix(cupy_ary)
        elif isinstance(cudf_or_array, np.ndarray):
            cupy_ary = rmm_cupy_ary(cp.asarray,
            cudf_or_array = cupyx.scipy.sparse.csr_matrix(cupy_ary)

        elif isinstance(cudf_or_array, cp.core.core.ndarray):
            cudf_or_array = cupyx.scipy.sparse.csr_matrix(cudf_or_array)
            # NOTE(review): an `else:` before this raise appears to be missing.
            raise ValueError("Unexpected input type %s" % type(cudf_or_array))

        # Push to worker
        cudf_or_array = client.scatter(cudf_or_array)

    return dask.array.from_delayed(cudf_or_array, shape=shape, meta=meta)
Ejemplo n.º 49
# Scale the test inputs by 1/255 in place.
test_x /= 255

all_results = None
# Train NUM_MODELS independent models, each for NUM_EPOCH epochs.
for j in range(NUM_MODELS):
  model = MLP()

  # NOTE(review): no `optimizer.setup(model)` call is visible before the
  # first `optimizer.update`; confirm it was not lost from this excerpt.
  optimizer = optimizers.Adam()

  for epoch in range(1, NUM_EPOCH + 1):
    # A fresh random permutation gives shuffled minibatches each epoch.
    perm = np.random.permutation(NUM_TRAIN)
    train_accuracy, train_loss = 0, 0
    for i in range(0, NUM_TRAIN, BATCH_SIZE):
      x = chainer.Variable(cp.asarray(train_x[perm[i:i + BATCH_SIZE]]), volatile='off')
      t = chainer.Variable(cp.asarray(train_y[perm[i:i + BATCH_SIZE]]), volatile='off')

      optimizer.update(model, x, t)

      # Weight by batch length so the totals can later be averaged per sample.
      train_loss += float(model.loss.data) * len(t.data)
      train_accuracy += float(model.accuracy.data) * len(t.data)

    epoch_result = None
    test_accuracy, test_loss = 0, 0
    for i in range(0, NUM_TEST, BATCH_SIZE):
      x = chainer.Variable(cp.asarray(test_x[i:i + BATCH_SIZE]), volatile='on')
      t = chainer.Variable(cp.asarray(test_y[i:i + BATCH_SIZE]), volatile='on')
      batch_result = model(x, t, False)
      # Per-batch results are only handled in the final epoch.
      # NOTE(review): the excerpt ends inside this branch; how the results
      # are collected is not visible.
      if epoch == NUM_EPOCH:
        if i == 0:
Ejemplo n.º 50
def _array_to_gpu(array, device, stream):
    # Copy `array` to `device` as a CuPy array, asynchronously when a
    # non-null `stream` is given. Returns None for a None input and raises
    # TypeError for unsupported input types.
    #
    # NOTE(review): lines are missing from this excerpt: the rest of the
    # `cupy.ndarray(` call, several `else:` lines, and the calls that the two
    # dangling argument lines in the stream branch belong to.
    if array is None:
        return None

    if isinstance(array, chainerx.ndarray):
        # TODO(niboshi): Update this logic once both CuPy and ChainerX support
        # the array interface.
        if array.device.backend.name == 'cuda':
            # Convert to cupy.ndarray on the same device as source array
            array = cupy.ndarray(
                        array.data_ptr + array.offset,
            # NOTE(review): presumably the `else:` branch for non-CUDA
            # ChainerX arrays.
            array = chainerx.to_numpy(array)
    elif isinstance(array, (numpy.number, numpy.bool_)):
        # NumPy scalars are converted to arrays.
        array = numpy.asarray(array)
    elif isinstance(array, intel64.mdarray):
        array = numpy.asarray(array)

    if isinstance(array, ndarray):
        # Already on the requested device: return it unchanged.
        if array.device == device:
            return array
        is_numpy = False
    elif isinstance(array, numpy.ndarray):
        is_numpy = True
        # NOTE(review): an `else:` before this raise appears to be missing.
        raise TypeError(
            'The array sent to gpu must be an array or a NumPy scalar.'
            '\nActual type: {0}.'.format(type(array)))

    # Asynchronous path, taken only for a non-null stream.
    if stream is not None and stream.ptr != 0:
        ret = cupy.empty_like(array)
        if is_numpy:
            # cpu to gpu
            # Stage the data in pinned host memory, then copy it to `ret` on
            # the stream.
            mem = cupy.cuda.alloc_pinned_memory(array.nbytes)
            src = numpy.frombuffer(
                mem, array.dtype, array.size).reshape(array.shape)
            src[...] = array
            ret.set(src, stream)
                stream.record(), mem)
            # gpu to gpu
            with array.device:
                src = array.copy()
                event = Stream.null.record()
                src.data, src.nbytes, stream)

            # to hold a reference until the end of the asynchronous
            # memcpy
            stream.add_callback(lambda *x: None, (src, ret))
        return ret

    # Synchronous path: allocate on `device`.
    with device:
        if is_numpy:
            return cupy.asarray(array)
        # Need to make a copy when an array is copied to another device
        return cupy.array(array, copy=True)
Ejemplo n.º 51
 def _calc_array(self, cpu_c_flat: np.ndarray) -> np.ndarray:
     """Run ``self.server.compute_flat_array`` on a CuPy copy of the input.

     The host array is converted with ``cp.asarray``, passed to the server,
     and the result is copied back to the host with ``cp.asnumpy``.
     """
     device_input = cp.asarray(cpu_c_flat)
     device_output = self.server.compute_flat_array(device_input)
     return cp.asnumpy(device_output)
def FC_to_FCWB(source, target, tftype='affine'):
    """Register rescaled ``source`` points onto ``target`` with CPD.

    Each source point is rescaled per axis, both point sets are converted to
    float32 CuPy arrays, and the first element of the
    ``cpd.registration_cpd`` result is returned.
    """
    rescaled = []
    for xyz in source:
        # NOTE(review): `* + 475` multiplies x by 475 (unary plus), whereas
        # the y and z terms subtract an offset - confirm this is intended.
        rescaled.append([
            (xyz[0] * + 475)/1.7,
            (xyz[1] - 185) / -2,
            (xyz[2] - 125) / -2.5,
        ])
    source_gpu = cp.asarray(rescaled, dtype=cp.float32)
    target_gpu = cp.asarray(target, dtype=cp.float32)
    tf_param, _unused_a, _unused_b = cpd.registration_cpd(
        source_gpu, target_gpu, tf_type_name=tftype, use_cuda=use_cuda)
    return tf_param
def FC_to_FCWB_transform(object, tf_param):
    """Rescale points per axis and apply ``tf_param.transform`` to them.

    Args:
        object: Iterable of points, each indexable at positions 0, 1 and 2.
        tf_param: Object exposing a ``transform`` method.

    Returns:
        The transformed points after being passed through ``to_cpu``.
    """
    # NOTE(review): the first component is *multiplied* by 475 (the original
    # expression was ``xyz[0] * + 475``), whereas the other two components
    # are offset and then scaled -- confirm that no offset was intended.
    rescaled = []
    for xyz in list(object):
        rescaled.append([
            (xyz[0] * 475) / 1.7,
            (xyz[1] - 185) / -2,
            (xyz[2] - 125) / -2.5,
        ])

    device_points = cp.asarray(rescaled, dtype=cp.float32)
    transformed = tf_param.transform(device_points)
    return to_cpu(transformed)
    def __call__(self, loc, score,
                 anchor, img_size, scale=1.):
        """Propose RoIs from predicted anchor offsets and scores.

        Inputs should be ndarray. Inputs :obj:`loc, score, anchor` refer to
        the same anchor when indexed by the same index.

        On notations, :math:`R` is the total number of anchors. This is equal
        to product of the height and the width of an image and the number of
        anchor bases per pixel.

        Type of the output is same as the inputs.

        Args:
            loc (array): Predicted offsets and scaling to anchors.
                Its shape is :math:`(R, 4)`.
            score (array): Predicted foreground probability for anchors.
                Its shape is :math:`(R,)`.
            anchor (array): Coordinates of anchors. Its shape is
                :math:`(R, 4)`.
            img_size (tuple of ints): A tuple :obj:`height, width`,
                which contains image size after scaling.
            scale (float): The scaling factor used to scale an image after
                reading it from a file.

        Returns:
            array:
            An array of coordinates of proposal boxes.
            Its shape is :math:`(S, 4)`. :math:`S` is less than
            :obj:`self.n_test_post_nms` in test time and less than
            :obj:`self.n_train_post_nms` in train time. :math:`S` depends on
            the size of the predicted bounding boxes and the number of
            bounding boxes discarded by NMS.

        """
        # NOTE: at test time, remember to call faster_rcnn.eval() so that
        # ``training`` is False and the test-time limits are selected.
        if self.parent_model.training:
            n_pre_nms = self.n_train_pre_nms
            n_post_nms = self.n_train_post_nms
        else:
            n_pre_nms = self.n_test_pre_nms
            n_post_nms = self.n_test_post_nms

        # Convert anchors into proposals via bbox transformations.
        roi = loc2bbox(anchor, loc)

        # Clip predicted boxes to the image: columns 0 and 2 are bounded by
        # img_size[0], columns 1 and 3 by img_size[1].
        roi[:, slice(0, 4, 2)] = np.clip(
            roi[:, slice(0, 4, 2)], 0, img_size[0])
        roi[:, slice(1, 4, 2)] = np.clip(
            roi[:, slice(1, 4, 2)], 0, img_size[1])

        # Remove predicted boxes with either height or width < threshold.
        min_size = self.min_size * scale
        hs = roi[:, 2] - roi[:, 0]
        ws = roi[:, 3] - roi[:, 1]
        keep = np.where((hs >= min_size) & (ws >= min_size))[0]
        roi = roi[keep, :]
        score = score[keep]

        # Sort all (proposal, score) pairs by score from highest to lowest
        # and keep the top n_pre_nms of them (e.g. 6000).
        order = score.ravel().argsort()[::-1]
        if n_pre_nms > 0:
            order = order[:n_pre_nms]
        roi = roi[order, :]

        # Apply NMS (e.g. threshold = 0.7), then keep the top n_post_nms
        # survivors (e.g. 300).
        # TODO: remove the host-to-device copy once NMS accepts host arrays.
        # NOTE(review): the arguments of this call were truncated in the
        # source and have been reconstructed; confirm the device conversion
        # and the ``self.nms_thresh`` attribute name against the class.
        keep = non_maximum_suppression(
            cp.ascontiguousarray(cp.asarray(roi)),
            thresh=self.nms_thresh)
        if n_post_nms > 0:
            keep = keep[:n_post_nms]
        roi = roi[keep]
        return roi
def FCWB_to_FC_transform(object, tf_param):
    """Apply ``tf_param.transform`` to points and pass the result to ``to_cpu``.

    Args:
        object: Points; anything ``cp.asarray`` accepts.
        tf_param: Object exposing a ``transform`` method.

    Returns:
        Whatever ``to_cpu`` returns for the transformed points.
    """
    device_points = cp.asarray(object, dtype=cp.float32)
    transformed = tf_param.transform(device_points)
    return to_cpu(transformed)
Ejemplo n.º 56
def randomized_svd_gpu(M, n_components, n_oversamples=10, n_iter='auto',
                       transpose='auto', random_state=0, lib='cupy'):
    """Compute a truncated randomized SVD on GPU. Adapted from Sklearn.

    Parameters
    ----------
    M : ndarray or sparse matrix
        Matrix to decompose.
    n_components : int
        Number of singular values and vectors to extract.
    n_oversamples : int (default is 10)
        Additional number of random vectors to sample the range of M so as
        to ensure proper conditioning. The total number of random vectors
        used to find the range of M is n_components + n_oversamples. Smaller
        number can improve speed but can negatively impact the quality of
        approximation of singular vectors and singular values.
    n_iter : int or 'auto' (default is 'auto')
        Number of power iterations. It can be used to deal with very noisy
        problems. When 'auto', it is set to 4, unless `n_components` is small
        (< .1 * min(M.shape)), in which case it is set to 7.
        This improves precision with few components.
    transpose : True, False or 'auto' (default)
        Whether the algorithm should be applied to M.T instead of M. The
        result should approximately be the same. The 'auto' mode will
        trigger the transposition if M.shape[1] > M.shape[0] since this
        implementation of randomized SVD tend to be a little faster in that
        case.
    random_state : int, RandomState instance or None, optional (default=0)
        Passed to `check_random_state`. Only the 'cupy' backend draws its
        random vectors from it; the 'pytorch' backend samples with
        ``normal_()`` on the device.
    lib : {'cupy', 'pytorch'}, str optional
        Chooses the GPU library to be used.

    Returns
    -------
    U, s, V : arrays of the chosen library
        Left singular vectors ``(M.shape[0], n_components)``, singular
        values ``(n_components,)`` and right singular vectors stored as rows
        ``(n_components, M.shape[1])``, all with respect to the input `M`.

    Raises
    ------
    ValueError
        If `lib` is neither 'cupy' nor 'pytorch'.

    Notes
    -----
    This algorithm finds a (usually very good) approximate truncated
    singular value decomposition using randomization to speed up the
    computations. It is particularly fast on large matrices on which
    you wish to extract only a small number of components. In order to
    obtain further speed up, `n_iter` can be set <=2 (at the cost of
    loss of precision).

    References
    ----------
    * Finding structure with randomness: Stochastic algorithms for constructing
      approximate matrix decompositions
      Halko, et al., 2009 http://arxiv.org/abs/arXiv:0909.4061
    * A randomized algorithm for the decomposition of matrices
      Per-Gunnar Martinsson, Vladimir Rokhlin and Mark Tygert
    * An implementation of a randomized algorithm for principal component
      analysis
      A. Szlam et al. 2014

    """
    # Fail early on an unknown backend instead of silently returning None.
    if lib not in ('cupy', 'pytorch'):
        raise ValueError(
            "`lib` must be 'cupy' or 'pytorch', got {!r}".format(lib))

    random_state = check_random_state(random_state)
    n_random = n_components + n_oversamples
    n_samples, n_features = M.shape

    if n_iter == 'auto':
        # The number of iterations was not explicitly specified: use more
        # power iterations when only a few components are requested.
        n_iter = 7 if n_components < .1 * min(M.shape) else 4

    if transpose == 'auto':
        transpose = n_samples < n_features
    if transpose:
        M = M.T  # this implementation is a bit faster with smaller shape[1]

    if lib == 'cupy':
        # A single conversion is enough; M is never modified in place, so an
        # extra defensive copy is not needed.
        M = cupy.asarray(M)

        # Generating normal random vectors with shape: (M.shape[1], n_random)
        Q = cupy.asarray(random_state.normal(size=(M.shape[1], n_random)))

        # Perform power iterations with Q to further 'imprint' the top
        # singular vectors of M in Q
        for _ in range(n_iter):
            Q = cupy.dot(M, Q)
            Q = cupy.dot(M.T, Q)

        # Sample the range of M by linear projection of Q and extract an
        # orthonormal basis
        Q, _ = cupy.linalg.qr(cupy.dot(M, Q), mode='reduced')

        # project M to the (k + p) dimensional space using the basis vectors
        B = cupy.dot(Q.T, M)

        # compute the SVD on the thin matrix: (k + p) wide
        Uhat, s, Vh = cupy.linalg.svd(B, full_matrices=False,
                                      compute_uv=True)
        del B
        U = cupy.dot(Q, Uhat)

        if transpose:
            # transpose back the results according to the input convention
            return (Vh[:n_components, :].T, s[:n_components],
                    U[:, :n_components].T)
        else:
            return (U[:, :n_components], s[:n_components],
                    Vh[:n_components, :])

    else:  # lib == 'pytorch'
        M_gpu = torch.Tensor.cuda(torch.from_numpy(M.astype('float32')))

        # Generating normal random vectors with shape: (M.shape[1], n_random)
        Q = torch.cuda.FloatTensor(M_gpu.shape[1], n_random).normal_()

        # Perform power iterations with Q to further 'imprint' the top
        # singular vectors of M in Q
        for _ in range(n_iter):
            Q = torch.mm(M_gpu, Q)
            Q = torch.mm(torch.transpose(M_gpu, 0, 1), Q)

        # Sample the range of M by linear projection of Q and extract an
        # orthonormal basis
        Q, _ = torch.qr(torch.mm(M_gpu, Q))

        # project M to the (k + p) dimensional space using the basis vectors
        B = torch.mm(torch.transpose(Q, 0, 1), M_gpu)

        # compute the SVD on the thin matrix: (k + p) wide
        # torch.svd returns V with B = U diag(s) V^T, so V is transposed
        # here to follow the same row-vector convention as the cupy branch.
        Uhat, s, V = torch.svd(B)
        Vh = torch.transpose(V, 0, 1)
        del B
        U = torch.mm(Q, Uhat)

        if transpose:
            # transpose back the results according to the input convention
            return (torch.transpose(Vh[:n_components, :], 0, 1),
                    s[:n_components],
                    torch.transpose(U[:, :n_components], 0, 1))
        else:
            return (U[:, :n_components], s[:n_components],
                    Vh[:n_components, :])
Ejemplo n.º 57
 def to_cupy(self, copy=False):
     """Convert this object to a ``cp`` array.

     Args:
         copy: When truthy, convert with ``cp.array``; otherwise convert
             with ``cp.asarray``.

     Returns:
         The result of the selected ``cp`` conversion of ``self``.
     """
     convert = cp.array if copy else cp.asarray
     return convert(self)