コード例 #1
0
ファイル: solver.py プロジェクト: nikitinvv/gwp2d
 def coordinates_adj(self, ang):
     """
     Build the global-grid coordinates, expressed relative to the box on the
     last layer, that are used by the gathering operation.

     Parameters
     ----------
     ang : index
         Selects the box orientation; it is only used as the first index
         into ``self.subregion`` and ``self.theta``.

     Returns
     -------
     xr : [Ns,2]
         (y,x) array of coordinates, one row per grid point of the
         selected subregion
     """
     sub = self.subregion
     row0, col0 = sub[ang, 0], sub[ang, 2]
     # integer index grid covering the subregion, one plane per axis
     grid = cp.indices((sub[ang, 1]-row0, sub[ang, 3]-col0))
     # move the indices to global positions centred on the frequency grid
     grid[0] += row0-self.fgridshape[0]//2
     grid[1] += col0-self.fgridshape[1]//2
     # flatten to a list of (y, x) pairs
     pts = grid.reshape(2, -1).swapaxes(0, 1)
     # undo the box rotation, then shift by the last-layer centre
     pts = util.rotate(pts, self.theta[ang], reverse=True)
     pts[:, 1] -= self.xi_cent[-1]
     # normalise by the last-layer box shape
     # (presumably mapping to [-1/2,1/2) w.r.t. the box -- TODO confirm)
     pts /= cp.array(self.boxshape[-1])
     return pts
コード例 #2
0
ファイル: make_data.py プロジェクト: AyyerLab/CuPADMAN
    def make_mask(self, cmin=0.05, bg=False):
        """Fill the object (or background) mask in place with a random pattern.

        The selected array (``self.bgmask`` when ``bg`` is true, otherwise
        ``self.mask``) is set to 1 inside a central disc, attenuated inside
        20 randomly placed discs, and finally rescaled so that its sum
        matches ``self.bg_count`` / ``self.mean_count``.

        Parameters
        ----------
        cmin : float
            Multiplier applied at the centre of each random disc; the
            multiplier grows quadratically to 1 at the disc edge.
        bg : bool
            Operate on the background mask instead of the object mask.

        Side effects
        ------------
        Stores the final sum in ``self.bgmask_sum`` or ``self.mask_sum``.
        For the object mask the result is also written to the ``solution``
        dataset of ``self.out_file`` (replacing an existing one).
        """
        target = self.bgmask if bg else self.mask

        n_discs = 20
        centre = self.size // 2
        x, y = cp.indices(target.shape, dtype='f8')
        from_centre = cp.sqrt((x - centre)**2 + (y - centre)**2)
        target[from_centre < centre - 1] = 1.

        for _ in range(n_discs):
            rad = cp.random.rand(1, dtype='f8') * self.size / 5.
            # redraw the disc centre until the whole disc fits inside the
            # central region
            fits = False
            while not fits:
                cen = cp.random.rand(2, dtype='f8') * self.size
                reach = cp.sqrt(
                    (cen[0] - centre)**2 + (cen[1] - centre)**2) + rad
                fits = float(reach) < centre

            from_disc = cp.sqrt((x - cen[0])**2 + (y - cen[1])**2)
            inside = from_disc <= rad
            target[inside] *= cmin + (1. - cmin) * (from_disc[inside] / rad)**2

        wanted_total = self.bg_count if bg else self.mean_count
        target *= wanted_total / target.sum()
        if bg:
            self.bgmask_sum = float(target.sum())
            return

        self.mask_sum = float(target.sum())
        with h5py.File(self.out_file, 'a') as fptr:
            if 'solution' in fptr:
                del fptr['solution']
            fptr['solution'] = target.get()
コード例 #3
0
    def __getitem__(self, key):
        """Build the grid described by ``key``.

        Args:
            key (slice or sequence of slices): Grid description. A complex
                ``step`` is interpreted as a number of points (its
                magnitude) with the ``stop`` value included; any other
                step is a regular spacing with ``stop`` excluded.

        Returns:
            For a single slice, a 1-D array. For a sequence of slices,
            either one stacked array built from ``cupy.indices`` or, when
            ``self.sparse`` is true, a list with one array per dimension
            whose other axes have length 1.
        """
        if isinstance(key, slice):
            step = key.step
            stop = key.stop
            start = key.start
            if start is None:
                start = 0
            if isinstance(step, complex):
                # complex step: |step| evenly spaced points, stop included
                step = abs(step)
                length = int(step)
                if step != 1:
                    step = (key.stop - start) / float(step - 1)
                return cupy.arange(0, length, 1, float) * step + start
            else:
                return cupy.arange(start, stop, step)

        # First pass: number of points per dimension and the result dtype.
        size = []
        typ = int
        for sl in key:
            step = sl.step
            start = sl.start
            if start is None:
                start = 0
            if step is None:
                step = 1
            if isinstance(step, complex):
                size.append(int(abs(step)))
                typ = float
            else:
                size.append(
                    int(math.ceil((sl.stop - start) / (step * 1.0))))
            if (isinstance(step, float) or
                    isinstance(start, float) or
                    isinstance(sl.stop, float)):
                typ = float

        # Unit-spaced index grids, scaled and shifted in the second pass.
        if self.sparse:
            nn = [cupy.arange(_x, dtype=typ) for _x in size]
        else:
            nn = cupy.indices(size, typ)
        for k, sl in enumerate(key):
            step = sl.step
            start = sl.start
            if start is None:
                start = 0
            if step is None:
                step = 1
            if isinstance(step, complex):
                step = int(abs(step))
                if step != 1:
                    step = (sl.stop - start) / float(step - 1)
            nn[k] = (nn[k] * step + start)

        if self.sparse:
            # Give every 1-D axis vector length-1 axes in the other
            # dimensions. The index must be a tuple: a list of
            # slices/None is not valid multidimensional indexing.
            slobj = [cupy.newaxis] * len(size)
            for k in range(len(size)):
                slobj[k] = slice(None, None)
                nn[k] = nn[k][tuple(slobj)]
                slobj[k] = cupy.newaxis
        return nn
コード例 #4
0
 def make_mask(self, array, index):
     """Store a one-hot mask of the per-sample, per-channel maximum.

     ``array`` is unpacked as ``(n, height, width, ch)``. For every
     (sample, channel) pair the position of the largest value over the
     flattened ``height * width`` axis is set to 1 in an otherwise zero
     array of the same shape and dtype, which is saved as
     ``self.mask[index]``.
     """
     n, height, width, ch = array.shape
     one_hot = np.zeros_like(array)
     flat_shape = (n, height * width, ch)
     peak = np.argmax(array.reshape(flat_shape), axis=1)
     # broadcastable (n, 1) and (1, ch) index vectors pair each peak with
     # its sample and channel
     sample_ix = np.arange(n)[:, None]
     channel_ix = np.arange(ch)[None, :]
     one_hot.reshape(flat_shape)[sample_ix, peak, channel_ix] = 1
     self.mask[index] = one_hot
コード例 #5
0
    def _save_mask(self, x: cp.array, cords: Tuple[int, int]) -> None:
        """Cache a one-hot mask marking the maximum of ``x`` per channel.

        ``x`` is unpacked as ``(n, h, w, c)``; for each sample and channel
        the arg-max over the flattened ``h * w`` axis is set to 1 in a
        zero array shaped like ``x``. The mask is stored in
        ``self._cache`` under the key ``cords``.
        """
        n, h, w, c = x.shape
        flat_shape = (n, h * w, c)
        winners = cp.argmax(x.reshape(flat_shape), axis=1)

        mask = cp.zeros_like(x)
        batch_ix, chan_ix = cp.indices((n, c))
        # the reshape is used as a view so the write lands in ``mask``
        mask.reshape(flat_shape)[batch_ix, winners, chan_ix] = 1
        self._cache[cords] = mask
コード例 #6
0
    def save_mask(x, cords):
        """Cache a one-hot mask marking the spatial maximum of each channel.

        ``x`` is unpacked as ``(n, c, h, w)``. For every (sample, channel)
        pair the largest value over the flattened ``h * w`` positions is
        marked with 1 in a zero array shaped like ``x``; the mask is stored
        in ``cache`` under the key ``cords``.

        NOTE(review): the layout is taken from the shape unpacking in this
        function; confirm against the caller that ``x`` really is
        channels-first.
        """
        mask = engine.zeros_like(x)
        n, c, h, w = x.shape
        # Keep the channel axis where it is. Reshaping to (n, h * w, c)
        # does not move axes, so it would mix channel and spatial entries.
        flat = x.reshape(n, c, h * w)
        idx = engine.argmax(flat, axis=2)

        n_idx, c_idx = engine.indices((n, c))
        mask.reshape((n, c, h * w))[n_idx, c_idx, idx] = 1
        cache[cords] = mask
コード例 #7
0
ファイル: max_pool2d.py プロジェクト: Oschart/ConvNet
    def save_max_mask(self, X, cords):
        """Record where each channel of each sample attains its maximum.

        ``X`` is unpacked as ``(n, h, w, c)``. A zero array shaped like
        ``X`` gets a 1 at the arg-max over the flattened ``h * w`` axis for
        every (sample, channel) pair and is stored in ``self._cache``
        under ``cords``.
        """
        n, h, w, c = X.shape
        mask = cp.zeros_like(X)
        # flattened view of the mask; writes through it modify ``mask``
        flat_mask = mask.reshape(n, h * w, c)
        best = cp.argmax(X.reshape(n, h * w, c), axis=1)

        sample_ix, channel_ix = cp.indices((n, c))
        flat_mask[sample_ix, best, channel_ix] = 1
        self._cache[cords] = mask
コード例 #8
0
    def set_args(self, dtype):
        """Prepare matching CPU and GPU argument tuples.

        Creates a random image of shape ``self.shape`` and a coordinate
        array equal to the index grid plus Gaussian jitter scaled by 0.1
        (fixed seed 5). Both are kept on the device in ``self.args_gpu``
        and as host copies in ``self.args_cpu``.
        """
        shape = self.shape
        imaged = cupy.testing.shaped_random(shape, xp=cp, dtype=dtype)
        image = cp.asnumpy(imaged)

        rstate = cp.random.RandomState(5)
        jitter = rstate.standard_normal((len(shape), ) + shape)
        coordsd = cp.indices(shape) + 0.1 * jitter
        coords = cupy.asnumpy(coordsd)

        self.args_cpu = (image, coords)
        self.args_gpu = (imaged, coordsd)
コード例 #9
0
ファイル: rbdiff.py プロジェクト: AyyerLab/diffuser
    def init_diffcalc(self, translate=True):
        '''Reset the accumulators used for the diffuse intensity calculation.

        Zeroes ``self.mean_fdens`` and ``self.mean_intens`` (both shaped like
        ``self.fdens``) and ``self.denominator``. When ``translate`` is true
        and ``self.size`` differs from the first dimension of ``self.fdens``,
        ``self.size`` is updated and ``self.qvec`` is rebuilt as three
        centred index grids scaled by ``2 * pi / size``.
        '''
        self.mean_fdens = cp.zeros_like(self.fdens)
        self.mean_intens = cp.zeros_like(self.fdens, dtype='f4')
        self.denominator = 0.

        if not translate or self.size == self.fdens.shape[0]:
            return

        self.size = self.fdens.shape[0]
        cen = self.size // 2
        grid = cp.indices(self.fdens.shape, dtype='f4')
        # same scaling for each of the three axes
        x, y, z = (
            (axis - cen) / self.size * 2. * cp.pi for axis in grid)
        self.qvec = (x, y, z)
コード例 #10
0
ファイル: ngen.py プロジェクト: calper-ql/NGEN
def create_point_grid(lu, ru, rd, ld, h, w):
    """Interpolate a grid of points between four corner points.

    ``lu``, ``ru``, ``rd`` and ``ld`` are converted to device arrays
    (presumably the left-upper, right-upper, right-lower and left-lower
    corners -- confirm with the caller). The result is the sum of two
    bilinear-style averages: one along the ``lu``->``ru`` / ``ld``->``rd``
    edges with component 1 zeroed, and one along the ``lu``->``ld`` /
    ``ru``->``rd`` edges with component 0 zeroed.

    NOTE(review): the index grid has shape ``(w, h, 3)`` but axis 0 is
    normalised by ``h - 1`` and axis 1 by ``w - 1``; the two agree only
    when ``h == w``. Both divisors are zero when ``h`` or ``w`` equals 1.
    """
    lu, ru, rd, ld = (cp.array(corner) for corner in (lu, ru, rd, ld))

    grid = cp.indices([w, h, 3])
    frac_h = grid[0] / (h - 1)
    frac_w = grid[1] / (w - 1)

    upper_edge = lu + (ru - lu) * frac_w
    lower_edge = ld + (rd - ld) * frac_w
    rows = (upper_edge + lower_edge) / 2.0
    rows[..., 1] = 0.0

    left_edge = lu + (ld - lu) * frac_h
    right_edge = ru + (rd - ru) * frac_h
    cols = (left_edge + right_edge) / 2.0
    cols[..., 0] = 0.0

    return cols + rows
コード例 #11
0
def DFT_matrix(Nd, om=None):
    """Return the dense matrix ``exp(-1j * om . r)`` for a grid of shape Nd.

    Each column corresponds to one grid point ``r`` (indices shifted by
    ``Nd[d] / 2`` along every axis ``d``) and each row to one row of
    ``om``. When ``om`` is omitted it is obtained from
    ``fake_Cartesian(Nd)``.
    """
    ndim = len(Nd)
    if om is None:
        om = fake_Cartesian(Nd)

    # centred grid coordinates, one column per axis
    n_points = numpy.prod(Nd)
    omN = cupy.zeros((n_points, ndim), dtype=numpy.float64)
    grid = cupy.indices(Nd)
    for axis in range(ndim):
        omN[:, axis] = grid[axis].ravel() - Nd[axis] / 2

    # accumulate the phase as a sum of per-axis outer products
    phase = cupy.einsum(
        'm, n -> mn', om[:, 0], omN[:, 0], optimize='optimal')
    for axis in range(1, ndim):
        phase += cupy.einsum(
            'm, n -> mn', om[:, axis], omN[:, axis], optimize='optimal')

    return cupy.exp(-1.0j * phase)
コード例 #12
0
ファイル: generate.py プロジェクト: toslunar/cupy
def triu_indices(n, k=0, m=None):
    """Returns the indices of the upper triangular matrix.
    Here, the first group of elements contains row coordinates
    of all indices and the second group of elements
    contains column coordinates.

    Parameters
    ----------
    n : int
        The size of the arrays for which the returned indices will
        be valid.
    k : int, optional
        Refers to the diagonal offset. By default, `k = 0` i.e.
        the main diagonal. The positive value of `k`
        denotes the diagonals above the main diagonal, while the negative
        value includes the diagonals below the main diagonal.
    m : int, optional
        The column dimension of the arrays for which the
        returned arrays will be valid. By default, `m = n`.

    Returns
    -------
    y : tuple of ndarrays
        The indices for the triangle. The returned tuple
        contains two arrays, each with the indices along
        one dimension of the array.

    See Also
    --------
    numpy.triu_indices

    """

    # everything that is not in the lower triangle ending at diagonal k - 1
    upper = ~cupy.tri(n, m, k=k - 1, dtype=bool)
    axis_grids = cupy.indices(upper.shape, dtype=int)

    picked = []
    for axis_grid in axis_grids:
        picked.append(cupy.broadcast_to(axis_grid, upper.shape)[upper])
    return tuple(picked)
コード例 #13
0
ファイル: generate.py プロジェクト: toslunar/cupy
def tril_indices(n, k=0, m=None):
    """Returns the indices of the lower triangular matrix.
    Here, the first group of elements contains row coordinates
    of all indices and the second group of elements
    contains column coordinates.

    Parameters
    ----------
    n : int
        The row dimension of the arrays for which the returned
        indices will be valid.
    k : int, optional
        Diagonal above which to zero elements. `k = 0`
        (the default) is the main diagonal, `k < 0` is
        below it and `k > 0` is above.
    m : int, optional
        The column dimension of the arrays for which the
        returned arrays will be valid. By default, `m = n`.

    Returns
    -------
    y : tuple of ndarrays
        The indices for the triangle. The returned tuple
        contains two arrays, each with the indices along
        one dimension of the array.

    See Also
    --------
    numpy.tril_indices

    """

    lower = cupy.tri(n, m, k=k, dtype=bool)
    full_shape = lower.shape

    def _select(axis_grid):
        # keep only the entries that fall inside the triangle
        return cupy.broadcast_to(axis_grid, full_shape)[lower]

    return tuple(
        _select(axis_grid)
        for axis_grid in cupy.indices(full_shape, dtype=int))
コード例 #14
0
ファイル: cupyx_ndimage.py プロジェクト: elda27/deepnet
def affine_transform(input,
                     matrix,
                     offset=0.0,
                     output_shape=None,
                     output=None,
                     order=None,
                     mode='constant',
                     cval=0.0,
                     prefilter=True):
    """Apply an affine transformation.

    Given an output image pixel index vector ``o``, the pixel value is
    determined from the input image at position
    ``cupy.dot(matrix, o) + offset``.

    Args:
        input (cupy.ndarray): The input array.
        matrix (cupy.ndarray): The inverse coordinate transformation matrix,
            mapping output coordinates to input coordinates. If ``ndim`` is the
            number of dimensions of ``input``, the given matrix must have one
            of the following shapes:

                - ``(ndim, ndim)``: the linear transformation matrix for each
                  output coordinate.
                - ``(ndim,)``: assume that the 2D transformation matrix is
                  diagonal, with the diagonal specified by the given value.
                - ``(ndim + 1, ndim + 1)``: assume that the transformation is
                  specified using homogeneous coordinates. In this case, any
                  value passed to ``offset`` is ignored.
                - ``(ndim, ndim + 1)``: as above, but the bottom row of a
                  homogeneous transformation matrix is always
                  ``[0, 0, ..., 1]``, and may be omitted.

        offset (float or sequence): The offset into the array where the
            transform is applied. If a float, ``offset`` is the same for each
            axis. If a sequence, ``offset`` should contain one value for each
            axis.
        output_shape (tuple of ints): Shape tuple. Defaults to
            ``input.shape``.
        output (cupy.ndarray or ~cupy.dtype): The array in which to place the
            output, or the dtype of the returned array.
        order (int): The order of the spline interpolation. If it is not given,
            order 1 is used. It is different from :mod:`scipy.ndimage` and can
            change in the future. Currently it supports only order 0 and 1.
        mode (str): Points outside the boundaries of the input are filled
            according to the given mode (``'constant'``, ``'nearest'``,
            ``'mirror'`` or ``'opencv'``). Default is ``'constant'``.
        cval (scalar): Value used for points outside the boundaries of
            the input if ``mode='constant'`` or ``mode='opencv'``. Default is
            0.0
        prefilter (bool): It is not used yet. It just exists for compatibility
            with :mod:`scipy.ndimage`.

    Returns:
        cupy.ndarray or None:
            The transformed input. If ``output`` is given as a parameter,
            ``None`` is returned.
            NOTE(review): this implementation ends with ``return ret`` in
            every case, so the "``None`` is returned" statement does not
            match the code below -- confirm which is intended.

    .. seealso:: :func:`scipy.ndimage.affine_transform`
    """

    _check_parameter('affine_transform', order, mode)

    # A scalar offset is expanded to one value per input axis.
    if not hasattr(offset, '__iter__') and type(offset) is not cupy.ndarray:
        offset = [offset] * input.ndim

    # Normalise ``matrix`` to a square linear part plus a separate offset.
    if matrix.ndim == 1:
        # TODO(mizuno): Implement zoom_shift
        matrix = cupy.diag(matrix)
    elif matrix.shape[0] == matrix.shape[1] - 1:
        # One more column than rows: the last column carries the offset and
        # replaces the ``offset`` argument.
        offset = matrix[:, -1]
        matrix = matrix[:, :-1]
    elif matrix.shape[0] == input.ndim + 1:
        # Homogeneous matrix: drop the bottom row and take the offset from
        # the last column.
        offset = matrix[:-1, -1]
        matrix = matrix[:-1, :-1]

    if mode == 'opencv':
        # Assemble the homogeneous matrix, invert it, then exchange the
        # first two rows and, within those rows, the first two columns
        # (rolling a length-2 slice by 1 swaps its entries).
        m = cupy.zeros((input.ndim + 1, input.ndim + 1))
        m[:-1, :-1] = matrix
        m[:-1, -1] = offset
        m[-1, -1] = 1
        m = cupy.linalg.inv(m)
        m[:2] = cupy.roll(m[:2], 1, axis=0)
        m[:2, :2] = cupy.roll(m[:2, :2], 1, axis=1)
        matrix = m[:-1, :-1]
        offset = m[:-1, -1]

    if output_shape is None:
        output_shape = input.shape

    # Map every output index through ``matrix @ o + offset`` and sample the
    # input at the resulting positions.
    coordinates = cupy.indices(output_shape, dtype=cupy.float64)
    coordinates = cupy.dot(matrix, coordinates.reshape((input.ndim, -1)))
    coordinates += cupy.expand_dims(cupy.asarray(offset), -1)
    ret = _get_output(output, input, output_shape)
    ret[:] = map_coordinates(input, coordinates, ret.dtype, order, mode, cval,
                             prefilter).reshape(output_shape)
    return ret
コード例 #15
0
ファイル: bench_core.py プロジェクト: okuta/cupy-benchmark
 def time_indices(self):
     """Benchmark body: build the index grid of a 1000 x 500 array."""
     grid_shape = (1000, 500)
     np.indices(grid_shape)
コード例 #16
0
def affine_transform(input,
                     matrix,
                     offset=0.0,
                     output_shape=None,
                     output=None,
                     order=3,
                     mode='constant',
                     cval=0.0,
                     prefilter=True,
                     *,
                     texture_memory=False):
    """Apply an affine transformation.

    Given an output image pixel index vector ``o``, the pixel value is
    determined from the input image at position
    ``cupy.dot(matrix, o) + offset``.

    Args:
        input (cupy.ndarray): The input array.
        matrix (cupy.ndarray): The inverse coordinate transformation matrix,
            mapping output coordinates to input coordinates. If ``ndim`` is the
            number of dimensions of ``input``, the given matrix must have one
            of the following shapes:

                - ``(ndim, ndim)``: the linear transformation matrix for each
                  output coordinate.
                - ``(ndim,)``: assume that the 2D transformation matrix is
                  diagonal, with the diagonal specified by the given value.
                - ``(ndim + 1, ndim + 1)``: assume that the transformation is
                  specified using homogeneous coordinates. In this case, any
                  value passed to ``offset`` is ignored.
                - ``(ndim, ndim + 1)``: as above, but the bottom row of a
                  homogeneous transformation matrix is always
                  ``[0, 0, ..., 1]``, and may be omitted.

        offset (float or sequence): The offset into the array where the
            transform is applied. If a float, ``offset`` is the same for each
            axis. If a sequence, ``offset`` should contain one value for each
            axis.
        output_shape (tuple of ints): Shape tuple. Defaults to
            ``input.shape``.
        output (cupy.ndarray or ~cupy.dtype): The array in which to place the
            output, or the dtype of the returned array.
        order (int): The order of the spline interpolation, default is 3. Must
            be in the range 0-5.
        mode (str): Points outside the boundaries of the input are filled
            according to the given mode (``'constant'``, ``'nearest'``,
            ``'mirror'``, ``'reflect'``, ``'wrap'``, ``'grid-mirror'``,
            ``'grid-wrap'``, ``'grid-constant'`` or ``'opencv'``).
        cval (scalar): Value used for points outside the boundaries of
            the input if ``mode='constant'`` or ``mode='opencv'``. Default is
            0.0
        prefilter (bool): It is not used yet. It just exists for compatibility
            with :mod:`scipy.ndimage`.
            NOTE(review): the value is forwarded to ``_filter_input`` and
            ``map_coordinates`` below, so "not used" may be outdated --
            confirm.
        texture_memory (bool): If True, uses GPU texture memory. Supports only:

            - 2D and 3D float32 arrays as input
            - ``(ndim + 1, ndim + 1)`` homogeneous float32 transformation
                matrix
            - ``mode='constant'`` and ``mode='nearest'``
            - ``order=0`` (nearest neighbor) and ``order=1`` (linear
                interpolation)

    Returns:
        cupy.ndarray or None:
            The transformed input. If ``output`` is given as a parameter,
            ``None`` is returned.
            NOTE(review): every non-texture path below returns the result
            array (``ret`` or ``output``) -- confirm the ``None`` statement.

    Raises:
        RuntimeError: If ``matrix`` is not 1-D or 2-D, has an empty first
            dimension, or (for 2-D input) does not reduce to shape
            ``(ndim, ndim)``.

    .. seealso:: :func:`scipy.ndimage.affine_transform`
    """

    if texture_memory:
        # The texture path only distinguishes nearest (order 0) from linear
        # (any positive order) and is delegated entirely to ``_texture``;
        # ``offset`` and ``prefilter`` are not passed on.
        tm_interp = 'linear' if order > 0 else 'nearest'
        return _texture.affine_transformation(data=input,
                                              transformation_matrix=matrix,
                                              output_shape=output_shape,
                                              output=output,
                                              interpolation=tm_interp,
                                              mode=mode,
                                              border_value=cval)

    _check_parameter('affine_transform', order, mode)

    offset = _util._fix_sequence_arg(offset, input.ndim, 'offset', float)

    if matrix.ndim not in [1, 2] or matrix.shape[0] < 1:
        raise RuntimeError('no proper affine matrix provided')
    if matrix.ndim == 2:
        # Split an (ndim, ndim + 1) or homogeneous matrix into its linear
        # part and the offset stored in its last column.
        if matrix.shape[0] == matrix.shape[1] - 1:
            offset = matrix[:, -1]
            matrix = matrix[:, :-1]
        elif matrix.shape[0] == input.ndim + 1:
            offset = matrix[:-1, -1]
            matrix = matrix[:-1, :-1]
        if matrix.shape != (input.ndim, input.ndim):
            raise RuntimeError('improper affine shape')

    if mode == 'opencv':
        # Assemble the homogeneous matrix, invert it, then exchange the
        # first two rows and, within those rows, the first two columns
        # (rolling a length-2 slice by 1 swaps its entries).
        m = cupy.zeros((input.ndim + 1, input.ndim + 1))
        m[:-1, :-1] = matrix
        m[:-1, -1] = offset
        m[-1, -1] = 1
        m = cupy.linalg.inv(m)
        m[:2] = cupy.roll(m[:2], 1, axis=0)
        m[:2, :2] = cupy.roll(m[:2, :2], 1, axis=1)
        matrix = m[:-1, :-1]
        offset = m[:-1, -1]

    if output_shape is None:
        output_shape = input.shape

    if mode == 'opencv' or mode == '_opencv_edge':
        # These modes build the full coordinate array explicitly and sample
        # through ``map_coordinates`` instead of using the kernels below.
        if matrix.ndim == 1:
            matrix = cupy.diag(matrix)
        coordinates = cupy.indices(output_shape, dtype=cupy.float64)
        coordinates = cupy.dot(matrix, coordinates.reshape((input.ndim, -1)))
        coordinates += cupy.expand_dims(cupy.asarray(offset), -1)
        ret = _util._get_output(output, input, shape=output_shape)
        ret[:] = map_coordinates(input, coordinates, ret.dtype, order, mode,
                                 cval, prefilter).reshape(output_shape)
        return ret

    matrix = matrix.astype(cupy.float64, copy=False)
    ndim = input.ndim
    output = _util._get_output(output, input, shape=output_shape)
    # Integer inputs are converted to float32 before filtering.
    if input.dtype.kind in 'iu':
        input = input.astype(cupy.float32)
    filtered, nprepad = _filter_input(input, prefilter, mode, cval, order)

    integer_output = output.dtype.kind in 'iu'
    _util._check_cval(mode, cval, integer_output)
    # Flag passed to the kernel factories when either array has more than
    # 2**31 elements (presumably selects wider index arithmetic -- confirm
    # in ``_interp_kernels``).
    large_int = max(_prod(input.shape), _prod(output_shape)) > 1 << 31
    if matrix.ndim == 1:
        # Diagonal matrix: use the zoom/shift kernel, which is handed
        # ``-offset / matrix`` as its shift and ``matrix`` as its zoom.
        offset = cupy.asarray(offset, dtype=cupy.float64)
        offset = -offset / matrix
        kern = _interp_kernels._get_zoom_shift_kernel(
            ndim,
            large_int,
            output_shape,
            mode,
            cval=cval,
            order=order,
            integer_output=integer_output,
            nprepad=nprepad)
        kern(filtered, offset, matrix, output)
    else:
        kern = _interp_kernels._get_affine_kernel(
            ndim,
            large_int,
            output_shape,
            mode,
            cval=cval,
            order=order,
            integer_output=integer_output,
            nprepad=nprepad)
        # The affine kernel takes one (ndim, ndim + 1) array: the linear
        # part followed by the offset as the last column.
        m = cupy.zeros((ndim, ndim + 1), dtype=cupy.float64)
        m[:, :-1] = matrix
        m[:, -1] = cupy.asarray(offset, dtype=cupy.float64)
        kern(filtered, m, output)
    return output
コード例 #17
0
def warp_coords(coord_map, shape, dtype=np.float64):
    """Build the source coordinates for the output of a 2-D image warp.

    Parameters
    ----------
    coord_map : callable like GeometricTransform.inverse
        Return input coordinates for given output coordinates.
        Coordinates are in the shape (P, 2), where P is the number
        of coordinates and each element is a ``(row, col)`` pair.
    shape : tuple
        Shape of output image ``(rows, cols[, bands])``.
    dtype : np.dtype or string
        dtype for return value (sane choices: float32 or float64).

    Returns
    -------
    coords : (ndim, rows, cols[, bands]) array of dtype `dtype`
            Coordinates for `scipy.ndimage.map_coordinates`, that will yield
            an image of shape (orows, ocols, bands) by drawing from source
            points according to the `coord_transform_fn`.

    Notes
    -----

    This is a lower-level routine that produces the source coordinates for 2-D
    images used by `warp()`.

    It is provided separately from `warp` to give additional flexibility to
    users who would like, for example, to re-use a particular coordinate
    mapping, to use specific dtypes at various points along the
    image-warping process, or to implement different post-processing logic
    than `warp` performs after the call to `ndi.map_coordinates`.


    Examples
    --------
    Produce a coordinate map that shifts an image up and to the right:

    >>> import cupy as cp
    >>> from cucim.skimage.transform import warp_coords
    >>> from skimage import data
    >>> from cupyx.scipy.ndimage import map_coordinates
    >>>
    >>> def shift_up10_left20(xy):
    ...     return xy - cp.array([-20, 10])[None, :]
    >>>
    >>> image = cp.array(data.astronaut().astype(cp.float32))
    >>> coords = warp_coords(shift_up10_left20, image.shape)
    >>> warped_image = map_coordinates(image, coords)

    """
    shape = safe_as_int(shape)
    n_rows, n_cols = shape[0], shape[1]
    has_bands = len(shape) == 3

    out_shape = [len(shape), n_rows, n_cols]
    if has_bands:
        out_shape.append(shape[2])
    coords = cp.empty(out_shape, dtype=dtype)

    # Grid coordinates as a (P, 2) array of pairs, built column-major
    # (cols first) and handed to the user-provided mapping
    grid = cp.indices((n_cols, n_rows), dtype=dtype)
    mapped = coord_map(grid.reshape(2, -1).T)

    # Back to a (2, rows, cols) coordinate grid
    mapped = mapped.T.reshape((-1, n_cols, n_rows)).swapaxes(1, 2)

    # The mapping's first component goes to coordinate plane 1 and its
    # second component to plane 0
    _stackcopy(coords[1, ...], mapped[0, ...])
    _stackcopy(coords[0, ...], mapped[1, ...])

    if has_bands:
        # the band axis is not warped: each band samples from itself
        coords[2, ...] = cp.arange(shape[2], dtype=coords.dtype)

    return coords
コード例 #18
0
    def __init__(self, config_file, num_streams=4):
        """Set up the reconstruction state from a configuration file.

        Reads the ``[parameters]`` and ``[emc]`` sections of ``config_file``,
        builds the sampled (shiftx, shifty, sphere_dia) state grid and the
        detector masks, loads the photon dataset, creates (on rank 0) and
        broadcasts a random initial model, and loads the CUDA kernels from
        ``kernels.cu`` in the current working directory.

        Parameters
        ----------
        config_file : str
            Path to the configuration file. Relative paths found inside it
            are resolved against its directory.
        num_streams : int
            Number of CUDA streams to create.

        Side effects
        ------------
        Prints progress information; rank 0 writes ``data/model_000.npy``.
        """
        self.num_streams = num_streams
        self.comm = MPI.COMM_WORLD
        self.rank = self.comm.rank
        self.num_proc = self.comm.size
        self.mem_size = cp.cuda.Device(cp.cuda.runtime.getDevice()).mem_info[1]

        config = configparser.ConfigParser()
        config.read(config_file)

        self.size = config.getint('parameters', 'size')
        self.num_modes = config.getint('emc', 'num_modes', fallback=1)
        self.num_rot = config.getint('emc', 'num_rot')
        self.photons_file = os.path.join(os.path.dirname(config_file),
                                         config.get('emc', 'in_photons_file'))
        self.output_folder = os.path.join(
            os.path.dirname(config_file),
            config.get('emc', 'output_folder', fallback='data/'))
        self.log_file = os.path.join(
            os.path.dirname(config_file),
            config.get('emc', 'log_file', fallback='EMC.log'))
        self.need_scaling = config.getboolean('emc',
                                              'need_scaling',
                                              fallback=False)

        def _linspace_args(option):
            # Each option holds the positional arguments of np.linspace
            # ("start stop num"). The point count must be an integer:
            # current NumPy raises TypeError for a float ``num``.
            values = [float(s) for s in config.get('emc', option).split()]
            if len(values) > 2:
                values[2] = int(values[2])
            return tuple(values)

        dia = _linspace_args('sphere_dia')
        sx = _linspace_args('shiftx')
        sy = _linspace_args('shifty')
        self.shiftx, self.shifty, self.sphere_dia = np.meshgrid(
            np.linspace(*sx),
            np.linspace(*sy),
            np.linspace(*dia),
            indexing='ij')
        self.shiftx = self.shiftx.ravel()
        self.shifty = self.shifty.ravel()
        self.sphere_dia = self.sphere_dia.ravel()
        self.num_states = len(self.shiftx)
        print(self.num_states, 'sampled states')

        # Flattened pixel coordinates relative to the detector centre.
        self.x_ind, self.y_ind = cp.indices((self.size, ) * 2, dtype='f8')
        self.x_ind = self.x_ind.ravel() - self.size // 2
        self.y_ind = self.y_ind.ravel() - self.size // 2
        self.rad = cp.sqrt(self.x_ind**2 + self.y_ind**2)

        # ``np.bool`` was removed from NumPy; the builtin ``bool`` selects
        # the same dtype.
        self.invmask = cp.zeros(self.size**2, dtype=bool)
        self.invmask[self.rad < 4] = True
        self.invmask[self.rad >= self.size // 2] = True
        self.intinvmask = self.invmask.astype('i4')
        self.invsuppmask = cp.ones((self.size, ) * 2, dtype=bool)
        # NOTE(review): hard-coded support window, independent of
        # ``self.size`` -- confirm it suits every configured size.
        self.invsuppmask[66:119, 66:119] = False
        # Later assignments override earlier ones, so pixels with rad < 4
        # end up as 2 although they also satisfy rad < size // 8.
        self.probmask = cp.zeros(self.size**2, dtype='i4')
        self.probmask[self.rad >= self.size // 2] = 2
        self.probmask[self.rad < self.size // 8] = 1
        self.probmask[self.rad < 4] = 2
        self.sphere_ramps = [
            self.ramp(i) * self.sphere(i) for i in range(self.num_states)
        ]
        # Mean intensity over the first ``dia[2]`` states.
        self.sphere_intens = cp.abs(
            cp.array(self.sphere_ramps[:int(dia[2])])**2).mean(0)

        stime = time.time()
        self.dset = Dataset(self.photons_file, self.size**2, self.need_scaling)
        self.powder = self.dset.get_powder()
        etime = time.time()
        if self.rank == 0:
            print('%d frames with %.3f photons/frame (%.3f s) (%.2f MB)' % \
                    (self.dset.num_data, self.dset.mean_count, etime-stime, self.dset.mem/1024**2))
            sys.stdout.flush()
        self.model = np.empty((self.size**2, ), dtype='c16')
        if self.rank == 0:
            # Random model
            rmodel = np.random.random((self.size, ) * 2)
            rmodel[self.invsuppmask.get()] = 0
            self.model = np.fft.fftshift(np.fft.fftn(
                np.fft.ifftshift(rmodel))).flatten()
            self.model /= 2e3

            # Solution as init
            #with h5py.File('data/holo_dia.h5', 'r') as f:
            #    sol = f['solution'][:]
            #self.model = np.fft.fftshift(np.fft.fftn(np.fft.ifftshift(sol))).ravel() / 1.e3

            self.model[self.invmask.get()] = 0
            np.save('data/model_000.npy', self.model)
        # Every rank receives rank 0's initial model.
        self.comm.Bcast([self.model, MPI.C_DOUBLE_COMPLEX], root=0)
        if self.need_scaling:
            self.scales = self.dset.counts / self.dset.mean_count
        else:
            self.scales = cp.ones(self.dset.num_data, dtype='f8')
        self.prob = cp.array([])
        with open('kernels.cu', 'r') as f:
            kernels = cp.RawModule(code=f.read())
        self.k_slice_gen = kernels.get_function('slice_gen')
        self.k_slice_merge = kernels.get_function('slice_merge')
        self.k_slice_gen_holo = kernels.get_function('slice_gen_holo')
        self.k_calc_prob_all = kernels.get_function('calc_prob_all')
        self.k_merge_all = kernels.get_function('merge_all')
        self.k_proj_divide = kernels.get_function('proj_divide')

        # Block counts: one block per 32 model rows / 32 data frames.
        self.bsize_model = int(np.ceil(self.size / 32.))
        self.bsize_data = int(np.ceil(self.dset.num_data / 32.))
        self.stream_list = [cp.cuda.Stream() for _ in range(self.num_streams)]