def phase_correlation(data, nr, nc, cyx=None, crop_r=None, sigma=2.0,
                      spf=100, pre_func=None, post_func=None, mode='2d',
                      ref_im=None, rebin=None, der_clip_fraction=0.0,
                      der_clip_max_pct=99.9, truncate=4.0, parallel=True,
                      ncores=None, print_stats=True, nrnc_are_chunks=False,
                      origin='top', logger=None, callback=None):
    '''
    Perform phase correlation on 4-D data using efficient upscaling to
    achieve sub-pixel resolution.
    
    Parameters
    ----------
    data : array_like
        Multidimensional data of shape (scanY, scanX, ..., detY, detX).
    nr : integer or None
        Number of rows to process at once.
    nc : integer or None
        Number of columns to process at once.
    cyx : length 2 iterable or None
        Centre of disk in pixels (cy, cx).
        If None, the image centre is used.
    crop_r : scalar or None
        Radius of circle about `cyx` defining square crop limits used for
        cross-correlation, in pixels.
        If None, the maximum square array about cyx is used.
    sigma : scalar
        Smoothing of the Gaussian derivative.
    spf : integer
        Sub-pixel factor, i.e. 1/spf is the resolution.
    pre_func : callable
        Function that operates (out-of-place) on data before processing:
        out = pre_func(in), where `in` is an ndarray of shape (n, detY, detX).
    post_func : callable
        Function that operates (out-of-place) on data after the derivative:
        out = post_func(in), where `in` is an ndarray of shape (n, detY, detX).
    mode : string
        Derivative type.
        If '1d', 1-D convolution; faster but less accurate for large sigma.
        If '2d', 2-D convolution; more accurate but slower.
    ref_im : None or ndarray
        2-D image used as reference. 
        If None, the first probe position is used.
    rebin : integer or None
        Rebinning factor for detector dimensions. None or 1 for none. 
        If the value is incompatible with the cropped array shape, the
        nearest compatible value will be used instead. 
        'cyx' and 'crop_r' are for the original image and need not be modified.
        'sigma' and 'spf' are scaled with rebinning factor, as are output shifts.
    der_clip_fraction : float
        Fraction of the `der_clip_max_pct` percentile value of the derivative
        images below which values are set to zero.
    der_clip_max_pct : float
        Percentile of derivative image to serve as reference for `der_clip_fraction`.
    truncate : scalar
        Number of sigma to which Gaussians are calculated.
    parallel : bool
        If True, derivative and correlation calculations are multiprocessed.
        Note, if `mode='1d'`, the derivative calculations are not multiprocessed,
        but may be multithreaded if enabled in the BLAS library linked to numpy.
    ncores : None or int
        Number of cores to use for multiprocessing. If None, all cores
        are used.
    print_stats : bool
        If True, statistics on the analysis are printed.
    nrnc_are_chunks : bool
        If True, `nr` and `nc` are interpreted as the number of chunks to
        process at once. If `data` is not chunked, `nr` and `nc` are used
        directly.
    origin : str
        Controls the y-origin of the returned data. If origin='top', pythonic
        indexing is used. If origin='bottom', increasing y is up.
    logger : object or None
        If not None, shift statistics are passed as a string to `logger.log`
        instead of being printed to stdout.
    callback : CustomSignals or None
        If not None, progress is reported through the object's `maximum` and
        `progress` signals instead of the terminal progress bar.
        
    Returns
    -------
    Tuple of (shift_yx, shift_err, shift_difp, ref), where:
    shift_yx : array_like
        Shift array in pixels, of shape ((y,x), scanY, scanX, ...).
        Increasing Y, X is disc shift up, right in image.
    shift_err : 2-D array
        Translation invariant normalized RMS error in correlations.
        See skimage.feature.register_translation for details.
    shift_difp : 2-D array
        Global phase difference between the two images.
        (should be zero if images are non-negative).
        See skimage.feature.register_translation for details.
    ref : 2-D array
        Reference image.
    
    Notes
    -----
    The order of operations is rebinning, pre_func, derivative, 
    post_func, and correlation.
    
    If `nr` or `nc` are None, the entire dimension is processed at once.
    For chunked data, setting `nrnc_are_chunks` to True, and `nr` and `nc`
    to suitable values can improve performance.
    
    Specifying `crop_r` (and an appropriate `cyx`) can speed up the calculation
    significantly.
    
    The execution of `pre_func` and `post_func` is not multiprocessed, so
    they may themselves employ multiprocessing for CPU-intensive calculations.
    
    Efficient upscaling is based on:
    http://scikit-image.org/docs/stable/auto_examples/transform/plot_register_translation.html
    http://scikit-image.org/docs/stable/api/skimage.feature.html#skimage.feature.register_translation
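
    Examples
    --------
    A minimal usage sketch, following the synthetic-data pattern of the other
    examples in this module; the parameter values are illustrative assumptions,
    not recommendations:

    >>> import fpd.fpd_processing as fpdp
    >>> from fpd.synthetic_data import disk_image, fpd_data_view

    >>> radius = 32
    >>> im = disk_image(intensity=1e3, radius=radius, size=256, upscale=8, dtype='u4')
    >>> data = fpd_data_view(im, (32,)*2, colours=0)
    >>> shift_yx, shift_err, shift_difp, ref = fpdp.phase_correlation(data, nr=9, nc=9)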
    
    '''
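    # A minimal stand-in for the `callback` and `logger` objects accepted above
    # (an illustrative assumption, not part of the library API): any object
    # exposing `maximum.emit((name, total))` and `progress.emit((name, n))`
    # works for `callback`, and any object exposing `log(string)` works for
    # `logger`, e.g.:
    #
    #   class _Signal:
    #       def emit(self, payload):
    #           name, value = payload
    #           print(name, value)
    #
    #   class _Callback:
    #       maximum = _Signal()
    #       progress = _Signal()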
    
    # der_clip_max_pct=99.9 'ignores' (256**2)*0.001 ~ 65 pixels.
    # (256**2)*0.001 / (2*3.14) / 2 ~ 5. == ignoring of 5 pix radius, 2 pix width torus
    
    if nrnc_are_chunks:
        nr, nc = fpdp._condition_nrnc_if_chunked(data, nr, nc, print_stats)
    
    if ncores is None:
        ncores = mp.cpu_count()
    
    nondet = data.shape[:-2]
    nonscan = data.shape[2:]
    scanY, scanX = data.shape[:2]
    detY, detX = data.shape[-2:]
    
    r_if, c_if = fpdp._block_indices((scanY, scanX), (nr, nc))
    
    rtn = fpdp._parse_crop_rebin(crop_r, detY, detX, cyx, rebin, print_stats)
    cropped_im_shape, rebinf, rebinning, rii, rif, cii, cif = rtn
       
    
    # rebinning
    rebinf = 1
    rebinning = rebin is not None and rebin != 1
    if rebinning:
        f, fs = fpdp._find_nearest_int_factor(cropped_im_shape[0], rebin)
        if rebin != f:
            print('Image data cropped to:', cropped_im_shape)
            print('Requested rebin (%d) changed to nearest value: %d. Possible values are:' %(rebin, f), fs)
            rebin = f
        rebinf = rebin
        sigma = float(sigma)/rebinf
        spf = int(float(spf)*rebinf)
    rebinned_im_shape = tuple([x//rebinf for x in cropped_im_shape])
    #print('Cropped shape: ', cropped_im_shape)
    #if rebinning:
        #print('Rebinned cropped shape: ', rebinned_im_shape)
    
    
    # gradient of gaussian
    gy, gx = fpdp._g2d_der(sigma, truncate=truncate)
    gxy = gx + 1j*gy
    #gxy = (gx**2 + gy**2)**0.5
    
    
    ### ref im
    if ref_im is None:
        # use first probe position
        ref_im = data[0, 0, ...]
        for i in range(len(nondet)-2):
            ref_im = ref_im[0]
    # else: the provided ref_im is used as-is
    
    ref = ref_im[rii:rif+1, cii:cif+1]
    for t in range(len(nondet)): 
        ref = np.expand_dims(ref, 0)    # ref[None, None, None, ...]
    if rebinning:
        ns = ref.shape[:-2] + tuple([int(x/rebin) for x in ref.shape[-2:]])
        ref = fpdp.rebinA(ref, *ns)
    ref = fpdp_new._process_grad(ref, pre_func, mode, sigma, truncate, gxy,
                        parallel, ncores, der_clip_fraction, der_clip_max_pct,
                        post_func)[0]
    
    
    shift_yx = np.empty(nondet + (2,))
    shift_err = np.empty(nondet)
    shift_difp = np.empty_like(shift_err)
    
    if print_stats:
        print('\nPerforming phase correlation')
        tqdm_file = sys.stderr
    else:
        tqdm_file = fpdp.DummyFile()
    total_nims = np.prod(nondet)
    if callback is not None:
        callback.maximum.emit(("phase_cor", total_nims))

    with tqdm(total=total_nims, file=tqdm_file, mininterval=0, leave=True, unit='images') as pbar:
        for i, (ri, rf) in enumerate(r_if):
            for j, (ci, cf) in enumerate(c_if):               
                # read selected data (into memory if hdf5)  
                d = data[ri:rf, ci:cf, ..., rii:rif+1, cii:cif+1]
                d = np.ascontiguousarray(d)
                if rebinning:
                    ns = d.shape[:-2] + tuple([int(x/rebinf) for x in d.shape[-2:]])
                    d = fpdp.rebinA(d, *ns)
                
                # calc grad
                gm = fpdp_new._process_grad(d, pre_func, mode, sigma, truncate, gxy,
                                   parallel, ncores, der_clip_fraction, der_clip_max_pct,
                                   post_func)
                
                # Could combine grad and reg to skip ifft/fft,
                # and calc ref grad fft only once
                # For 1d mode, could try similar combinations, but using 
                # functions across ndarray axes with multithreaded blas.
                
                ## do correlation
                # gm is (n, detY, detX), with last 2 rebinned
                partial_reg = fpdp.partial(fpdp.register_translation, ref, upsample_factor=spf)
                
                if parallel:
                    pool = Pool(ncores)
                    rslt = pool.map(partial_reg, gm)
                    pool.close()
                    pool.join()
                else:
                    rslt = list(map(partial_reg, gm))
                shift, error, phasediff = np.asarray(rslt).T
                shift = np.array(shift.tolist())
                # -ve shift to swap source/ref coords to be consistent with 
                # other phase analyses
                shift *= -1.0
                
                shift_yx[ri:rf, ci:cf].flat = shift
                shift_err[ri:rf, ci:cf].flat = error
                shift_difp[ri:rf, ci:cf].flat = phasediff
                
                if callback:
                    callback.progress.emit(("phase_cor", np.prod(d.shape[:-2])))
                else:
                    pbar.update(np.prod(d.shape[:-2]))

    if print_stats:
        print('')
        sys.stdout.flush()    
    shift_yx = np.rollaxis(shift_yx, -1, 0)
       
    # reverse y for shift up being positive
    flp = np.array([-1, 1])
    for i in range(len(nonscan)):
        flp = np.expand_dims(flp, -1)
    shift_yx = shift_yx*flp
    
    # default origin implementation is bottom
    if origin.lower() == 'top':
        shift_yx[0] = -shift_yx[0]
        
    # scale shifts for rebinning
    if rebinning:
        shift_yx *= rebinf
    
    # print stats
    if print_stats:
        if logger is not None:
            logger.log(fpdp_new.print_shift_stats(shift_yx, to_str=True))
        else:
            fpdp_new.print_shift_stats(shift_yx)

    return shift_yx, shift_err, shift_difp, ref


def map_image_function(data,
                       nr,
                       nc,
                       cyx=None,
                       crop_r=None,
                       func=None,
                       params=None,
                       rebin=None,
                       parallel=True,
                       ncores=None,
                       print_stats=True,
                       nrnc_are_chunks=False):
    '''
    Map an arbitrary function over a multidimensional dataset.
    
    Parameters
    ----------
    data : array_like
        Multidimensional data of shape (scanY, scanX, ..., detY, detX).
    nr : integer or None
        Number of rows to process at once (see Notes).
    nc : integer or None
        Number of columns to process at once (see Notes).
    cyx : length 2 iterable or None
        Centre of disk in pixels (cy, cx).
        If None, the image centre is used.
    crop_r : scalar or None
        Radius of circle about `cyx` defining square crop limits, in pixels.
        If None, the maximum square array about `cyx` is used.
    func : callable
        Function that operates (out-of-place) on an image: out = func(im),
        where `im` is an ndarray of shape (detY, detX).
    params : None or dictionary
        If not None, a dictionary of parameters passed to the function.
    rebin : integer or None
        Rebinning factor for detector dimensions. None or 1 for none. 
        If the value is incompatible with the cropped array shape, the
        nearest compatible value will be used instead. 
        'cyx' and 'crop_r' are for the original image and need not be modified.
    parallel : bool
        If True, the calculations are multiprocessed.
    ncores : None or int
        Number of cores to use for multiprocessing. If None, all cores
        are used.
    print_stats : bool
        If True, calculation progress is printed to stdout.
    nrnc_are_chunks : bool
        If True, `nr` and `nc` are interpreted as the number of chunks to
        process at once. If `data` is not chunked, `nr` and `nc` are used
        directly.
        
    Returns
    -------
    rtn : ndarray
        The result of mapping the function over the dataset. If the output of
        the function is non-uniform, the dimensions are those of the nondet axes
        and the dtype is object. If the function output is uniform, the first
        axis is of the length of the function return, unless it is singular, in
        which case it is removed.
    
    Notes
    -----
    
    If `nr` or `nc` are None, the entire dimension is processed at once.
    For chunked data, setting `nrnc_are_chunks` to True, and `nr` and `nc`
    to suitable values can improve performance.
    
    Specifying `crop_r` (and an appropriate `cyx`) can speed up the calculation
    significantly.
    
    Examples
    --------
    Center of mass:
    >>> import scipy as sp
    >>> import numpy as np
    >>> import fpd.fpd_processing as fpdp
    >>> from fpd.synthetic_data import disk_image, fpd_data_view
    
    >>> radius = 32
    >>> im = disk_image(intensity=1e3, radius=radius, size=256, upscale=8, dtype='u4')
    >>> data = fpd_data_view(im, (32,)*2, colours=0)
    >>> func = sp.ndimage.center_of_mass
    >>> com_y, com_x = fpdp.map_image_function(data, nr=9, nc=9, func=func)
    
    Non-uniform return:
    >>> def f(image):
    ...    l = np.random.randint(4)+1
    ...    return np.arange(l)
    >>> r = fpdp.map_image_function(data, nr=9, nc=9, func=f)

    Parameter passing:
    >>> def f(image, v):
    ...    return (image >= v).sum()
    >>> r = fpdp.map_image_function(data, nr=9, nc=9, func=f, params={'v' : 2})
    
    Doing very little (when reading from file, this is a measure of access
    and decompression overhead):
    >>> def f(image):
    ...    return None
    >>> data_chunk = data[:16, :16]
    >>> r = fpdp.map_image_function(data_chunk, nr=None, nc=None, func=f)
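
    Cropping about the disc centre (as noted above) can speed things up; a
    sketch assuming the 256x256 synthetic image built in the first example,
    so the centre is (128, 128) (values are illustrative only):
    >>> com_y, com_x = fpdp.map_image_function(data, nr=9, nc=9, cyx=(128, 128),
    ...                                        crop_r=64, func=func)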
    
    '''

    if params is None:
        params = {}

    if nrnc_are_chunks:
        nr, nc = fpdp._condition_nrnc_if_chunked(data, nr, nc, print_stats)

    if ncores is None:
        ncores = mp.cpu_count()

    nondet = data.shape[:-2]
    nonscan = data.shape[2:]
    scanY, scanX = data.shape[:2]
    detY, detX = data.shape[-2:]

    r_if, c_if = fpdp._block_indices((scanY, scanX), (nr, nc))

    rtn = fpdp._parse_crop_rebin(crop_r, detY, detX, cyx, rebin, print_stats)
    cropped_im_shape, rebinf, rebinning, rii, rif, cii, cif = rtn

    rebinned_im_shape = tuple([x // rebinf for x in cropped_im_shape])
    #print('Cropped shape: ', cropped_im_shape)
    #if rebinning:
    #print('Rebinned cropped shape: ', rebinned_im_shape)

    rd = np.empty(nondet, dtype=object)

    if print_stats:
        print('\nMapping image function')
        tqdm_file = sys.stderr
    else:
        tqdm_file = fpdp.DummyFile()
    total_nims = np.prod(nondet)
    with tqdm(total=total_nims,
              file=tqdm_file,
              mininterval=0,
              leave=True,
              unit='images') as pbar:
        for i, (ri, rf) in enumerate(r_if):
            for j, (ci, cf) in enumerate(c_if):
                # read selected data (into memory if hdf5)
                d = data[ri:rf, ci:cf, ..., rii:rif + 1, cii:cif + 1]
                d = np.ascontiguousarray(d)
                if rebinning:
                    ns = d.shape[:-2] + tuple(
                        [int(x / rebinf) for x in d.shape[-2:]])
                    d = fpdp.rebinA(d, *ns)

                partial_func = partial(func, **params)
                d_shape = d.shape
                d.shape = (np.prod(d_shape[:-2]), ) + d_shape[-2:]

                if parallel:
                    pool = Pool(ncores)
                    rslt = pool.map(partial_func, d)
                    pool.close()
                    pool.join()
                else:
                    rslt = list(map(partial_func, d))

                t = np.empty(len(rslt), dtype=object)
                t[:] = rslt
                rd[ri:rf, ci:cf].flat = t

                pbar.update(np.prod(d.shape[:-2]))
    if print_stats:
        print('')
        sys.stdout.flush()

    # convert dtype from object to more appropriate type if possible
    try:
        rdf = rd.ravel()
        rdfa = np.vstack(rdf).reshape(rd.shape + (-1, ))
        rtn = np.rollaxis(rdfa, -1, 0)
    except ValueError:
        rtn = rd

    # remove return axis if singular
    if rtn.ndim > rd.ndim:
        if rtn.shape[0] == 1:
            rtn = rtn[0]

    return rtn


def sum_dif(data, nr, nc, mask=None, nrnc_are_chunks=False, callback=None):
    '''
    Return a summed diffraction image from data. 

    Parameters
    ----------
    data : array_like
        Multidimensional fpd data of shape (scanY, scanX, ..., detY, detX).
    nr : integer or None
        Number of rows to process at once (see Notes).
    nc : integer or None
        Number of columns to process at once (see Notes).
    mask : array-like or None
        Mask applied to data before taking sum.
        Shape should be that of the scan.
    nrnc_are_chunks : bool
        If True, `nr` and `nc` are interpreted as the number of chunks to
        process at once. If `data` is not chunked, `nr` and `nc` are used
        directly.
    callback : CustomSignals or None
        If set, progress is reported through the object's `maximum` and
        `progress` signals instead of the terminal progress bar.

    Returns
    -------
    Array of shape (..., detY, detX).

    Notes
    -----
    If `nr` or `nc` are None, the entire dimension is processed at once.
    For chunked data, setting `nrnc_are_chunks` to True, and `nr` and `nc`
    to suitable values can improve performance.
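
    Examples
    --------
    A minimal usage sketch, following the synthetic-data pattern used elsewhere
    in this module (illustrative values only):

    >>> import fpd.fpd_processing as fpdp
    >>> from fpd.synthetic_data import disk_image, fpd_data_view

    >>> im = disk_image(intensity=1e3, radius=32, size=256, upscale=8, dtype='u4')
    >>> data = fpd_data_view(im, (32,)*2, colours=0)
    >>> sum_im = fpdp.sum_dif(data, nr=9, nc=9)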

    '''

    if nrnc_are_chunks:
        nr, nc = fpdp._condition_nrnc_if_chunked(data, nr, nc, True)

    nondet = data.shape[:-2]
    nonscan = data.shape[2:]
    scanY, scanX = data.shape[:2]
    detY, detX = data.shape[-2:]

    r_if, c_if = fpdp._block_indices((scanY, scanX), (nr, nc))
    if mask is not None:
        for i in range(len(nonscan)):
            mask = np.expand_dims(mask, -1)
            # == mask = mask[..., None,None,None]

    sum_dif = np.zeros(nonscan)
    print('Calculating diffraction sum images.')
    total_ims = np.prod(nondet)
    if callback is not None:
        callback.maximum.emit(("sum_dif", total_ims))

    with tqdm(total=total_ims, mininterval=0, leave=True,
              unit='images') as pbar:
        for i, (ri, rf) in enumerate(r_if):
            for j, (ci, cf) in enumerate(c_if):
                d = data[ri:rf, ci:cf, ...]
                d = np.ascontiguousarray(d)
                if mask is not None:
                    d = d * mask
                sum_dif += d.sum((0, 1))
                if callback:
                    callback.progress.emit(("sum_dif", np.prod(d.shape[:-2])))
                else:
                    pbar.update(np.prod(d.shape[:-2]))

    print('\n')
    return sum_dif


def center_of_mass(data,
                   nr,
                   nc,
                   aperture=None,
                   pre_func=None,
                   thr=None,
                   rebin=None,
                   parallel=True,
                   ncores=None,
                   print_stats=True,
                   nrnc_are_chunks=False,
                   origin='top',
                   widget=None,
                   callback=None):
    '''
    Calculate a centre of mass image from fpd data. The results are
    naturally sub-pixel resolution.

    Parameters
    ----------
    data : array_like
        Multidimensional data of shape (scanY, scanX, ..., detY, detX).
    nr : integer or None
        Number of rows to process at once.
    nc : integer or None
        Number of columns to process at once.
    aperture : array_like
        Mask of shape (detY, detX), applied to diffraction data after
        `pre_func` processing. Note, the data is automatically cropped
        according to the mask for efficiency.
    pre_func : callable
        Function that operates (out-of-place) on data before processing:
        out = pre_func(in), where `in` is an ndarray of shape (n, detY, detX).
    thr : object
        Controls thresholding of the diffraction image.
        If None, no thresholding.
        If scalar, threshold value.
        If string, 'otsu' for Otsu thresholding.
        If callable, function(2-D array) returns the thresholded image.
    rebin : integer or None
        Rebinning factor for detector dimensions. None or 1 for none. 
        If the value is incompatible with the cropped array shape, the
        nearest compatible value will be used instead. 
    parallel : bool
        If True, calculations are multiprocessed.
    ncores : None or int
        Number of cores to use for multiprocessing. If None, all cores
        are used.
    print_stats : bool
        If True, statistics on the analysis are printed.
    nrnc_are_chunks : bool
        If True, `nr` and `nc` are interpreted as the number of chunks to
        process at once. If `data` is not chunked, `nr` and `nc` are used
        directly.
    origin : str
        Controls y-origin of returned data. If origin='top', pythonic indexing 
        is used. If origin='bottom', increasing y is up.
    widget : QtWidget
        A Qt widget that must contain as many widgets or dock widgets as there
        are popup windows.
    callback : CustomSignals or None
        If not None, progress is reported through the object's `maximum` and
        `progress` signals instead of the terminal progress bar.

    Returns
    -------
    Array of shape (yx, scanY, scanX, ...).
    Increasing Y, X CoM is disc shift up, right in image.

    Notes
    -----
    The order of operations is rebinning, pre_func, threshold, aperture,
    and CoM.

    If `nr` or `nc` are None, the entire dimension is processed at once.
    For chunked data, setting `nrnc_are_chunks` to True, and `nr` and `nc`
    to suitable values can improve performance.

    The execution of `pre_func` is not multiprocessed, so it may itself employ
    multiprocessing for CPU-intensive calculations.

    In the simplest cases, multiprocessing runs at a similar speed to the
    non-parallel code.

    Examples
    --------
    Using an aperture and rebinning:

    >>> import numpy as np
    >>> import fpd.fpd_processing as fpdp
    >>> from fpd.synthetic_data import disk_image, fpd_data_view

    >>> radius = 32
    >>> im = disk_image(intensity=1e3, radius=radius, size=256, upscale=8, dtype='u4')
    >>> data = fpd_data_view(im, (32,)*2, colours=0)
    >>> ap = fpdp.synthetic_aperture(data.shape[-2:], cyx=(128,)*2, rio=(0, 48), sigma=0, aaf=1)[0]
    >>> com_y, com_x = fpdp.center_of_mass(data, nr=9, nc=9, rebin=3, aperture=ap)
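
    Thresholding before the CoM calculation; a sketch using the `thr` options
    documented above ('otsu' or a callable; the callable below is an
    illustrative assumption):

    >>> def my_thr(image):
    ...     return image > image.mean()
    >>> com_y, com_x = fpdp.center_of_mass(data, nr=9, nc=9, thr='otsu')
    >>> com_y, com_x = fpdp.center_of_mass(data, nr=9, nc=9, thr=my_thr)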


    '''

    # Possible alternative was not as fast in tests:
    # from scipy.ndimage.measurements import center_of_mass
    if nrnc_are_chunks:
        nr, nc = fpdp._condition_nrnc_if_chunked(data, nr, nc, print_stats)

    if ncores is None:
        ncores = mp.cpu_count()

    nondet = data.shape[:-2]
    nonscan = data.shape[2:]
    scanY, scanX = data.shape[:2]
    detY, detX = data.shape[-2:]

    use_ap = False
    if isinstance(aperture, np.ndarray):
        # determine limits to index array for efficiency
        rii, rif = np.where(aperture.sum(axis=1) > 0)[0][[0, -1]]
        cii, cif = np.where(aperture.sum(axis=0) > 0)[0][[0, -1]]
        use_ap = True
    else:
        rii, rif = 0, detY - 1
        cii, cif = 0, detX - 1
    data_square_len = rif - rii + 1

    # TODO: the following is very similar to _parse_crop_rebin, except it operates
    # only on rii etc These could be refactored and combined to simplify.
    # rebinning
    rebinf = 1
    rebinning = rebin is not None and rebin != 1
    if rebinning:
        # change crop
        extra_pixels = int(
            np.ceil(data_square_len / float(rebin)) * rebin) - data_square_len
        ext_pix_pads = extra_pixels // 2

        # this is where the decision on if extra pixels can be added and where
        # they should go could be made
        if extra_pixels % 2:
            # odd
            ext_pix_pads = (-ext_pix_pads, ext_pix_pads + 1)
        else:
            # even
            ext_pix_pads = (-ext_pix_pads, ext_pix_pads)
        riic, rifc = rii + ext_pix_pads[0], rif + ext_pix_pads[1]
        ciic, cifc = cii + ext_pix_pads[0], cif + ext_pix_pads[1]
        if riic < 0 or rifc > detY - 1 or ciic < 0 or cifc > detX - 1:
            # change rebin
            f, fs = fpdp._find_nearest_int_factor(data_square_len, rebin)
            if rebin != f:
                if print_stats:
                    print('Image data cropped to:', (rif + 1 - rii, cif + 1 - cii))
                    print(
                        'Requested rebin (%d) changed to nearest value: %d. Possible values are:'
                        % (rebin, f), fs)
                rebin = f
        else:
            rii, rif = riic, rifc
            cii, cif = ciic, cifc
            cropped_im_shape = (rif + 1 - rii, cif + 1 - cii)
            if print_stats:
                print('Image data cropped to:', cropped_im_shape)
        rebinf = rebin

    if use_ap:
        aperture = aperture[rii:rif + 1, cii:cif + 1].astype(float)
        if rebinning:
            ns = tuple([int(x / rebin) for x in aperture.shape])
            aperture = fpdp.rebinA(aperture, *ns)

    r_if, c_if = fpdp._block_indices((scanY, scanX), (nr, nc))
    com_im = np.zeros(nondet + (2, ), dtype=float)
    yi, xi = np.indices((detY, detX))
    yi = yi[::-1, ...]  # reverse order so increasing Y is up.

    yixi = np.concatenate((yi[..., None], xi[..., None]), 2)
    yixi = yixi[rii:rif + 1, cii:cif + 1, :].astype(float)
    if rebinning:
        ns = tuple([int(x / rebin) for x in yixi.shape[:2]]) + yixi.shape[2:]
        yixi = fpdp.rebinA(yixi, *ns)
    yi0 = yixi[:, 0, 0]
    xi0 = yixi[0, :, 1]

    if print_stats:
        print('Calculating centre-of-mass')
        tqdm_file = sys.stderr
    else:
        tqdm_file = fpdp.DummyFile()
    total_nims = np.prod(nondet)
    if callback is not None:
        callback.maximum.emit(("com_yx", total_nims))

    with tqdm(total=total_nims,
              file=tqdm_file,
              mininterval=0,
              leave=True,
              unit='images') as pbar:
        for i, (ri, rf) in enumerate(r_if):
            for j, (ci, cf) in enumerate(c_if):
                d = data[ri:rf, ci:cf, ..., rii:rif + 1,
                         cii:cif + 1]  # .astype(np.float)
                d = np.ascontiguousarray(d)
                if rebinning:
                    ns = d.shape[:-2] + tuple(
                        [int(x / rebin) for x in d.shape[-2:]])
                    d = fpdp.rebinA(d, *ns)

                # modify with function; restore the pre-reshape shape afterwards
                # (avoids referencing `ns`, which is only set when rebinning)
                if pre_func is not None:
                    d_pre_shape = d.shape
                    d = pre_func(d.reshape((-1, ) + d.shape[-2:]))
                    d.shape = d_pre_shape

                partial_comf = partial(fpdp._comf,
                                       use_ap=use_ap,
                                       aperture=aperture,
                                       yi0=yi0,
                                       xi0=xi0,
                                       thr=thr)

                d_shape = d.shape  # scanY, scanX, ..., detY, detX
                d.shape = (np.prod(d_shape[:-2]), ) + d_shape[-2:]
                # (scanY, scanX, ...), detY, detX

                if parallel:
                    pool = Pool(ncores)
                    rslt = pool.map(partial_comf, d)
                    pool.close()
                    pool.join()
                else:
                    rslt = list(map(partial_comf, d))
                rslt = np.asarray(rslt)

                #print(d_shape, com_im[ri:rf,ci:cf,...].shape, rslt.shape)
                rslt.shape = d_shape[:-2] + (2, )
                com_im[ri:rf, ci:cf, ...] = rslt

                if callback:
                    callback.progress.emit(("com_yx", np.prod(d.shape[:-2])))
                else:
                    pbar.update(np.prod(d.shape[:-2]))

    if print_stats:
        print('\n')
    com_im = (com_im) / rebinf**2

    # roll: (scanY, scanX, ..., yx) to (yx, scanY, scanX, ...)
    com_im = np.rollaxis(com_im, -1, 0)

    # default origin implementation is bottom
    if origin.lower() == 'top':
        com_im[0] = nonscan[0] - 1 - com_im[0]

    # print some stats
    if print_stats:
        print_shift_stats(com_im)

    return com_im