def prepare_dataset(X):
    global scaler  # `scaler` is assumed to be module-level state shared across calls
    shape_ = X.shape

    # Side length of the square image each flattened sample maps onto.
    d = int(np.sqrt(X.flatten().reshape(X.shape[0], -1).shape[1]))

    if len(shape_) == 4:
        # Already has a channel axis: recompute d per RGB channel.
        d = int(np.sqrt(X.flatten().reshape(X.shape[0], -1).shape[1] / 3))
        X = da.reshape(X, [-1, d, d, 3])

    elif d == shape_[1] and len(shape_) == 3:
        # Square greyscale images: convert each one to RGB.
        X = da.reshape(X, [-1, d, d])
        X = da.array(list(map(lambda x: grey2rgb(x), X)), dtype=np.float32)

    else:
        # Flat samples whose length is not a perfect square: pad the
        # feature columns up to d * d before reshaping.
        r = d ** 2 - X.shape[1]
        train_padding = da.zeros((shape_[0], r))
        X = da.hstack([X, train_padding])

        X = da.reshape(X, [-1, d, d])
        X = da.array(list(map(lambda x: grey2rgb(x), X)), dtype=np.float32)

    print('Scaling dataset')
    if scaler is not None:
        X = scaler.transform(X.flatten().reshape(-1, 1).astype(np.float32)).reshape(X.shape)
    else:
        scaler = MinMaxScaler()
        X = scaler.fit_transform(X.flatten().reshape(-1, 1).astype(np.float32)).reshape(X.shape)

    return X
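
Usage sketch (not from the original source): the flatten-scale-reshape pattern above, shown standalone with scikit-learn's MinMaxScaler on a NumPy-backed batch of RGB images. All names here are illustrative.

import numpy as np
from sklearn.preprocessing import MinMaxScaler

X = np.random.randint(0, 255, size=(4, 8, 8, 3)).astype(np.float32)

# Fit on all pixel values at once, then restore the original shape.
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X.reshape(-1, 1)).reshape(X.shape)

assert 0.0 <= X_scaled.min() and X_scaled.max() <= 1.0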
def xyz2lonlat(x__, y__, z__):
    """Get longitudes from cartesian coordinates.
    """
    R = 6370997.0
    lons = da.rad2deg(da.arccos(x__ / da.sqrt(x__ ** 2 + y__ ** 2))) * da.sign(y__)
    lats = da.sign(z__) * (90 - da.rad2deg(da.arcsin(da.sqrt(x__ ** 2 + y__ ** 2) / R)))

    return lons, lats
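
A quick sanity check of xyz2lonlat (a sketch; assumes the spherical conventions used above, with R the sphere radius in metres):

import dask.array as da

R = 6370997.0
lon, lat = da.deg2rad(da.asarray([45.0])), da.deg2rad(da.asarray([30.0]))
x = R * da.cos(lat) * da.cos(lon)
y = R * da.cos(lat) * da.sin(lon)
z = R * da.sin(lat)
lons, lats = xyz2lonlat(x, y, z)
print(lons.compute(), lats.compute())  # [45.] [30.]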
Example no. 4
def pearson_influence(xarr: da.Array, yarr: da.Array) -> da.Array:
    """Calculating the influence for deleting a point on the pearson correlation"""

    if xarr.shape != yarr.shape:
        raise ValueError(
            f"The shape of xarr and yarr should be same, got {xarr.shape}, {yarr.shape}"
        )

    # Fast calculation of the influence of removing one element on the correlation
    n = xarr.shape[0]

    x2, y2 = da.square(xarr), da.square(yarr)
    xy = xarr * yarr

    # The influence is vectorized over xarr and yarr, so each sum is broadcast to length n.

    xsum = da.ones(n) * da.sum(xarr)
    ysum = da.ones(n) * da.sum(yarr)
    xysum = da.ones(n) * da.sum(xy)
    x2sum = da.ones(n) * da.sum(x2)
    y2sum = da.ones(n) * da.sum(y2)

    # Note: we multiply both the numerator and the denominator by (n - 1)^2
    # to avoid divisions.
    numerator = (n - 1) * (xysum - xy) - (xsum - xarr) * (ysum - yarr)

    varx = (n - 1) * (x2sum - x2) - da.square(xsum - xarr)
    vary = (n - 1) * (y2sum - y2) - da.square(ysum - yarr)
    denominator = da.sqrt(varx * vary)

    # operator.itruediv keeps the division lazy and block-wise
    return da.map_blocks(itruediv,
                         numerator,
                         denominator,
                         dtype=numerator.dtype)
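
Hedged usage sketch (itruediv here is operator.itruediv, which the function body requires in scope):

import dask.array as da
from operator import itruediv

x = da.random.uniform(size=100, chunks=100)
y = x + da.random.normal(0, 0.5, size=100, chunks=100)
influence = pearson_influence(x, y)   # one influence value per point
print(influence.compute()[:5])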
Example no. 5
def corr(*args, axis=None, **kwargs):
    """
    Pearson's correlation coefficient
    """
    c = cov(*args, axis=axis, **kwargs)
    std = da.sqrt(np.diagonal(c, axis1=-1, axis2=-2))[..., None]
    return c / std / std.swapaxes(-1, -2)
Example no. 6
def test_PowerMethod_std_method():
    N = 1000
    P = 100
    k = 10
    array = da.random.randint(0, 3, size=(N, P))
    for method in ['norm', 'binom']:
        new_array = make_snp_array(array, std_method=method)
        PM = PowerMethod(k=k, scoring_method='q-vals', tol=1e-13, factor=None)
        U_PM, S_PM, V_PM = PM.svd(array=new_array)

        mean = array.mean(axis=0)
        if method == 'norm':
            std = array.std(axis=0)
        else:
            p = mean / 2
            std = da.sqrt(2 * p * (1 - p))

        x = (array - mean).dot(np.diag(1 / std))

        U, S, V = da.linalg.svd(x)
        U_k, S_k, V_k = svd_to_trunc_svd(U, S, V, k=k)
        np.testing.assert_almost_equal(subspace_dist(U_PM, U_k, S_k),
                                       0,
                                       decimal=3)
        np.testing.assert_almost_equal(subspace_dist(V_PM, V_k, S_k),
                                       0,
                                       decimal=3)
        np.testing.assert_array_almost_equal(S_k, S_PM, decimal=2)
Example no. 7
def euclidean_distances(X,
                        Y=None,
                        Y_norm_squared=None,
                        squared=False,
                        X_norm_squared=None):
    if X_norm_squared is not None:
        XX = X_norm_squared
        if XX.shape == (1, X.shape[0]):
            XX = XX.T
        elif XX.shape != (X.shape[0], 1):
            raise ValueError(
                "Incompatible dimensions for X and X_norm_squared")
    else:
        XX = row_norms(X, squared=True)[:, np.newaxis]
    if X is Y:
        YY = XX.T
    elif Y_norm_squared is not None:
        if Y_norm_squared.ndim < 2:
            YY = Y_norm_squared[:, np.newaxis]
        else:
            YY = Y_norm_squared
        if YY.shape != (1, Y.shape[0]):
            raise ValueError(
                "Incompatible dimensions for Y and Y_norm_squared")
    else:
        YY = row_norms(Y, squared=True)[np.newaxis, :]

    # TODO: this often emits a warning. Silence it here?
    distances = -2 * X.dot(Y.T) + XX + YY
    distances = da.maximum(distances, 0)
    # TODO: scikit-learn sets the diagonal to 0 when X is Y.

    return distances if squared else da.sqrt(distances)
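
Illustrative call (a sketch; assumes the row_norms helper used above is importable and dask-aware):

import dask.array as da

X = da.random.uniform(size=(6, 3), chunks=(3, 3))
D = euclidean_distances(X, X)   # X is Y, so the squared norms are reused
print(D.compute().round(3))     # symmetric, near-zero diagonal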
Example no. 8
def euclidean(XA, XB):
    """Returns the distance between points using
       Euclidean distance (2-norm) as the distance metric between the
       points.

       Find the Euclidean distances between four 2-D coordinates:
        >>> coords = [(35.0456, -85.2672),
        ...           (35.1174, -89.9711),
        ...           (35.9728, -83.9422),
        ...           (36.1667, -86.7833)]
        >>> euclidean(coords, coords)
        array([[ 0.    ,  4.7044,  1.6172,  1.8856],
            [ 4.7044,  0.    ,  6.0893,  3.3561],
            [ 1.6172,  6.0893,  0.    ,  2.8477],
            [ 1.8856,  3.3561,  2.8477,  0.    ]])

    """
    mA = (XA.shape)[0]
    mB = (XB.shape)[0]

    distances = []

    for i in range(0, mA):
        dm = np.zeros(shape=(1, mB), dtype=np.double)
        for j in range(0, mB):
            XA_XB = XA[i, :] - XB[j, :]
            dm[0, j] = da.sqrt(da.dot(XA_XB, XA_XB))

        distances.append(
            da.from_array(dm, chunks=(mA + mB) // multiprocessing.cpu_count()))

    return da.concatenate(distances, axis=0)
Example no. 9
def uirdftn(inarray, ndim=None, *args, **kwargs):
    """N-dim real unitary discrete Fourier transform

	This transform consider the Hermitian property of the transform
	from complex to real real input.

	Parameters
	----------
	inarray : ndarray
	The array to transform.

	ndim : int, optional
	The `ndim` last axis along wich to compute the transform. All
	axes by default.

	Returns
	-------
	outarray : array-like (the last ndim as (N - 1) * 2 lenght)
	"""
    if not ndim:
        ndim = inarray.ndim

    return dask_irfftn(inarray, axes=range(-ndim, 0), *args, **
                       kwargs) * da.sqrt(
                           da.prod(da.asarray(inarray.shape[-ndim:-1])) *
                           (inarray.shape[-1] - 1) * 2)
Example no. 10
    def fit(
        self,
        X: Union[ArrayLike, DataFrameType],
        y: Optional[Union[ArrayLike, SeriesType]] = None,
    ) -> "StandardScaler":
        self._reset()
        attributes = OrderedDict()
        if isinstance(X, (pd.DataFrame, dd.DataFrame)):
            X = X.values

        if self.with_mean:
            mean_ = nanmean(X, 0)
            attributes["mean_"] = mean_
        if self.with_std:
            var_ = nanvar(X, 0)
            scale_ = var_.copy()
            scale_[scale_ == 0] = 1
            scale_ = da.sqrt(scale_)
            attributes["scale_"] = scale_
            attributes["var_"] = var_

        attributes["n_samples_seen_"] = np.nan
        values = compute(*attributes.values())
        for k, v in zip(attributes, values):
            setattr(self, k, v)
        self.n_features_in_ = X.shape[1]
        return self
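
This fit() appears to match the dask-ml style StandardScaler; a minimal usage sketch via dask_ml.preprocessing (assumed installed):

import dask.array as da
from dask_ml.preprocessing import StandardScaler

X = da.random.normal(10.0, 3.0, size=(1000, 4), chunks=(250, 4))
scaler = StandardScaler().fit(X)
print(scaler.mean_)    # ~[10, 10, 10, 10]
print(scaler.scale_)   # ~[3, 3, 3, 3]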
Example no. 11
def da_linregress(x, y):
    """
    Refactor of the scipy linregress with numba, less checks for speed sake and
    done with dask arrays

    :param x: array for independent
    :param y:
    :return:
    """
    TINY = 1.0e-20
    # x = np.asarray(x)
    # y = np.asarray(y)
    arr = da.stack([x, y], axis=1)
    n = len(x)
    # average sum of squares:
    ssxm, ssxym, ssyxm, ssym = (da.dot(arr.T, arr) / n).ravel()
    r_num = ssxym
    r_den = np.sqrt(ssxm * ssym)
    if r_den == 0.0:
        r = 0.0
    else:
        r = r_num / r_den
        # test for numerical error propagation
        if r > 1.0:
            r = 1.0
        elif r < -1.0:
            r = -1.0
    df = n - 2
    slope = r_num / ssxm
    r_t = r + TINY
    t = r * da.sqrt(df / ((1.0 - r_t) * (1.0 + r_t)))
    prob = 2 * stats.distributions.t.sf(np.abs(t), df)
    return slope, r**2, prob
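
Quick check against the definition (a sketch; assumes scipy.stats is imported as stats in the function's module; inputs are centered first, since the moment sums above are uncentered):

import numpy as np
import dask.array as da

rng = np.random.RandomState(0)
x = rng.normal(size=500)
y = 2.0 * x + rng.normal(size=500)
x, y = x - x.mean(), y - y.mean()
slope, r2, prob = da_linregress(da.from_array(x, chunks=100),
                                da.from_array(y, chunks=100))
print(float(slope), float(r2))   # slope ~2.0, r^2 ~0.8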
Example no. 12
def mean_squared_error(
    y_true: ArrayLike,
    y_pred: ArrayLike,
    sample_weight: Optional[ArrayLike] = None,
    multioutput: Optional[str] = "uniform_average",
    squared: bool = True,
    compute: bool = True,
) -> ArrayLike:
    _check_sample_weight(sample_weight)
    output_errors = ((y_pred - y_true)**2).mean(axis=0)

    if isinstance(multioutput, str):
        if multioutput == "raw_values":
            return output_errors
        elif multioutput == "uniform_average":
            # pass None as weights to np.average: uniform mean
            multioutput = None
    else:
        raise ValueError("Weighted 'multioutput' not supported.")
    result = output_errors.mean()
    if not squared:
        result = da.sqrt(result)
    if compute:
        result = result.compute()
    return result
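
A small check against the definition (a sketch; assumes dask-array inputs and that the _check_sample_weight helper accepts None):

import numpy as np
import dask.array as da

y_true = da.from_array(np.array([3.0, -0.5, 2.0, 7.0]), chunks=2)
y_pred = da.from_array(np.array([2.5, 0.0, 2.0, 8.0]), chunks=2)
print(mean_squared_error(y_true, y_pred))                  # 0.375
print(mean_squared_error(y_true, y_pred, squared=False))   # ~0.612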
Example no. 13
def calc_lac(fcast, obs):
    """
    Method to calculate the Local Anomaly Correlation (LAC).  Uses numexpr
    for speed over larger datasets.

    Note: If necessary (memory concerns) in the future, the numexpr statements
    can be extended to use pytable arrays.  Would need to provide means to
    function, as summing over the dataset is still very slow it seems.

    Parameters
    ----------
    fcast: ndarray
        Time series of forecast data. M x N where M is the temporal dimension.
    obs: ndarray
        Time series of observations. M x N

    Returns
    -------
    lac: ndarray
        Local anomaly corellations for all locations over the time range.
    """
    # Calculate means of data
    f_mean = fcast.mean(axis=0)
    o_mean = obs.mean(axis=0)
    f_anom = fcast - f_mean
    o_anom = obs - o_mean

    # Calculate covariance between time series at each gridpoint
    cov = (f_anom * o_anom).sum(axis=0)

    # Calculate standardization terms
    f_std = (f_anom**2).sum(axis=0)
    o_std = (o_anom**2).sum(axis=0)
    if is_dask_array(f_std):
        f_std = da.sqrt(f_std)
    else:
        f_std = np.sqrt(f_std)

    if is_dask_array(o_std):
        o_std = da.sqrt(o_std)
    else:
        o_std = np.sqrt(o_std)

    std = f_std * o_std
    lac = cov / std

    return lac
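
Sanity check (a sketch; assumes the is_dask_array helper is in scope): the LAC of a series with itself is 1 at every grid point.

import numpy as np

obs = np.random.normal(size=(20, 5))          # 20 times, 5 grid points
print(np.allclose(calc_lac(obs, obs), 1.0))   # True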
Example no. 14
def periodic_distance(a, b, periodic):
    '''Periodic distance between two arrays. Periodic is a 3
    dimensional array containing the 3 box sizes.

    '''
    delta = abs(a - b)
    delta = da.where(delta > 0.5 * periodic, periodic - delta, delta)
    return da.sqrt((delta ** 2).sum(axis=-1))
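
Minimum-image check in a unit box: two points near opposite faces are 0.1 apart, not 0.9.

import numpy as np
import dask.array as da

box = np.array([1.0, 1.0, 1.0])
a = da.from_array(np.array([[0.05, 0.5, 0.5]]), chunks=(1, 3))
b = da.from_array(np.array([[0.95, 0.5, 0.5]]), chunks=(1, 3))
print(periodic_distance(a, b, box).compute())   # [0.1]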
Example no. 15
    def get_distance(tan1, tan2, cos3):
        """
        Gets distance component of Li kernels
        """

        temp = tan1 * tan1 + tan2 * tan2 - 2.0 * tan1 * tan2 * cos3

        return da.sqrt(da.maximum(temp, 0))
Example no. 16
def get_array_moments(
        array: da.core.Array,
        mean: bool = True,
        std: bool = True,
        std_method: str = 'binom',
        axis: int = 0
) -> Tuple[Optional[da.core.Array], Optional[da.core.Array]]:
    """ Computes specified array_moments

    Parameters
    ----------
    array : array_like, shape (N, P)
        Array that moments will be computed from
    mean : bool
        Flag whether to compute mean of "array" along "axis"
    std : bool
        Flag whether to compute std of "array" along "axis"
    std_method : str
        Method used to compute standard deviation.

        Possible methods are:
            'norm'  ==> normal distribution standard deviation. See np.std
            'binom' ==> binomial standard deviation:
                            sqrt(2*p*(1-p)), where p = "mean"/2
    axis : int
        Axis to compute mean and std along.

    Returns
    -------
    array_mean : da.core.array, optional
        If "mean" is false, returns None
        Otherwise returns the array mean
    array_std: da.core.array, optional
        If "std" is false, returns None
        Otherwise returns the array std
    """
    array_mean = None
    array_std = None

    if mean:
        array_mean = da.nanmean(array, axis=axis)

    if std:
        if std_method == 'binom':
            u = array_mean if mean else da.nanmean(array, axis=axis)
            u /= 2
            array_std = da.sqrt(2 * u * (1 - u))
        elif std_method == 'norm':
            array_std = da.nanstd(array, axis=axis)
        else:
            raise NotImplementedError(
                f'std_method, {std_method}, is not implemented')

    array_mean, array_std = persist(array_mean, array_std)

    return array_mean, array_std
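
Hedged usage on a genotype-like matrix (persist here is dask.persist, which the function assumes in scope):

import dask.array as da

g = da.random.randint(0, 3, size=(100, 20), chunks=(50, 20))
mu, sd = get_array_moments(g, std_method='binom')
print(mu.compute()[:5])
print(sd.compute()[:5])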
Example no. 17
def _transformlng_dask(lng, lat):
    ret = 300.0 + lng + 2.0 * lat + 0.1 * lng * lng + \
          0.1 * lng * lat + 0.1 * da.sqrt(da.fabs(lng))
    ret += (20.0 * da.sin(6.0 * lng * pi) +
            20.0 * da.sin(2.0 * lng * pi)) * 2.0 / 3.0
    ret += (20.0 * da.sin(lng * pi) +
            40.0 * da.sin(lng / 3.0 * pi)) * 2.0 / 3.0
    ret += (150.0 * da.sin(lng / 12.0 * pi) +
            300.0 * da.sin(lng / 30.0 * pi)) * 2.0 / 3.0
    return ret
Example no. 18
def _transformlat_dask(lng, lat):
    ret = -100.0 + 2.0 * lng + 3.0 * lat + 0.2 * lat * lat + \
          0.1 * lng * lat + 0.2 * da.sqrt(da.fabs(lng))
    ret += (20.0 * da.sin(6.0 * lng * pi) +
            20.0 * da.sin(2.0 * lng * pi)) * 2.0 / 3.0
    ret += (20.0 * da.sin(lat * pi) +
            40.0 * da.sin(lat / 3.0 * pi)) * 2.0 / 3.0
    ret += (160.0 * da.sin(lat / 12.0 * pi) +
            320 * da.sin(lat * pi / 30.0)) * 2.0 / 3.0
    return ret
Example no. 19
def bw_std(image, block_shape, ddof=0, keep_shape=False):
    """
    Blockwise standard deviation.
    """
    # zero-mean
    bwm = bw_mean(image, block_shape, keep_shape=True)
    image_zm = image - bwm
    # follow standard deviation formula
    bws = bw_sum(image_zm**2, block_shape, keep_shape=keep_shape)
    return da.sqrt(bws / (np.prod(block_shape) - ddof))
Example no. 20
def _unequal_var_ttest_denom(v1, n1, v2, n2):
    vn1 = v1 / n1
    vn2 = v2 / n2
    with np.errstate(divide="ignore", invalid="ignore"):
        df = (vn1 + vn2)**2 / (vn1**2 / (n1 - 1) + vn2**2 / (n2 - 1))

    # If df is undefined, variances are zero (assumes n1 > 0 & n2 > 0).
    # Hence it doesn't matter what df is as long as it's not NaN.
    df = da.where(da.isnan(df), 1, df)  # XXX: np -> da
    denom = da.sqrt(vn1 + vn2)
    return df, denom
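
Assembling a Welch t-statistic from the denominator helper above (a sketch; scipy.stats.ttest_ind(..., equal_var=False) is the reference implementation):

import dask.array as da

a = da.random.normal(0.0, 1.0, size=1000, chunks=250)
b = da.random.normal(0.5, 2.0, size=1000, chunks=250)
df, denom = _unequal_var_ttest_denom(a.var(ddof=1), a.shape[0],
                                     b.var(ddof=1), b.shape[0])
t = (a.mean() - b.mean()) / denom
print(float(df), float(t))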
Example no. 21
def _solve_quadratic_dask(a__, b__, c__, min_val=0.0, max_val=1.0):
    """Solve quadratic equation and return the valid roots from interval
    [*min_val*, *max_val*]

    """

    discriminant = b__ * b__ - 4 * a__ * c__

    # Solve the quadratic polynomial
    x_1 = (-b__ + da.sqrt(discriminant)) / (2 * a__)
    x_2 = (-b__ - da.sqrt(discriminant)) / (2 * a__)

    # Find valid solutions, i.e. min_val <= x <= max_val
    idxs = (x_1 < min_val) | (x_1 > max_val)
    x__ = da.where(idxs, x_2, x_1)

    idxs = (x__ < min_val) | (x__ > max_val)
    x__ = da.where(idxs, np.nan, x__)

    return x__
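
Elementwise example: x^2 - x + 0.21 has roots 0.3 and 0.7 (x_1 = 0.7 is valid), while x^2 - 3x + 2 has roots 1 and 2, so the helper falls back to x_2 = 1.

import numpy as np
import dask.array as da

a = da.ones(2, chunks=2)
b = da.from_array(np.array([-1.0, -3.0]), chunks=2)
c = da.from_array(np.array([0.21, 2.0]), chunks=2)
print(_solve_quadratic_dask(a, b, c).compute())   # [0.7  1. ]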
Example no. 23
    def filter(array: xr.DataArray, settings: dict, manager: LarvikManager = LarvikManager()) -> xr.DataArray:
        it = array
        # Prewitt gradients along the first two axes, combined below into a
        # gradient magnitude.
        prewx = dask_image.ndfilters.prewitt(it.data, axis=0)
        prewy = dask_image.ndfilters.prewitt(it.data, axis=1)

        prewittfiltered = da.sqrt(prewx * prewx + prewy * prewy)
        c = manager.meta.prepend(it, string="Prewitt of")
        channels = xr.DataArray(da.array(c), dims="c")

        x = xr.DataArray(prewittfiltered, dims=it.dims, coords={ **it.coords, "channels": channels})
        return x
Example no. 25
def inside_circle(total_count, chunk_size=-1):
    # Sample points uniformly in the unit square; those with radius <= 1
    # fall inside the quarter-circle.
    x = da.random.uniform(size=(total_count), chunks=(chunk_size))
    y = da.random.uniform(size=(total_count), chunks=(chunk_size))

    radii = da.sqrt(x * x + y * y)
    filtered = da.where(radii <= 1.0)

    indices = np.array(filtered[0])
    count = len(radii[indices])

    return count
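
The classic use of inside_circle is a Monte Carlo estimate of pi:

total = 1_000_000
hits = inside_circle(total, chunk_size=100_000)
print(4 * hits / total)   # ~3.14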
Example no. 26
def _calculate_f(coeffs, points, x, y, sumcol, type='dask'):
    """
    Calculate the thin-plate energy function.
    """

    w = coeffs[:-3]
    a1, ax, ay = coeffs[-3:]

    for wi, Pi in zip(w, points):
        sumcol += wi * _U_dask(da.sqrt((x - Pi[0])**2 + (y - Pi[1])**2))

    return a1 + ax * x + ay * y + sumcol
Example no. 27
def initialize_da(X, k, init='random', W=None, H=None):
    n_components = k
    n_samples, n_features = X.shape
    if init == 'random':
        avg = da.sqrt(X.mean() / n_components)
        H = avg * da.random.RandomState(42).normal(
            0,
            1,
            size=(n_components, n_features),
            chunks=(n_components, X.chunks[1][0]))
        W = avg * da.random.RandomState(42).normal(
            0,
            1,
            size=(n_samples, n_components),
            chunks=(n_samples, n_components))

        H = da.fabs(H)
        W = da.fabs(W)
        return W, H

    if init == 'nndsvd' or init == 'nndsvda':
        # not converted to da yet
        raise NotImplementedError

    if init == 'custom':
        return W, H

    if init == 'random_vcol':

        import math
        #p_c = options.get('p_c', int(ceil(1. / 5 * X.shape[1])))
        #p_r = options.get('p_r', int(ceil(1. / 5 * X.shape[0])))
        p_c = int(math.ceil(1. / 5 * X.shape[1]))
        p_r = int(math.ceil(1. / 5 * X.shape[0]))
        prng = np.random.RandomState(42)

        #W = da.zeros((X.shape[0], n_components), chunks = (X.shape[0],n_components))
        #H = da.zeros((n_components, X.shape[1]), chunks = (n_components,X.chunks[1][0]))

        W = []
        H = []

        for i in range(k):
            W.append(X[:, prng.randint(low=0, high=X.shape[1], size=p_c)].mean(
                axis=1).compute())
            H.append(X[prng.randint(low=0, high=X.shape[0], size=p_r), :].mean(
                axis=0).compute())
        W = np.stack(W, axis=1)
        H = np.stack(H, axis=0)

        return W, H
Example no. 28
    def fit(self) -> None:
        """
        Returns
        -------
        None
        """
        self._center_vector = self._array.mean(axis=self._axis)
        if self._std_dist == 'normal':
            self._scale_vector = self._std_inverter(
                self._array.std(axis=self._axis))
        else:
            p = self._center_vector / 2
            self._scale_vector = self._std_inverter(da.sqrt(2 * p * (1 - p)))
        self._sym_scale_vector = self._scale_vector**2
Example no. 29
def haversine_distance(pickup_latitude, pickup_longitude, dropoff_latitude, dropoff_longitude):
    x_1 = pi / 180 * pickup_latitude
    y_1 = pi / 180 * pickup_longitude
    x_2 = pi / 180 * dropoff_latitude
    y_2 = pi / 180 * dropoff_longitude

    dlon = y_2 - y_1
    dlat = x_2 - x_1
    a = sin(dlat / 2)**2 + cos(x_1) * cos(x_2) * sin(dlon / 2)**2

    c = 2 * arcsin(sqrt(a))
    r = 6371  # Radius of earth in kilometers

    return c * r
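
Scalar example (a sketch; assumes pi, sin, cos, arcsin and sqrt are bound to NumPy or dask.array equivalents at module level):

from numpy import pi, sin, cos, arcsin, sqrt

# Times Square to LAX, roughly 3,960 km.
print(haversine_distance(40.758, -73.985, 33.9416, -118.4085))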
Example no. 30
def ttest_1samp(a, popmean, axis=0, nan_policy='propagate'):
    if nan_policy != 'propagate':
        raise NotImplementedError("`nan_policy` other than 'propagate' "
                                  "have not been implemented.")
    n = a.shape[axis]
    df = n - 1

    d = da.mean(a, axis) - popmean
    v = da.var(a, axis, ddof=1)
    denom = da.sqrt(v / float(n))

    with np.errstate(divide='ignore', invalid='ignore'):
        t = da.divide(d, denom)
    t, prob = _ttest_finish(df, t)
    return delayed(Ttest_1sampResult, nout=2)(t, prob)
Example no. 32
def do_compute(seed, size=int(4e4), radius=300):
    with dask.set_options(get=dask.threaded.get):  # pre-0.18 API; newer dask uses dask.config.set(scheduler='threads')
        #da.random.seed(seed)
        #arr = (da.random.normal(0.01, 1, (size,3), chunks=size//24)-0.5)*radius
        np.random.seed(seed)
        c = (np.random.normal(0.01, 1, (size, 3)) - 0.5) * radius
        arr = da.from_array(c, chunks=c.shape[0] // NCPUS)

        diff = arr[:, np.newaxis, :] - arr[np.newaxis, :, :]
        mat = da.sqrt((diff * diff).sum(-1))

        inv6 = (1. / mat)**6
        pot = 4. * (inv6 * inv6 - inv6)
        e = da.nansum(pot) / 2.

        return e.compute(num_workers=NCPUS)
Example no. 33
def test_gradient(shape, varargs, axis, edge_order):
    a = np.random.randint(0, 10, shape)
    d_a = da.from_array(a, chunks=(len(shape) * (5, )))

    r_a = np.gradient(a, *varargs, axis=axis, edge_order=edge_order)
    r_d_a = da.gradient(d_a, *varargs, axis=axis, edge_order=edge_order)

    if isinstance(axis, Number):
        assert_eq(r_d_a, r_a)
    else:
        assert len(r_d_a) == len(r_a)

        for e_r_d_a, e_r_a in zip(r_d_a, r_a):
            assert_eq(e_r_d_a, e_r_a)

        assert_eq(da.sqrt(sum(map(da.square, r_d_a))),
                  np.sqrt(sum(map(np.square, r_a))))
Example no. 34
def ttest_rel(a, b, axis=0, nan_policy="propagate"):
    if nan_policy != "propagate":
        raise NotImplementedError(
            "`nan_policy` other than 'propagate' have not been implemented.")

    n = a.shape[axis]
    df = float(n - 1)

    d = (a - b).astype(np.float64)
    v = da.var(d, axis, ddof=1)
    dm = da.mean(d, axis)
    denom = da.sqrt(v / float(n))

    with np.errstate(divide="ignore", invalid="ignore"):
        t = da.divide(dm, denom)
    t, prob = _ttest_finish(df, t)

    return delayed(Ttest_relResult, nout=2)(t, prob)
Example no. 37
def _equal_var_ttest_denom(v1, n1, v2, n2):
    df = n1 + n2 - 2.0
    svar = ((n1 - 1) * v1 + (n2 - 1) * v2) / df
    denom = da.sqrt(svar * (1.0 / n1 + 1.0 / n2))  # XXX: np -> da
    return df, denom
Example no. 38
def test_arithmetic():
    x = np.arange(5).astype('f4') + 2
    y = np.arange(5).astype('i8') + 2
    z = np.arange(5).astype('i4') + 2
    a = da.from_array(x, chunks=(2,))
    b = da.from_array(y, chunks=(2,))
    c = da.from_array(z, chunks=(2,))
    assert eq(a + b, x + y)
    assert eq(a * b, x * y)
    assert eq(a - b, x - y)
    assert eq(a / b, x / y)
    assert eq(b & b, y & y)
    assert eq(b | b, y | y)
    assert eq(b ^ b, y ^ y)
    assert eq(a // b, x // y)
    assert eq(a ** b, x ** y)
    assert eq(a % b, x % y)
    assert eq(a > b, x > y)
    assert eq(a < b, x < y)
    assert eq(a >= b, x >= y)
    assert eq(a <= b, x <= y)
    assert eq(a == b, x == y)
    assert eq(a != b, x != y)

    assert eq(a + 2, x + 2)
    assert eq(a * 2, x * 2)
    assert eq(a - 2, x - 2)
    assert eq(a / 2, x / 2)
    assert eq(b & True, y & True)
    assert eq(b | True, y | True)
    assert eq(b ^ True, y ^ True)
    assert eq(a // 2, x // 2)
    assert eq(a ** 2, x ** 2)
    assert eq(a % 2, x % 2)
    assert eq(a > 2, x > 2)
    assert eq(a < 2, x < 2)
    assert eq(a >= 2, x >= 2)
    assert eq(a <= 2, x <= 2)
    assert eq(a == 2, x == 2)
    assert eq(a != 2, x != 2)

    assert eq(2 + b, 2 + y)
    assert eq(2 * b, 2 * y)
    assert eq(2 - b, 2 - y)
    assert eq(2 / b, 2 / y)
    assert eq(True & b, True & y)
    assert eq(True | b, True | y)
    assert eq(True ^ b, True ^ y)
    assert eq(2 // b, 2 // y)
    assert eq(2 ** b, 2 ** y)
    assert eq(2 % b, 2 % y)
    assert eq(2 > b, 2 > y)
    assert eq(2 < b, 2 < y)
    assert eq(2 >= b, 2 >= y)
    assert eq(2 <= b, 2 <= y)
    assert eq(2 == b, 2 == y)
    assert eq(2 != b, 2 != y)

    assert eq(-a, -x)
    assert eq(abs(a), abs(x))
    assert eq(~(a == b), ~(x == y))
    assert eq(~(a == b), ~(x == y))

    assert eq(da.logaddexp(a, b), np.logaddexp(x, y))
    assert eq(da.logaddexp2(a, b), np.logaddexp2(x, y))
    assert eq(da.exp(b), np.exp(y))
    assert eq(da.log(a), np.log(x))
    assert eq(da.log10(a), np.log10(x))
    assert eq(da.log1p(a), np.log1p(x))
    assert eq(da.expm1(b), np.expm1(y))
    assert eq(da.sqrt(a), np.sqrt(x))
    assert eq(da.square(a), np.square(x))

    assert eq(da.sin(a), np.sin(x))
    assert eq(da.cos(b), np.cos(y))
    assert eq(da.tan(a), np.tan(x))
    assert eq(da.arcsin(b/10), np.arcsin(y/10))
    assert eq(da.arccos(b/10), np.arccos(y/10))
    assert eq(da.arctan(b/10), np.arctan(y/10))
    assert eq(da.arctan2(b*10, a), np.arctan2(y*10, x))
    assert eq(da.hypot(b, a), np.hypot(y, x))
    assert eq(da.sinh(a), np.sinh(x))
    assert eq(da.cosh(b), np.cosh(y))
    assert eq(da.tanh(a), np.tanh(x))
    assert eq(da.arcsinh(b*10), np.arcsinh(y*10))
    assert eq(da.arccosh(b*10), np.arccosh(y*10))
    assert eq(da.arctanh(b/10), np.arctanh(y/10))
    assert eq(da.deg2rad(a), np.deg2rad(x))
    assert eq(da.rad2deg(a), np.rad2deg(x))

    assert eq(da.logical_and(a < 1, b < 4), np.logical_and(x < 1, y < 4))
    assert eq(da.logical_or(a < 1, b < 4), np.logical_or(x < 1, y < 4))
    assert eq(da.logical_xor(a < 1, b < 4), np.logical_xor(x < 1, y < 4))
    assert eq(da.logical_not(a < 1), np.logical_not(x < 1))
    assert eq(da.maximum(a, 5 - a), np.maximum(x, 5 - x))
    assert eq(da.minimum(a, 5 - a), np.minimum(x, 5 - x))
    assert eq(da.fmax(a, 5 - a), np.fmax(x, 5 - x))
    assert eq(da.fmin(a, 5 - a), np.fmin(x, 5 - x))

    assert eq(da.isreal(a + 1j * b), np.isreal(x + 1j * y))
    assert eq(da.iscomplex(a + 1j * b), np.iscomplex(x + 1j * y))
    assert eq(da.isfinite(a), np.isfinite(x))
    assert eq(da.isinf(a), np.isinf(x))
    assert eq(da.isnan(a), np.isnan(x))
    assert eq(da.signbit(a - 3), np.signbit(x - 3))
    assert eq(da.copysign(a - 3, b), np.copysign(x - 3, y))
    assert eq(da.nextafter(a - 3, b), np.nextafter(x - 3, y))
    assert eq(da.ldexp(c, c), np.ldexp(z, z))
    assert eq(da.fmod(a * 12, b), np.fmod(x * 12, y))
    assert eq(da.floor(a * 0.5), np.floor(x * 0.5))
    assert eq(da.ceil(a), np.ceil(x))
    assert eq(da.trunc(a / 2), np.trunc(x / 2))

    assert eq(da.degrees(b), np.degrees(y))
    assert eq(da.radians(a), np.radians(x))

    assert eq(da.rint(a + 0.3), np.rint(x + 0.3))
    assert eq(da.fix(a - 2.5), np.fix(x - 2.5))

    assert eq(da.angle(a + 1j), np.angle(x + 1j))
    assert eq(da.real(a + 1j), np.real(x + 1j))
    assert eq((a + 1j).real, np.real(x + 1j))
    assert eq(da.imag(a + 1j), np.imag(x + 1j))
    assert eq((a + 1j).imag, np.imag(x + 1j))
    assert eq(da.conj(a + 1j * b), np.conj(x + 1j * y))
    assert eq((a + 1j * b).conj(), (x + 1j * y).conj())

    assert eq(da.clip(b, 1, 4), np.clip(y, 1, 4))
    assert eq(da.fabs(b), np.fabs(y))
    assert eq(da.sign(b - 2), np.sign(y - 2))

    l1, l2 = da.frexp(a)
    r1, r2 = np.frexp(x)
    assert eq(l1, r1)
    assert eq(l2, r2)

    l1, l2 = da.modf(a)
    r1, r2 = np.modf(x)
    assert eq(l1, r1)
    assert eq(l2, r2)

    assert eq(da.around(a, -1), np.around(x, -1))
Example no. 39
    def decomposition(self,
                      normalize_poissonian_noise=False,
                      algorithm='svd',
                      output_dimension=None,
                      signal_mask=None,
                      navigation_mask=None,
                      get=threaded.get,
                      num_chunks=None,
                      reproject=True,
                      bounds=False,
                      **kwargs):
        """Perform Incremental (Batch) decomposition on the data, keeping n
        significant components.

        Parameters
        ----------
        normalize_poissonian_noise : bool
            If True, scale the SI to normalize Poissonian noise
        algorithm : str
            One of ('svd', 'PCA', 'ORPCA', 'ONMF'). By default 'svd',
            lazy SVD decomposition from dask.
        output_dimension : int
            the number of significant components to keep. If None, keep all
            (only valid for SVD)
        get : dask scheduler
            the dask scheduler to use for computations;
            default `dask.threaded.get`
        num_chunks : int
            the number of dask chunks to pass to the decomposition model.
            More chunks require more memory, but should run faster. Will be
            increased to contain at least output_dimension signals.
        navigation_mask : {BaseSignal, numpy array, dask array}
            The navigation locations marked as True are not used in the
            decomposition.
        signal_mask : {BaseSignal, numpy array, dask array}
            The signal locations marked as True are not used in the
            decomposition.
        reproject : bool
            Reproject data on the learnt components (factors) after learning.
        **kwargs
            passed to the partial_fit/fit functions.

        Notes
        -----
        Various algorithm parameters and their default values:
            ONMF:
                lambda1=1,
                kappa=1,
                robust=False,
                store_r=False
                batch_size=None
            ORPCA:
                fast=True,
                lambda1=None,
                lambda2=None,
                method=None,
                learning_rate=None,
                init=None,
                training_samples=None,
                momentum=None
            PCA:
                batch_size=None,
                copy=True,
                white=False


        """
        if bounds:
            msg = (
                "The `bounds` keyword is deprecated and will be removed "
                "in v2.0. Since version > 1.3 this has no effect.")
            warnings.warn(msg, VisibleDeprecationWarning)
        explained_variance = None
        explained_variance_ratio = None
        _al_data = self._data_aligned_with_axes
        nav_chunks = _al_data.chunks[:self.axes_manager.navigation_dimension]
        sig_chunks = _al_data.chunks[self.axes_manager.navigation_dimension:]

        num_chunks = 1 if num_chunks is None else num_chunks
        blocksize = np.min([multiply(ar) for ar in product(*nav_chunks)])
        nblocks = multiply([len(c) for c in nav_chunks])
        if algorithm != "svd" and output_dimension is None:
            raise ValueError("With the %s the output_dimension "
                             "must be specified" % algorithm)
        if output_dimension and blocksize / output_dimension < num_chunks:
            num_chunks = np.ceil(blocksize / output_dimension)
        blocksize *= num_chunks
        # LEARN
        if algorithm == 'PCA':
            from sklearn.decomposition import IncrementalPCA
            obj = IncrementalPCA(n_components=output_dimension)
            method = partial(obj.partial_fit, **kwargs)
            reproject = True

        elif algorithm == 'ORPCA':
            from hyperspy.learn.rpca import ORPCA
            kwg = {'fast': True}
            kwg.update(kwargs)
            obj = ORPCA(output_dimension, **kwg)
            method = partial(obj.fit, iterating=True)

        elif algorithm == 'ONMF':
            from hyperspy.learn.onmf import ONMF
            batch_size = kwargs.pop('batch_size', None)
            obj = ONMF(output_dimension, **kwargs)
            method = partial(obj.fit, batch_size=batch_size)
        elif algorithm != "svd":
            raise ValueError('algorithm not known')

        original_data = self.data
        try:
            if normalize_poissonian_noise:
                data = self._data_aligned_with_axes
                ndim = self.axes_manager.navigation_dimension
                sdim = self.axes_manager.signal_dimension
                nm = da.logical_not(
                    da.zeros(
                        self.axes_manager.navigation_shape[::-1],
                        chunks=nav_chunks)
                    if navigation_mask is None else to_array(
                        navigation_mask, chunks=nav_chunks))
                sm = da.logical_not(
                    da.zeros(
                        self.axes_manager.signal_shape[::-1],
                        chunks=sig_chunks)
                    if signal_mask is None else to_array(
                        signal_mask, chunks=sig_chunks))
                ndim = self.axes_manager.navigation_dimension
                sdim = self.axes_manager.signal_dimension
                bH, aG = da.compute(
                    data.sum(axis=tuple(range(ndim))),
                    data.sum(axis=tuple(range(ndim, ndim + sdim))))
                bH = da.where(sm, bH, 1)
                aG = da.where(nm, aG, 1)

                raG = da.sqrt(aG)
                rbH = da.sqrt(bH)

                coeff = raG[(..., ) + (None, ) * rbH.ndim] *\
                    rbH[(None, ) * raG.ndim + (...,)]
                coeff = coeff.map_blocks(np.nan_to_num)
                coeff = da.where(coeff == 0, 1, coeff)
                data = data / coeff
                self.data = data

            # LEARN
            if algorithm == "svd":
                reproject = False
                from dask.array.linalg import svd
                try:
                    self._unfolded4decomposition = self.unfold()
                    # TODO: implement masking
                    if navigation_mask or signal_mask:
                        raise NotImplementedError(
                            "Masking is not yet implemented for lazy SVD."
                        )
                    U, S, V = svd(self.data)
                    factors = V.T
                    explained_variance = S ** 2 / self.data.shape[0]
                    loadings = U * S
                finally:
                    if self._unfolded4decomposition is True:
                        self.fold()
                        self._unfolded4decomposition = False
            else:
                this_data = []
                try:
                    for chunk in progressbar(
                            self._block_iterator(
                                flat_signal=True,
                                get=get,
                                signal_mask=signal_mask,
                                navigation_mask=navigation_mask),
                            total=nblocks,
                            leave=True,
                            desc='Learn'):
                        this_data.append(chunk)
                        if len(this_data) == num_chunks:
                            thedata = np.concatenate(this_data, axis=0)
                            method(thedata)
                            this_data = []
                    if len(this_data):
                        thedata = np.concatenate(this_data, axis=0)
                        method(thedata)
                except KeyboardInterrupt:
                    pass

            # GET ALREADY CALCULATED RESULTS
            if algorithm == 'PCA':
                explained_variance = obj.explained_variance_
                explained_variance_ratio = obj.explained_variance_ratio_
                factors = obj.components_.T

            elif algorithm == 'ORPCA':
                _, _, U, S, V = obj.finish()
                factors = U * S
                loadings = V
                explained_variance = S**2 / len(factors)

            elif algorithm == 'ONMF':
                factors, loadings = obj.finish()
                loadings = loadings.T

            # REPROJECT
            if reproject:
                if algorithm == 'PCA':
                    method = obj.transform

                    def post(a): return np.concatenate(a, axis=0)
                elif algorithm == 'ORPCA':
                    method = obj.project
                    obj.R = []

                    def post(a): return obj.finish()[4]
                elif algorithm == 'ONMF':
                    method = obj.project

                    def post(a): return np.concatenate(a, axis=1).T

                _map = map(lambda thing: method(thing),
                           self._block_iterator(
                               flat_signal=True,
                               get=get,
                               signal_mask=signal_mask,
                               navigation_mask=navigation_mask))
                H = []
                try:
                    for thing in progressbar(
                            _map, total=nblocks, desc='Project'):
                        H.append(thing)
                except KeyboardInterrupt:
                    pass
                loadings = post(H)

            if explained_variance is not None and \
                    explained_variance_ratio is None:
                explained_variance_ratio = \
                    explained_variance / explained_variance.sum()

            # RESHUFFLE "blocked" LOADINGS
            ndim = self.axes_manager.navigation_dimension
            if algorithm != "svd":  # Only needed for online algorithms
                try:
                    loadings = _reshuffle_mixed_blocks(
                        loadings,
                        ndim,
                        (output_dimension,),
                        nav_chunks).reshape((-1, output_dimension))
                except ValueError:
                    # In case the projection step was not finished, it's left
                    # as scrambled
                    pass
        finally:
            self.data = original_data

        target = self.learning_results
        target.decomposition_algorithm = algorithm
        target.output_dimension = output_dimension
        if algorithm != "svd":
            target._object = obj
        target.factors = factors
        target.loadings = loadings
        target.explained_variance = explained_variance
        target.explained_variance_ratio = explained_variance_ratio

        # Rescale the results if the noise was normalized
        if normalize_poissonian_noise is True:
            target.factors = target.factors * rbH.ravel()[:, np.newaxis]
            target.loadings = target.loadings * raG.ravel()[:, np.newaxis]
Example no. 40
    def decomposition(self,
                      output_dimension,
                      normalize_poissonian_noise=False,
                      algorithm='PCA',
                      signal_mask=None,
                      navigation_mask=None,
                      get=threaded.get,
                      num_chunks=None,
                      reproject=True,
                      bounds=True,
                      **kwargs):
        """Perform Incremental (Batch) decomposition on the data, keeping n
        significant components.

        Parameters
        ----------
        output_dimension : int
            the number of significant components to keep
        normalize_poissonian_noise : bool
            If True, scale the SI to normalize Poissonian noise
        algorithm : str
            One of ('PCA', 'ORPCA', 'ONMF'). By default ('PCA') IncrementalPCA
            from scikit-learn is run.
        get : dask scheduler
            the dask scheduler to use for computations;
            default `dask.threaded.get`
        num_chunks : int
            the number of dask chunks to pass to the decomposition model.
            More chunks require more memory, but should run faster. Will be
            increased to contain at least output_dimension signals.
        navigation_mask : {BaseSignal, numpy array, dask array}
            The navigation locations marked as True are not used in the
            decomposition.
        signal_mask : {BaseSignal, numpy array, dask array}
            The signal locations marked as True are not used in the
            decomposition.
        reproject : bool
            Reproject data on the learnt components (factors) after learning.
        bounds : {tuple, bool}
            The (min, max) values of the data to normalize before learning.
            If tuple (min, max), those values will be used for normalization.
            If True, extremes will be looked up (expensive), default.
            If False, no normalization is done (learning may be very slow).
            If normalize_poissonian_noise is True, this cannot be True.
        **kwargs
            passed to the partial_fit/fit functions.

        Notes
        -----
        Various algorithm parameters and their default values:
            ONMF:
                lambda1=1,
                kappa=1,
                robust=False,
                store_r=False
                batch_size=None
            ORPCA:
                fast=True,
                lambda1=None,
                lambda2=None,
                method=None,
                learning_rate=None,
                init=None,
                training_samples=None,
                momentum=None
            PCA:
                batch_size=None,
                copy=True,
                white=False


        """
        explained_variance = None
        explained_variance_ratio = None
        _al_data = self._data_aligned_with_axes
        nav_chunks = _al_data.chunks[:self.axes_manager.navigation_dimension]
        sig_chunks = _al_data.chunks[self.axes_manager.navigation_dimension:]

        num_chunks = 1 if num_chunks is None else num_chunks
        blocksize = np.min([multiply(ar) for ar in product(*nav_chunks)])
        nblocks = multiply([len(c) for c in nav_chunks])
        if blocksize / output_dimension < num_chunks:
            num_chunks = np.ceil(blocksize / output_dimension)
        blocksize *= num_chunks

        ## LEARN
        if algorithm == 'PCA':
            from sklearn.decomposition import IncrementalPCA
            obj = IncrementalPCA(n_components=output_dimension)
            method = partial(obj.partial_fit, **kwargs)
            reproject = True

        elif algorithm == 'ORPCA':
            from hyperspy.learn.rpca import ORPCA
            kwg = {'fast': True}
            kwg.update(kwargs)
            obj = ORPCA(output_dimension, **kwg)
            method = partial(obj.fit, iterating=True)

        elif algorithm == 'ONMF':
            from hyperspy.learn.onmf import ONMF
            batch_size = kwargs.pop('batch_size', None)
            obj = ONMF(output_dimension, **kwargs)
            method = partial(obj.fit, batch_size=batch_size)

        else:
            raise ValueError('algorithm not known')

        original_data = self.data
        try:
            if normalize_poissonian_noise:
                if bounds is True:
                    bounds = False
                    # warnings.warn?
                data = self._data_aligned_with_axes
                ndim = self.axes_manager.navigation_dimension
                sdim = self.axes_manager.signal_dimension
                nm = da.logical_not(
                    da.zeros(
                        self.axes_manager.navigation_shape[::-1],
                        chunks=nav_chunks)
                    if navigation_mask is None else to_array(
                        navigation_mask, chunks=nav_chunks))
                sm = da.logical_not(
                    da.zeros(
                        self.axes_manager.signal_shape[::-1],
                        chunks=sig_chunks)
                    if signal_mask is None else to_array(
                        signal_mask, chunks=sig_chunks))
                ndim = self.axes_manager.navigation_dimension
                sdim = self.axes_manager.signal_dimension
                bH, aG = da.compute(
                    data.sum(axis=tuple(range(ndim))),
                    data.sum(axis=tuple(range(ndim, ndim + sdim))))
                bH = da.where(sm, bH, 1)
                aG = da.where(nm, aG, 1)

                raG = da.sqrt(aG)
                rbH = da.sqrt(bH)

                coeff = raG[(..., ) + (None, )*rbH.ndim] *\
                        rbH[(None, )*raG.ndim + (...,)]
                coeff = coeff.map_blocks(np.nan_to_num)
                coeff = da.where(coeff == 0, 1, coeff)
                data = data / coeff
                self.data = data

            # normalize the data for learning algs:
            if bounds:
                if bounds is True:
                    _min, _max = da.compute(self.data.min(), self.data.max())
                else:
                    _min, _max = bounds
                self.data = (self.data - _min) / (_max - _min)

            # LEARN
            this_data = []
            try:
                for chunk in progressbar(
                        self._block_iterator(
                            flat_signal=True,
                            get=get,
                            signal_mask=signal_mask,
                            navigation_mask=navigation_mask),
                        total=nblocks,
                        leave=True,
                        desc='Learn'):
                    this_data.append(chunk)
                    if len(this_data) == num_chunks:
                        thedata = np.concatenate(this_data, axis=0)
                        method(thedata)
                        this_data = []
                if len(this_data):
                    thedata = np.concatenate(this_data, axis=0)
                    method(thedata)
            except KeyboardInterrupt:
                pass

            # GET ALREADY CALCULATED RESULTS
            if algorithm == 'PCA':
                explained_variance = obj.explained_variance_
                explained_variance_ratio = obj.explained_variance_ratio_
                factors = obj.components_.T

            elif algorithm == 'ORPCA':
                _, _, U, S, V = obj.finish()
                factors = U * S
                loadings = V
                explained_variance = S**2 / len(factors)

            elif algorithm == 'ONMF':
                factors, loadings = obj.finish()
                loadings = loadings.T

            # REPROJECT
            if reproject:
                if algorithm == 'PCA':
                    method = obj.transform
                    post = lambda a: np.concatenate(a, axis=0)
                elif algorithm == 'ORPCA':
                    method = obj.project
                    obj.R = []
                    post = lambda a: obj.finish()[4]
                elif algorithm == 'ONMF':
                    method = obj.project
                    post = lambda a: np.concatenate(a, axis=1).T

                _map = map(lambda thing: method(thing),
                           self._block_iterator(
                               flat_signal=True,
                               get=get,
                               signal_mask=signal_mask,
                               navigation_mask=navigation_mask))
                H = []
                try:
                    for thing in progressbar(
                            _map, total=nblocks, desc='Project'):
                        H.append(thing)
                except KeyboardInterrupt:
                    pass
                loadings = post(H)

            if explained_variance is not None and \
                    explained_variance_ratio is None:
                explained_variance_ratio = \
                    explained_variance / explained_variance.sum()

            # RESHUFFLE "blocked" LOADINGS
            ndim = self.axes_manager.navigation_dimension
            try:
                loadings = _reshuffle_mixed_blocks(
                    loadings,
                    ndim,
                    (output_dimension,),
                    nav_chunks).reshape((-1, output_dimension))
            except ValueError:
                # In case the projection step was not finished, it's left
                # as scrambled
                pass
        finally:
            self.data = original_data

        target = self.learning_results
        target.decomposition_algorithm = algorithm
        target.output_dimension = output_dimension
        target._object = obj
        target.factors = factors
        target.loadings = loadings
        target.explained_variance = explained_variance
        target.explained_variance_ratio = explained_variance_ratio