def test_apply_along_axis(func1d_name, func1d, shape, axis):
    a = np.random.randint(0, 10, shape)
    d = da.from_array(a, chunks=(len(shape) * (5, )))

    if (func1d_name == "range2"
            and LooseVersion(np.__version__) < LooseVersion("1.13.0")):
        with pytest.raises(ValueError):
            da.apply_along_axis(func1d, axis, d)
    else:
        assert_eq(da.apply_along_axis(func1d, axis, d),
                  np.apply_along_axis(func1d, axis, a))
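The test arguments come from `pytest.mark.parametrize` decorators that fall outside the snippet. A minimal sketch of such a setup (the specific entries here are assumptions; only the "range2" name is taken from the branch above, which expects that function to change the result's dimensionality, something numpy's apply_along_axis could not handle before 1.13):

import numpy as np
import pytest

@pytest.mark.parametrize("shape, axis", [((10, 15), 0), ((10, 15), 1)])
@pytest.mark.parametrize("func1d_name, func1d", [
    ("sum", lambda x: x.sum()),                            # scalar per slice
    ("range2", lambda x: np.array([[x.min(), x.max()],
                                   [x.max(), x.min()]])),  # 2-d per slice
])
def test_apply_along_axis(func1d_name, func1d, shape, axis):
    ...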
Example #2
def local_times(coords, time_index, hour_range=(7, 17)):
    """Return a 2d array of local datetimes for a set of points within
    the given hour range (7 AM to 5 PM by default).
    """
    # Day One
    t1 = time_index[0][:16].decode("utf-8")
    d1 = dt.datetime.strptime(t1, "%Y-%m-%dT%H:%M")
    coords["day1"] = coords["zone"].apply(lambda x: d1 + dt.timedelta(hours=x))

    # An array with every day in the hour range would be useful...
    # An array of first days
    #    first_days = [pd.Timestamp(d).to_pydatetime() for d in coords["day1"]]
    first_days = coords["day1"].values

    # Every half-hour increment in a day for one start date
    rt = range(len(time_index))
    periods = [30 * i for i in rt]

    def single(d):
        return np.array([d + np.timedelta64(periods[i], "m") for i in rt])

    # Longest way
    ndarray = []
    for i in tqdm(range(len(first_days))):
        array = single(first_days[i])
        ndarray.append(array)

    # With dask?
    ddays = da.from_array(first_days, chunks="auto")
    fddays = da.apply_along_axis(single, 0, ddays, dtype="<M8[ns]")
    return fddays
Example #3
def least_squares(lhs, rhs, rcond=None, skipna=False):
    import dask.array as da

    lhs_da = da.from_array(lhs, chunks=(rhs.chunks[0], lhs.shape[1]))
    if skipna:
        added_dim = rhs.ndim == 1
        if added_dim:
            rhs = rhs.reshape(rhs.shape[0], 1)
        results = da.apply_along_axis(
            nputils._nanpolyfit_1d,
            0,
            rhs,
            lhs_da,
            dtype=float,
            shape=(lhs.shape[1] + 1, ),
            rcond=rcond,
        )
        coeffs = results[:-1, ...]
        residuals = results[-1, ...]
        if added_dim:
            coeffs = coeffs.reshape(coeffs.shape[0])
            residuals = residuals.reshape(residuals.shape[0])
    else:
        coeffs, residuals, _, _ = da.linalg.lstsq(lhs_da, rhs)
    return coeffs, residuals
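The same pattern in a smaller, self-contained form: extra positional arguments after `arr` are forwarded to `func1d` for every 1-d slice, which is how `lhs_da` reaches `_nanpolyfit_1d` above. The data and the fit below are made up for illustration:

import numpy as np
import dask.array as da

x = np.linspace(0, 1, 50)                       # shared predictor
y = da.random.random((50, 8), chunks=(50, 4))   # one series per column

coeffs = da.apply_along_axis(
    lambda col, xs: np.polyfit(xs, col, 1),  # slope and intercept per column
    0,
    y,
    x,                                       # extra arg, forwarded to func1d
    dtype=float,
    shape=(2,),
)
print(coeffs.compute().shape)  # (2, 8)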
Example #4
def test_apply_along_axis(func1d_name, func1d, shape, axis):
    a = np.random.randint(0, 10, shape)
    d = da.from_array(a, chunks=(len(shape) * (5,)))
    assert_eq(
        da.apply_along_axis(func1d, axis, d),
        np.apply_along_axis(func1d, axis, a)
    )
Example #5
def detrend(cube, dimension='time', method='linear'):
    """
    Detrend data along a given dimension.
    Parameters
    ----------
    cube: iris.cube.Cube
        input cube.
    dimension: str
        Dimension to detrend
    method: str
        Method to detrend. Available: linear, constant. See documentation of
        'scipy.signal.detrend' for details
    Returns
    -------
    iris.cube.Cube
        Detrended cube
    """
    coord = cube.coord(dimension)
    axis = cube.coord_dims(coord)[0]
    detrended = da.apply_along_axis(
        scipy.signal.detrend,
        axis=axis,
        arr=cube.lazy_data(),
        type=method,
        shape=(cube.shape[axis],)
    )
    return cube.copy(detrended)
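The same call works on a plain dask array without iris; a minimal sketch with synthetic data, assuming a time axis of length 120:

import dask.array as da
import scipy.signal

data = da.random.random((120, 10, 10), chunks=(120, 5, 5))
detrended = da.apply_along_axis(
    scipy.signal.detrend,
    0,                     # apply along the time axis
    data,
    type='linear',
    shape=(120,),          # detrend returns a slice of unchanged length
    dtype=data.dtype,
)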
Example #6
def calibrate_posterior_predictive(post_pred, qc):
    """ Function to calibrate posterior predictive.

    This allows the calibrated model to make predictions. This function is required to compute
    mean and log likelihood of the calibrated model.

    Args:
        post_pred: posterior predictive of shape (num samples, num X values)
        qc: calibration object as defined in class QuantileCalibration

    Returns:
        calibrated posterior predictive of shape (num samples, num X values)
    """

    # Need to convert from jax array to dask array to avoid
    # out of memory error (on a 32GB machine for 8000 samples) in the next step.
    # This also helps to parallelize the task to all cpu cores.
    post_pred_shape = post_pred.shape
    res_main_post_pred = da.from_array(
        np.array(post_pred),
        chunks=(
            1000,  # reduce this value if out of memory!
            int(np.ceil(post_pred_shape[1] / dask.system.cpu_count())),
        ),
    )
    # expand to 3D: axis 0: num observations; axis 1: num samples; axis 2: num samples
    uncalibrated_pp_quantiles = (
        da.sum(res_main_post_pred.T[:, :, np.newaxis] <=
               res_main_post_pred.T[:, np.newaxis, :],
               axis=1).T / post_pred_shape[0])

    # calculate inverse R
    inverse_calibrated_pp_quantiles = da.apply_along_axis(
        qc.inverse_transform, 0, uncalibrated_pp_quantiles)

    # inverse CDF by looking up existing samples with np.quantile()
    da_combined = da.vstack(
        [res_main_post_pred,
         inverse_calibrated_pp_quantiles.compute()])
    calibrated_post_pred = da.apply_along_axis(
        lambda q: np.quantile(
            q[:post_pred_shape[0]], q[post_pred_shape[0]:], axis=0),
        0,
        da_combined,
    ).compute()

    return calibrated_post_pred
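The `vstack` step above packs the samples and the query quantiles into one array so that a single `apply_along_axis` call can split them again per column; a toy version of the same trick:

import numpy as np
import dask.array as da

n = 1000                                            # number of samples
samples = da.random.random((n, 5), chunks=(n, 1))   # axis 0: draws
quantiles = da.from_array(np.tile([[0.05], [0.5], [0.95]], (1, 5)))
combined = da.vstack([samples, quantiles])          # shape (n + 3, 5)

result = da.apply_along_axis(
    lambda col: np.quantile(col[:n], col[n:]),  # first n entries are data,
    0,                                          # the rest are query quantiles
    combined,
    dtype=float,
    shape=(3,),
).compute()                                     # shape (3, 5)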
Example #7
 def apply_distance_using_dask(t1, f1):
     # Compute the grid for any configuration.
     import numpy as np
     import dask.array as da
     # gps = da.stack(da.meshgrid(da.linspace(-0.5, 0.5, t1.nx_total), da.linspace(-0.5, 0.5, t1.ny_total), da.linspace(-0.5, 0.5, t1.nz_total)), -1).reshape(-1, 3)
     gps = da.stack(da.meshgrid(t1.x_grid, t1.y_grid, t1.z_grid, indexing='ij'), -1).reshape(-1, 3)  # * Only the unit cell.
     gps = gps.rechunk((10000, 3))  # chunks must be one tuple; rechunk's second positional argument is threshold
     grid = da.apply_along_axis(
         func1d=grid_point_distance,
         axis=1,
         arr=gps,
         frameda=da.from_array(t1.coord),
         Ada=da.from_array(t1.A),
         sig=da.from_array(f1.sigma),
         sigda=da.from_array(f1.sigma_array),
         epsda=da.from_array(f1.epsilon_array),
     )
     return grid
Example #8
def _qg_dask_array(x, axis, inplace):
    import dask.array as da
    from scipy.stats import norm
    from numpy_sugar import nanrankdata

    if inplace:
        raise NotImplementedError()

    x = x.swapaxes(1, axis)

    x = dask.array_shape_reveal(x)
    shape = da.compute(*x.shape)
    x = da.ma.masked_array(x)
    x *= -1
    x = da.apply_along_axis(_dask_apply, 0, x, nanrankdata, shape[0])
    x = x / (da.isfinite(x).sum(axis=0) + 1)
    x = da.apply_along_axis(_dask_apply, 0, x, norm.isf, shape[0])

    return x.swapaxes(1, axis)
Example #9
 def lazy_func(data, axis, x_data):
     """Calculate trend standard error lazily."""
     trend_std_arr = da.apply_along_axis(_get_slope_stderr,
                                         axis,
                                         data,
                                         x_data,
                                         dtype=data.dtype,
                                         shape=())
     trend_std_arr = da.ma.masked_invalid(trend_std_arr)
     return trend_std_arr
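`shape=()` declares a scalar result per slice, so the applied axis collapses and the output loses one dimension; a minimal illustration with made-up data:

import dask.array as da

data = da.random.random((100, 4, 4), chunks=(100, 2, 2))
out = da.apply_along_axis(lambda v: v.std(), 0, data, dtype=data.dtype, shape=())
assert out.shape == (4, 4)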
Example #10
def code_range(path_dict):
    """
    path_dict = RANGE_CATEGORIES[5]
    """

    # Create a numeric dictionary for these keys
    key_dict = {key: i + 1 for i, key in enumerate(path_dict.keys())}
    number_dict = {k: i for i, k in key_dict.items()}
    vals = key_dict.values()
    combos = [[c for c in combinations(vals, i + 1)] for i in range(len(vals))]
    combos = [c for sc in combos for c in sc]
    combo_keys = {}
    for combo in combos:
        key = "-".join([number_dict[c] for c in combo])
        value = seq_int(combo)
        combo_keys[key] = value

    # Assign each raster a unique value
    arrays = []
    for key, path in path_dict.items():
        value = key_dict[key]
        full_path = DP.join(path)
        array = xr.open_rasterio(full_path, chunks=CHUNKS)[0].data
        array[da.isnan(array)] = 0
        array[array > 0] = value
        arrays.append(array)

    # Stack everything together - we might have to save this a temporary file
    stack = da.stack(arrays, axis=0)
    stack = stack.rechunk((stack.shape[0], 5000, 5000))
    stack = stack[:, 4000:10000, 4000:10000]

    # Try to map the function to each point
    client = Client()
    codes = da.apply_along_axis(seq_int, 0, stack, dtype="uint8")
    future = client.compute(codes)
    result = future.result()
    client.shutdown()
    client.close()

    # Save to temp and delete
    template = rasterio.open(full_path)
    temp_path = DP2.join("test.tif")
    with rasterio.Env():
        profile = template.profile
        profile.update(dtype=rasterio.uint8, count=1, compress='lzw')
        with rasterio.open(temp_path, 'w', **profile) as dst:
            dst.write(result)
Example #11
    def xarray_MK_trend(self):
        """
        Computes linear trend over 'dim' of xr.dataarray.
        Slope and intercept of the least square fit are added to a 
        array which contains the slope, significance mask and p-test.
        """
        da = self.DataArray.copy().transpose(*self.ordered_dims)
        axis_num = da.get_axis_num(self.dim)

        data = dsa.apply_along_axis(self._calc_slope_MK,
                                    axis_num,
                                    da.data,
                                    dtype=np.float64,
                                    shape=(4, ))

        return data
Example #12
def test_apply_along_axis(func1d_name, func1d, specify_output_props, input_shape, axis):
    a = np.random.randint(0, 10, input_shape)
    d = da.from_array(a, chunks=(len(input_shape) * (5,)))

    output_shape = None
    output_dtype = None

    if specify_output_props:
        slices = [0] * a.ndim
        slices[axis] = slice(None)
        slices = tuple(slices)
        sample = np.array(func1d(a[slices]))
        output_shape = sample.shape
        output_dtype = sample.dtype

    assert_eq(
        da.apply_along_axis(func1d, axis, d, dtype=output_dtype, shape=output_shape),
        np.apply_along_axis(func1d, axis, a),
    )
Example #13
def to_xarray(zarr_data: xr.Dataset, channel_index: int,
              bottom_depth: xr.DataArray, attributes: dict) -> xr.Dataset:
    heave_corrected_transducer_depth = zarr_data['heave'] + zarr_data[
        'transducer_draft'][channel_index]
    bottom_range = bottom_depth - heave_corrected_transducer_depth

    # Get indices of the bottom_range
    bottom_range_idx = da.searchsorted(da.from_array(zarr_data.range),
                                       bottom_range.data)

    # Append indices to the last range
    bottom_range_1 = zarr_data.sv.isel(frequency=0).data
    bottom_range_1[:, -1] = bottom_range_idx

    # Convert annotation to 2d array
    bottom_range_2 = da.apply_along_axis(_bottom_2d,
                                         1,
                                         bottom_range_1,
                                         dtype='float32',
                                         shape=(bottom_range_1.shape[1], ))

    # Create dataset
    ds = xr.Dataset(
        data_vars=dict(
            bottom_range=(['ping_time', 'range'], bottom_range_2),
        ),
        coords=dict(
            frequency=zarr_data['frequency'][0],
            range=zarr_data['range'],
            ping_time=zarr_data['ping_time'],
        ),
    )

    # Remove unused dims
    remove_list = list(
        filter(lambda s: s not in ['frequency', 'ping_time', 'range'],
               list(ds.coords)))
    ds = ds.drop_vars(remove_list)

    for key in attributes.keys():
        ds.attrs[key] = attributes[key]

    return ds
Example #14
def _mean_standardize_dask_array(x, axis, inplace):
    import dask.array as da
    from numpy_sugar import epsilon
    from numpy import nanmean, clip, nanstd, inf

    if inplace:
        raise NotImplementedError()

    x = x.swapaxes(1, axis)

    x = dask.array_shape_reveal(x)
    shape = da.compute(*x.shape)

    def func(a):
        a -= nanmean(a, axis=0)
        a /= clip(nanstd(a, axis=0), epsilon.tiny, inf)
        return a

    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=RuntimeWarning)
        x = da.apply_along_axis(_dask_apply, 0, x, func, shape[0])

    return x.swapaxes(1, axis)
Example #15
    pass
logger.info('Initiating temporary file - ' + tmp_f)
f = h5py.File(tmp_f, 'w')  # 'w' creates the file; recent h5py no longer defaults to append mode
betavec = f.create_dataset('betavec', data=np.zeros([bim.shape[0], 1]),
                           chunks=(bim.shape[0] // nchunk, 1))
zvec = f.create_dataset('zvec', data=np.zeros([bim.shape[0], 1]),
                        chunks=(bim.shape[0] // nchunk, 1))
meanvec = f.create_dataset('meanvec', data=np.zeros([bim.shape[0], 1]),
                           chunks=(bim.shape[0] // nchunk, 1))
hetvec = f.create_dataset('hetvec', data=np.zeros([bim.shape[0], 1]),
                          chunks=(bim.shape[0] // nchunk, 1))

#####################################Calculation by Chunk ##############
#
# Begin chunk processing
for chunk1 in chunk_array:
  print('Processing Chunk - ' + str(chunk_ind) + ' of ' + str(nchunk))
  # Compute 
  geno_chunk = geno_ia[chunk1,:]
  g = da.apply_along_axis(SimpleImpute, 1, geno_chunk)
  with ProgressBar():
    results = da.compute(g)  # older dask accepted get=; current versions use scheduler=
  gtmp = np.stack(results[0][:,0])
  if args.apr_flag == 'N':
    vtmp = da.diag(da.matmul(gtmp, da.matmul(gtmp, C).transpose())).compute()
  else:
    vtmp = np.diagonal(np.matmul(gtmp, gtmp.transpose())) # assuming difference is identity 
  beta_tmp = np.matmul(gtmp, res_surv)
  betavec[chunk1, 0] = beta_tmp
  zvec[chunk1, 0] = beta_tmp/np.sqrt(vtmp)
  meanvec[chunk1, 0] = np.stack(results[0][:,1])
  hetvec[chunk1, 0] = np.stack(results[0][:,2])
  chunk_ind += 1

##########################################################################
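`SimpleImpute` itself is not part of this snippet; a hypothetical stand-in that fills missing genotypes with the row mean shows the per-row `apply_along_axis` pattern used above:

import numpy as np
import dask.array as da

def impute_row(row):
    # replace NaNs with the mean of the observed values in the row
    m = np.nanmean(row)
    return np.where(np.isnan(row), m, row)

geno = da.from_array(np.array([[1.0, np.nan, 3.0],
                               [np.nan, 2.0, 2.0]]))
filled = da.apply_along_axis(impute_row, 1, geno, dtype=float, shape=(3,)).compute()
# array([[1., 2., 3.],
#        [2., 2., 2.]])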
Example #16
"""
Function:
Author: Du Fei
Create Time: 2020/5/31 11:08
"""

import numpy as np
import dask.array as da
from dask.dataframe.utils import make_meta

if __name__ == '__main__':
    source_array = np.random.randint(0, 10, (2, 4))
    index_array = np.asarray([[0, 0], [1, 0], [2, 1], [3, 2]])

    b = np.apply_along_axis(lambda a: a[index_array], 1, source_array)
    print(b)

    source_array = da.from_array(source_array)
    # b = da.apply_along_axis(lambda a: a[index_array], 1, source_array)

    res = da.apply_along_axis(lambda a: a[index_array],
                              1,
                              source_array,
                              shape=make_meta(source_array).shape,
                              dtype=make_meta(source_array).dtype).compute()

    print(res)
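Without `shape` and `dtype`, dask probes `func1d` on a dummy one-element slice, which would fail here because `a[index_array]` needs the full-length slice; supplying them explicitly (as the snippet does via `make_meta`) avoids that. A sketch of the same call with the values written out directly (my reading of the intended shapes: each length-4 slice expands to `index_array.shape`):

res = da.apply_along_axis(
    lambda a: a[index_array],
    1,
    source_array,
    shape=index_array.shape,     # (4, 2) per slice
    dtype=source_array.dtype,
).compute()
print(res)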
Example #17
        print(res)

        #        start = time.time()
        #        res = d_da.map_blocks(
        #            lambda df: ruzicka_mat(cp.array(df), vector_new), dtype=cp.float32
        #        ).compute()
        #        print("dask_da_cupy:", time.time() - start)
        #        print(res)

    if cupy_raw:
        # 3 CUPY
        start = time.time()
        y = cp.zeros(len(d_da), dtype=cp.float32)
        ruzicka_kernel(
            ((len(d_da) + 31) // 32, (len(d_da) + 31) // 32),
            (32, 32),
            (cp.array(d_da), vector_new, y, 1024, len(d_da)),
        )
        print("cupy_raw:", time.time() - start)
        #      print(y, len(y))

    if dask_apply:
        # 4 DASK ARRAY WITH apply_along_axis
        start = time.time()
        res = da.apply_along_axis(lambda df: ruzicka_vec(df, vector_new), 1,
                                  d_da).compute()
        print("dask_apply:", time.time() - start)
        #     print(res, y)

    client.close()