Example #1
def nanaverage(a, weights, **kwargs):
    """
    Compute the weighted average, ignoring NaNs.
    """
    avg = da.nansum(a * weights, **kwargs)
    tot = da.nansum(weights, **kwargs)
    return nandiv(avg, tot)
Example #2
def _weighted_spatial_average(data, cosfield):
    """ Calculate weighted spatial average. """

    if isinstance(data, xr.DataArray):
        data = data.data
    if isinstance(data, np.ndarray):
        data = da.from_array(data, chunks=(1000, 1000))
    return da.nansum(data * cosfield) / da.nansum(cosfield)
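A minimal usage sketch, assuming _weighted_spatial_average above (and its np, xr, and da imports) is in scope; the field and cosine-of-latitude weights are hypothetical:

import numpy as np
import dask.array as da

# Hypothetical 2x2 field with one missing value and matching weights.
field = np.array([[1.0, np.nan],
                  [3.0, 4.0]])
cosfield = da.from_array(np.array([[0.5, 0.5],
                                   [1.0, 1.0]]), chunks=(1, 2))

avg = _weighted_spatial_average(field, cosfield)
# (1*0.5 + 3*1.0 + 4*1.0) / (0.5 + 0.5 + 1.0 + 1.0) = 2.5
print(float(avg.compute()))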
Example #3
def test_nan():
    x = np.array([[1, np.nan, 3, 4], [5, 6, 7, np.nan], [9, 10, 11, 12]])
    d = da.from_array(x, chunks=(2, 2))

    assert_eq(np.nansum(x), da.nansum(d))
    assert_eq(np.nansum(x, axis=0), da.nansum(d, axis=0))
    assert_eq(np.nanmean(x, axis=1), da.nanmean(d, axis=1))
    assert_eq(np.nanmin(x, axis=1), da.nanmin(d, axis=1))
    assert_eq(np.nanmax(x, axis=(0, 1)), da.nanmax(d, axis=(0, 1)))
    assert_eq(np.nanvar(x), da.nanvar(d))
    assert_eq(np.nanstd(x, axis=0), da.nanstd(d, axis=0))
    assert_eq(np.nanargmin(x, axis=0), da.nanargmin(d, axis=0))
    assert_eq(np.nanargmax(x, axis=0), da.nanargmax(d, axis=0))
    assert_eq(np.nanprod(x), da.nanprod(d))
Example #4
def lj(cluster, do_forces=True, *parameters):
    if cluster.ndim == 1:
        cluster = cluster.reshape(-1, 3)

    diff = distance_matrix(cluster)
    r2 = (diff**2).sum(-1)

    energy = da.nansum(potential(r2, *parameters)) / 2.

    if do_forces:
        forces = da.nansum(gradient(r2, *parameters)[:, :, np.newaxis] * diff,
                           axis=0)
        return energy, forces
    else:
        return energy
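distance_matrix, potential, and gradient are project helpers not shown here. A hedged sketch of potential(r2) as a standard 12-6 Lennard-Jones pair potential, consistent with the inline form used in Example #14 below (the real helper's signature and parameters may differ):

def potential(r2, epsilon=1.0, sigma=1.0):
    # Hypothetical 12-6 Lennard-Jones potential on squared distances.
    # The i == j entries of r2 are zero, so 1/r2 is inf and the
    # inf - inf terms below become NaN; da.nansum above discards them.
    inv6 = (sigma ** 2 / r2) ** 3
    return 4.0 * epsilon * (inv6 * inv6 - inv6)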
Example #5
def test_nan():
    x = np.array([[1, np.nan, 3, 4], [5, 6, 7, np.nan], [9, 10, 11, 12]])
    d = da.from_array(x, blockshape=(2, 2))

    assert eq(np.nansum(x), da.nansum(d))
    assert eq(np.nansum(x, axis=0), da.nansum(d, axis=0))
    assert eq(np.nanmean(x, axis=1), da.nanmean(d, axis=1))
    assert eq(np.nanmin(x, axis=1), da.nanmin(d, axis=1))
    assert eq(np.nanmax(x, axis=(0, 1)), da.nanmax(d, axis=(0, 1)))
    assert eq(np.nanvar(x), da.nanvar(d))
    assert eq(np.nanstd(x, axis=0), da.nanstd(d, axis=0))
    assert eq(np.nanargmin(x, axis=0), da.nanargmin(d, axis=0))
    assert eq(np.nanargmax(x, axis=0), da.nanargmax(d, axis=0))
    with ignoring(AttributeError):
        assert eq(np.nanprod(x), da.nanprod(d))
Example #6
def test_nan():
    x = np.array([[1, np.nan, 3, 4],
                  [5, 6, 7, np.nan],
                  [9, 10, 11, 12]])
    d = da.from_array(x, chunks=(2, 2))

    assert_eq(np.nansum(x), da.nansum(d))
    assert_eq(np.nansum(x, axis=0), da.nansum(d, axis=0))
    assert_eq(np.nanmean(x, axis=1), da.nanmean(d, axis=1))
    assert_eq(np.nanmin(x, axis=1), da.nanmin(d, axis=1))
    assert_eq(np.nanmax(x, axis=(0, 1)), da.nanmax(d, axis=(0, 1)))
    assert_eq(np.nanvar(x), da.nanvar(d))
    assert_eq(np.nanstd(x, axis=0), da.nanstd(d, axis=0))
    assert_eq(np.nanargmin(x, axis=0), da.nanargmin(d, axis=0))
    assert_eq(np.nanargmax(x, axis=0), da.nanargmax(d, axis=0))
    assert_eq(nanprod(x), da.nanprod(d))
Example #7
def compute_maf(X):
    r"""Compute minor allele frequencies.

    It assumes that ``X`` encodes 0, 1, and 2 representing the number
    of alleles (or dosage), or ``NaN`` to represent missing values.

    Parameters
    ----------
    X : array_like
        Genotype matrix.

    Returns
    -------
    array_like
        Minor allele frequencies.

    Examples
    --------
    .. doctest::

        >>> from numpy.random import RandomState
        >>> from limix.qc import compute_maf
        >>>
        >>> random = RandomState(0)
        >>> X = random.randint(0, 3, size=(100, 10))
        >>>
        >>> print(compute_maf(X)) # doctest: +FLOAT_CMP
        [0.49  0.49  0.445 0.495 0.5   0.45  0.48  0.48  0.47  0.435]
    """
    import dask.array as da
    import xarray as xr
    from pandas import DataFrame
    from numpy import isnan, logical_not, minimum, nansum

    if isinstance(X, da.Array):
        s0 = da.nansum(X, axis=0).compute()
        denom = 2 * (X.shape[0] - da.isnan(X).sum(axis=0)).compute()
    elif isinstance(X, DataFrame):
        s0 = X.sum(axis=0, skipna=True)
        denom = 2 * logical_not(X.isna()).sum(axis=0)
    elif isinstance(X, xr.DataArray):
        if "sample" in X.dims:
            kwargs = {"dim": "sample"}
        else:
            kwargs = {"axis": 0}
        s0 = X.sum(skipna=True, **kwargs)
        denom = 2 * logical_not(isnan(X)).sum(**kwargs)
    else:
        s0 = nansum(X, axis=0)
        denom = 2 * logical_not(isnan(X)).sum(axis=0)

    s0 = s0 / denom
    s1 = 1 - s0
    maf = minimum(s0, s1)

    if hasattr(maf, "name"):
        maf.name = "maf"

    return maf
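The doctest above covers a NumPy input; a minimal sketch of the Dask branch with a hypothetical genotype matrix (the limix.qc import path is taken from the docstring):

import numpy as np
import dask.array as da
from limix.qc import compute_maf  # import path as shown in the docstring

# Hypothetical genotype matrix with one missing call, chunked over samples.
G = np.array([[0.0, 2.0],
              [1.0, np.nan],
              [2.0, 0.0]])
maf = compute_maf(da.from_array(G, chunks=(2, 2)))
print(maf)  # [0.5 0.5] -- per-variant minor allele frequencies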
Example #8
def test_reduction_names():
    x = da.ones(5, chunks=(2,))
    assert x.sum().name.startswith('sum')
    assert 'max' in x.max().name.split('-')[0]
    assert x.var().name.startswith('var')
    assert x.all().name.startswith('all')
    assert any(k[0].startswith('nansum') for k in da.nansum(x).dask)
    assert x.mean().name.startswith('mean')
Example #9
def test_nan():
    x = np.array([[1, np.nan, 3, 4],
                  [5, 6, 7, np.nan],
                  [9, 10, 11, 12]])
    d = da.from_array(x, blockshape=(2, 2))

    assert eq(np.nansum(x), da.nansum(d))
    assert eq(np.nansum(x, axis=0), da.nansum(d, axis=0))
    assert eq(np.nanmean(x, axis=1), da.nanmean(d, axis=1))
    assert eq(np.nanmin(x, axis=1), da.nanmin(d, axis=1))
    assert eq(np.nanmax(x, axis=(0, 1)), da.nanmax(d, axis=(0, 1)))
    assert eq(np.nanvar(x), da.nanvar(d))
    assert eq(np.nanstd(x, axis=0), da.nanstd(d, axis=0))
    assert eq(np.nanargmin(x, axis=0), da.nanargmin(d, axis=0))
    assert eq(np.nanargmax(x, axis=0), da.nanargmax(d, axis=0))
    with ignoring(AttributeError):
        assert eq(np.nanprod(x), da.nanprod(d))
Example #10
def test_reduction_names():
    x = da.ones(5, chunks=(2, ))
    assert x.sum().name.startswith("sum")
    assert "max" in x.max().name.split("-")[0]
    assert x.var().name.startswith("var")
    assert x.all().name.startswith("all")
    assert any(k[0].startswith("nansum") for k in da.nansum(x).dask)
    assert x.mean().name.startswith("mean")
Example #11
def test_reduction_names():
    x = da.ones(5, chunks=(2,))
    assert x.sum().name.startswith('sum')
    assert 'max' in x.max().name.split('-')[0]
    assert x.var().name.startswith('var')
    assert x.all().name.startswith('all')
    assert any(k[0].startswith('nansum') for k in da.nansum(x).dask)
    assert x.mean().name.startswith('mean')
Example #12
def evaluate(cluster, do_forces=True):
    if cluster.ndim == 1:
        cluster = cluster.reshape(-1, 3)

    if NCPUS > cluster.shape[0]:
        chunks = 1
    else:
        chunks = cluster.shape[0] // NCPUS

    darr = da.from_array(cluster, chunks=chunks)

    diff, r2, _ = distance_matrix(darr)

    energy = da.nansum(potential(r2)) / 2.

    if do_forces:
        forces = da.nansum(gradient(r2)[:, :, np.newaxis] * diff, axis=0)
        return energy.compute(), forces.compute()
    else:
        return energy.compute()
Example #13
def update_velocities(position, velocity, mass, G, epsilon):
    """Calculate the interactions between all particles and update the velocities.
    
    Args:
    position (dask array): dask array of all particle positions in cartesian coordinates.
    velocity (dask array): dask array of all particle velocities in cartesian coordinates.
    mass (dask array): dask array of all particle masses.
    G (float): gravitational constant.
    epsilon (float): softening parameter.
    
    Returns:
    velocity: updated particle velocities in cartesian coordinates.
    """
    dx = da.subtract.outer(position[:, 0], position[:, 0])
    dy = da.subtract.outer(position[:, 1], position[:, 1])
    dz = da.subtract.outer(position[:, 2], position[:, 2])
    r2 = da.square(dx) + da.square(dy) + da.square(dz) + da.square(epsilon)
    #
    coef = -G * mass[:]
    ax = coef * dx
    ay = coef * dy
    az = coef * dz
    #
    ax_scaled = da.divide(ax, r2)
    ay_scaled = da.divide(ay, r2)
    az_scaled = da.divide(az, r2)
    #
    total_ax = da.nansum(ax_scaled, axis=1)
    total_ay = da.nansum(ay_scaled, axis=1)
    total_az = da.nansum(az_scaled, axis=1)
    #
    velocity_x = da.diag(da.add.outer(da.transpose(velocity)[0], total_ax))
    velocity_y = da.diag(da.add.outer(da.transpose(velocity)[1], total_ay))
    velocity_z = da.diag(da.add.outer(da.transpose(velocity)[2], total_az))
    #
    velocity = np.column_stack((velocity_x.compute(), velocity_y.compute(),
                                velocity_z.compute()))
    return velocity
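A minimal usage sketch with hypothetical particle data, assuming update_velocities above (and its np/da imports) is in scope; note it returns a NumPy array because it calls compute() internally:

import numpy as np
import dask.array as da

np.random.seed(0)
n = 100
position = da.from_array(np.random.rand(n, 3), chunks=(50, 3))
velocity = da.from_array(np.zeros((n, 3)), chunks=(50, 3))
mass = da.from_array(np.ones(n), chunks=50)

# One velocity update step with hypothetical constants.
velocity = update_velocities(position, velocity, mass, G=6.674e-11, epsilon=1e-3)
print(velocity.shape)  # (100, 3)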
Example #14
def do_compute(seed, size=int(4e4), radius=300):
    with dask.set_options(get=dask.threaded.get):
        #da.random.seed(seed)
        #arr = (da.random.normal(0.01, 1, (size,3), chunks=size//24)-0.5)*radius
        np.random.seed(seed)
        c = (np.random.normal(0.01, 1, (size, 3)) - 0.5) * radius
        arr = da.from_array(c, chunks=c.shape[0] // NCPUS)

        diff = arr[:, np.newaxis, :] - arr[np.newaxis, :, :]
        mat = da.sqrt((diff * diff).sum(-1))

        inv6 = (1. / mat)**6
        pot = 4. * (inv6 * inv6 - inv6)
        e = da.nansum(pot) / 2.

        return e.compute(num_workers=NCPUS)
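The nansum here is what removes the self-interaction terms: the diagonal of mat is zero, so 1./mat is infinite and inv6*inv6 - inv6 evaluates to NaN there. A small NumPy-only illustration of that mechanism (hypothetical distances):

import numpy as np

r = np.array([0.0, 1.0, 2.0])            # 0.0 plays the role of an i == j self-distance
with np.errstate(divide='ignore', invalid='ignore'):
    inv6 = (1.0 / r) ** 6                # inf at r == 0
    pot = 4.0 * (inv6 * inv6 - inv6)     # inf - inf -> NaN at r == 0
print(np.nansum(pot) / 2.0)              # the NaN term is ignored, as in the Dask version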
Example #15
def test_reductions():
    x = np.arange(5).astype('f4')
    a = da.from_array(x, chunks=(2,))

    assert eq(da.all(a), np.all(x))
    assert eq(da.any(a), np.any(x))
    assert eq(da.argmax(a, axis=0), np.argmax(x, axis=0))
    assert eq(da.argmin(a, axis=0), np.argmin(x, axis=0))
    assert eq(da.max(a), np.max(x))
    assert eq(da.mean(a), np.mean(x))
    assert eq(da.min(a), np.min(x))
    assert eq(da.nanargmax(a, axis=0), np.nanargmax(x, axis=0))
    assert eq(da.nanargmin(a, axis=0), np.nanargmin(x, axis=0))
    assert eq(da.nanmax(a), np.nanmax(x))
    assert eq(da.nanmin(a), np.nanmin(x))
    assert eq(da.nansum(a), np.nansum(x))
    assert eq(da.nanvar(a), np.nanvar(x))
    assert eq(da.nanstd(a), np.nanstd(x))
Example #16
def test_reductions():
    x = np.arange(5).astype('f4')
    a = da.from_array(x, blockshape=(2, ))

    assert eq(da.all(a), np.all(x))
    assert eq(da.any(a), np.any(x))
    assert eq(da.argmax(a, axis=0), np.argmax(x, axis=0))
    assert eq(da.argmin(a, axis=0), np.argmin(x, axis=0))
    assert eq(da.max(a), np.max(x))
    assert eq(da.mean(a), np.mean(x))
    assert eq(da.min(a), np.min(x))
    assert eq(da.nanargmax(a, axis=0), np.nanargmax(x, axis=0))
    assert eq(da.nanargmin(a, axis=0), np.nanargmin(x, axis=0))
    assert eq(da.nanmax(a), np.nanmax(x))
    assert eq(da.nanmin(a), np.nanmin(x))
    assert eq(da.nansum(a), np.nansum(x))
    assert eq(da.nanvar(a), np.nanvar(x))
    assert eq(da.nanstd(a), np.nanstd(x))
Example #17
def compute_maf(X):
    r"""Compute minor allele frequencies.

    It assumes that ``X`` encodes 0, 1, and 2 representing the number
    of alleles (or dosage), or ``NaN`` to represent missing values.

    Parameters
    ----------
    X : array_like
        Genotype matrix.

    Returns
    -------
    array_like
        Minor allele frequencies.

    Examples
    --------
    .. doctest::

        >>> from numpy.random import RandomState
        >>> from limix.qc import compute_maf
        >>>
        >>> random = RandomState(0)
        >>> X = random.randint(0, 3, size=(100, 10))
        >>>
        >>> print(compute_maf(X)) # doctest: +FLOAT_CMP
        [0.49  0.49  0.445 0.495 0.5   0.45  0.48  0.48  0.47  0.435]
    """
    import dask.array as da
    from numpy import isnan, logical_not, minimum, nansum

    if isinstance(X, da.Array):
        s0 = da.nansum(X, axis=0).compute()
        denom = 2 * (X.shape[0] - da.isnan(X).sum(axis=0)).compute()
    else:
        s0 = nansum(X, axis=0)
        denom = 2 * logical_not(isnan(X)).sum(axis=0)
    s0 = s0 / denom
    s1 = 1 - s0
    return minimum(s0, s1)
Example #18
    def ds(self):
        if self._ds is None:
            file_exists = os.path.exists(self._result_file)

            reprocess = not file_exists or self._reprocess

            if reprocess:
                if file_exists:
                    print('Old file exists ' + self._result_file)
                    #print('Removing old file ' + self._result_file)
                    #shutil.rmtree(self._result_file)

                ds_data = OrderedDict()

                to_seconds = np.vectorize(
                    lambda x: x.seconds + x.microseconds / 1E6)

                print('Processing binary data...')
                xx, yy, zz = self._loadgrid()
                if xx is None:
                    if self._from_nc:
                        print('Processing existing netcdf...')
                        fn = self._result_file[:-5] + '_QC_raw.nc'
                        if os.path.exists(fn):
                            ds_temp = xr.open_dataset(self._result_file[:-5] +
                                                      '_QC_raw.nc',
                                                      chunks={'time': 50})
                            u = da.transpose(ds_temp['U'].data,
                                             axes=[3, 0, 1, 2])
                            v = da.transpose(ds_temp['V'].data,
                                             axes=[3, 0, 1, 2])
                            w = da.transpose(ds_temp['W'].data,
                                             axes=[3, 0, 1, 2])
                            tt = ds_temp['time']
                            te = (tt - tt[0]) / np.timedelta64(1, 's')
                            xx = ds_temp['x'].values
                            yy = ds_temp['y'].values
                            zz = ds_temp['z'].values
                        else:
                            print('USING OLD ZARR DATA')
                            ds_temp = xr.open_zarr(self._result_file)
                            u = da.transpose(ds_temp['U'].data,
                                             axes=[3, 0, 1, 2])
                            v = da.transpose(ds_temp['V'].data,
                                             axes=[3, 0, 1, 2])
                            w = da.transpose(ds_temp['W'].data,
                                             axes=[3, 0, 1, 2])
                            tt = ds_temp['time']
                            te = (tt - tt[0]) / np.timedelta64(1, 's')
                            xx = ds_temp['x'].values
                            yy = ds_temp['y'].values
                            zz = ds_temp['z'].values
                            print('ERROR: No NetCDF data found for ' +
                                  self._xml_file)
                            #return None
                            # print(u.shape)

                else:
                    tt, uvw = self._loaddata(xx, yy, zz)
                    if tt is None:
                        print('ERROR: No binary data found for ' +
                              self._xml_file)
                        return None

                    # calculate the elapsed time from the Timestamp objects and then convert to datetime64 datatype
                    te = to_seconds(tt - tt[0])
                    tt = pd.to_datetime(tt)
                    uvw = uvw.persist()
                    u = uvw[:, :, :, :, 0]
                    v = uvw[:, :, :, :, 1]
                    w = uvw[:, :, :, :, 2]


#                    u = xr.DataArray(uvw[:,:,:,:,0], coords=[tt, xx, yy, zz], dims=['time','x', 'y', 'z'],
#                                     name='U', attrs={'standard_name': 'sea_water_x_velocity', 'units': 'm s-1'})
#                    v = xr.DataArray(uvw[:,:,:,:,1], coords=[tt, xx, yy, zz], dims=['time', 'x', 'y', 'z'],
#                                     name='V', attrs={'standard_name': 'sea_water_x_velocity', 'units': 'm s-1'})
#                    w = xr.DataArray(uvw[:,:,:,:,2], coords=[tt, xx, yy, zz], dims=['time', 'x', 'y', 'z'],
#                                     name='W', attrs={'standard_name': 'upward_sea_water_velocity', 'units': 'm s-1'})

                if xx is None:
                    print('No data found')
                    return None

                u = u.persist()
                v = v.persist()
                w = w.persist()

                dx = float(xx[1] - xx[0])
                dy = float(yy[1] - yy[0])
                dz = float(zz[1] - zz[0])

                if self._norm_dims:
                    exp = self._result_root.split('/')[4]
                    runSheet = pd.read_csv('~/RunSheet-%s.csv' % exp)
                    runSheet = runSheet.set_index('RunID')
                    runDetails = runSheet.ix[int(self.run_id[-2:])]

                    T = runDetails['T (s)']
                    h = runDetails['h (m)']
                    D = runDetails['D (m)']

                    ww = te / T
                    om = 2. * np.pi / T
                    d_s = (2. * 1E-6 / om)**0.5
                    bl = 3. * np.pi / 4. * d_s

                    if exp == 'Exp6':
                        if D == 0.1:
                            dy_c = (188. + 82.) / 2
                            dx_c = 39.25
                            cx = dx_c / 1000.
                            cy = dy_c / 1000.
                        else:
                            dy_c = (806. + 287.) / 2. * 0.22
                            dx_c = 113 * 0.22
                            cx = dx_c / 1000.
                            cy = dy_c / 1000.
                    elif exp == 'Exp8':
                        dy_c = 624 * 0.22
                        dx_c = 15
                        cx = dx_c / 1000.
                        cy = dy_c / 1000.
                    xn = (xx + (D / 2. - cx)) / D
                    yn = (yy - cy) / D
                    zn = zz / h

                    xnm, ynm = np.meshgrid(xn, yn)
                    rr = np.sqrt(xnm**2. + ynm**2)
                    cylMask = rr < 0.5

                    nanPlane = np.ones(cylMask.shape)
                    nanPlane[cylMask] = np.nan
                    nanPlane = nanPlane.T
                    nanPlane = nanPlane[np.newaxis, :, :, np.newaxis]

                    u = u * nanPlane
                    v = v * nanPlane
                    w = w * nanPlane

                    if D == 0.1:
                        xInds = xn > 3.
                    else:
                        xInds = xn > 2.

                    blInd = np.argmax(zn > bl / h)
                    blPlane = int(round(blInd))

                    Ue = u[:, xInds, :, :]
                    Ue_bar = da.nanmean(Ue, axis=(1, 2, 3)).compute()
                    Ue_bl = da.nanmean(Ue[:, :, :, blPlane],
                                       axis=(1, 2)).compute()

                    inds = ~np.isnan(Ue_bl)

                    xv = ww[inds] % 1.
                    xv = xv + np.random.normal(scale=1E-6, size=xv.shape)
                    yv = Ue_bl[inds]
                    xy = np.stack([
                        np.concatenate([xv - 1., xv, xv + 1.]),
                        np.concatenate([yv, yv, yv])
                    ]).T
                    xy = xy[xy[:, 0].argsort(), :]
                    xi = np.linspace(-0.5, 1.5, len(xv) // 8)
                    n = np.nanmax(xy[:, 1])
                    # print(n)
                    # fig,ax = pl.subplots()
                    # ax.scatter(xy[:,0],xy[:,1]/n)
                    # print(xy)
                    spl = si.LSQUnivariateSpline(xy[:, 0],
                                                 xy[:, 1] / n,
                                                 t=xi,
                                                 k=3)
                    roots = spl.roots()
                    der = spl.derivative()
                    slope = der(roots)
                    inds = np.min(np.where(slope > 0))
                    dt = (roots[inds] % 1.).mean() - 0.5

                    tpx = np.arange(0, 0.5, 0.001)
                    U0_bl = np.abs(spl(tpx + dt).min() * n)
                    ws = ww - dt
                    Ue_spl = spl((ws - 0.5) % 1.0 + dt) * n * -1.0

                    #maxima = spl.derivative().roots()
                    #Umax = spl(maxima)
                    #UminIdx = np.argmin(Umax)
                    #U0_bl = np.abs(Umax[UminIdx]*n)

                    #ww_at_min = maxima[UminIdx]
                    #ws = ww - ww_at_min + 0.25

                    inds = ~np.isnan(Ue_bar)

                    xv = ww[inds] % 1.
                    xv = xv + np.random.normal(scale=1E-6, size=xv.shape)
                    yv = Ue_bar[inds]
                    xy = np.stack([
                        np.concatenate([xv - 1., xv, xv + 1.]),
                        np.concatenate([yv, yv, yv])
                    ]).T
                    xy = xy[xy[:, 0].argsort(), :]
                    xi = np.linspace(-0.5, 1.5, len(xv) // 8)
                    n = np.nanmax(xy[:, 1])
                    spl = si.LSQUnivariateSpline(xy[:, 0],
                                                 xy[:, 1] / n,
                                                 t=xi,
                                                 k=4)
                    maxima = spl.derivative().roots()
                    Umax = spl(maxima)
                    UminIdx = np.argmin(Umax)
                    U0_bar = np.abs(Umax[UminIdx] * n)

                    ww = xr.DataArray(ww, coords=[
                        tt,
                    ], dims=[
                        'time',
                    ])
                    ws = xr.DataArray(ws - 0.5, coords=[
                        tt,
                    ], dims=[
                        'time',
                    ])

                    xn = xr.DataArray(xn, coords=[
                        xx,
                    ], dims=[
                        'x',
                    ])
                    yn = xr.DataArray(yn, coords=[
                        yy,
                    ], dims=[
                        'y',
                    ])
                    zn = xr.DataArray(zn, coords=[
                        zz,
                    ], dims=[
                        'z',
                    ])

                    Ue_bar = xr.DataArray(Ue_bar,
                                          coords=[
                                              tt,
                                          ],
                                          dims=[
                                              'time',
                                          ])
                    Ue_bl = xr.DataArray(Ue_bl, coords=[
                        tt,
                    ], dims=[
                        'time',
                    ])
                    Ue_spl = xr.DataArray(Ue_spl,
                                          coords=[
                                              tt,
                                          ],
                                          dims=[
                                              'time',
                                          ])

                    ds_data['ww'] = ww
                    ds_data['ws'] = ws

                    ds_data['xn'] = xn
                    ds_data['yn'] = yn
                    ds_data['zn'] = zn

                    ds_data['Ue_bar'] = Ue_bar
                    ds_data['Ue_bl'] = Ue_bl
                    ds_data['Ue_spl'] = Ue_spl

                te = xr.DataArray(te, coords=[
                    tt,
                ], dims=[
                    'time',
                ])

                dims = ['time', 'x', 'y', 'z']
                coords = [tt, xx, yy, zz]

                ds_data['U'] = xr.DataArray(u,
                                            coords=coords,
                                            dims=dims,
                                            name='U',
                                            attrs={
                                                'standard_name':
                                                'sea_water_x_velocity',
                                                'units': 'm s-1'
                                            })
                ds_data['V'] = xr.DataArray(v,
                                            coords=coords,
                                            dims=dims,
                                            name='V',
                                            attrs={
                                                'standard_name':
                                                'sea_water_x_velocity',
                                                'units': 'm s-1'
                                            })
                ds_data['W'] = xr.DataArray(w,
                                            coords=coords,
                                            dims=dims,
                                            name='W',
                                            attrs={
                                                'standard_name':
                                                'sea_water_x_velocity',
                                                'units': 'm s-1'
                                            })
                ds_data['te'] = te

                # stdV = da.nanstd(v)
                # stdW = da.nanstd(w)
                # thres=7.
                if 'U0_bl' in locals():
                    condition = (da.fabs(v) / U0_bl >
                                 1.5) | (da.fabs(w) / U0_bl > 0.6)
                    for var in ['U', 'V', 'W']:
                        ds_data[var].data = da.where(condition, np.nan,
                                                     ds_data[var].data)

                piv_step_frame = float(
                    self._xml_root.findall('piv/stepFrame')[0].text)

                print('Calculating tensor')
                # j = jacobianConv(ds.U, ds.V, ds.W, dx, dy, dz, sigma=1.5)
                j = jacobianDask(u, v, w, piv_step_frame, dx, dy, dz)
                print('Done')
                #j = da.from_array(j,chunks=(20,-1,-1,-1,-1,-1))

                #                j = jacobianDask(uvw[:,:,:,:,0],uvw[:,:,:,:,1], uvw[:,:,:,:,2], piv_step_frame, dx, dy, dz)
                jT = da.transpose(j, axes=[0, 1, 2, 3, 5, 4])

                #                j = j.persist()
                #                jT = jT.persist()

                jacobianNorm = da.sqrt(
                    da.nansum(da.nansum(j**2., axis=-1), axis=-1))

                strainTensor = (j + jT) / 2.
                vorticityTensor = (j - jT) / 2.

                strainTensorNorm = da.sqrt(
                    da.nansum(da.nansum(strainTensor**2., axis=-1), axis=-1))
                vorticityTensorNorm = da.sqrt(
                    da.nansum(da.nansum(vorticityTensor**2., axis=-1),
                              axis=-1))
                divergence = j[:, :, :, :, 0, 0] + j[:, :, :, :, 1,
                                                     1] + j[:, :, :, :, 2, 2]
                # print(divergence)
                omx = vorticityTensor[:, :, :, :, 2, 1] * 2.
                omy = vorticityTensor[:, :, :, :, 0, 2] * 2.
                omz = vorticityTensor[:, :, :, :, 1, 0] * 2.

                divNorm = divergence / jacobianNorm

                #                divNorm = divNorm.persist()

                #                divNorm_mean = da.nanmean(divNorm)
                #                divNorm_std = da.nanstd(divNorm)

                dims = ['x', 'y', 'z']
                comp = ['u', 'v', 'w']

                ds_data['jacobian'] = xr.DataArray(
                    j,
                    coords=[tt, xx, yy, zz, comp, dims],
                    dims=['time', 'x', 'y', 'z', 'comp', 'dims'],
                    name='jacobian')

                ds_data['jacobianNorm'] = xr.DataArray(
                    jacobianNorm,
                    coords=[tt, xx, yy, zz],
                    dims=['time', 'x', 'y', 'z'],
                    name='jacobianNorm')

                ds_data['strainTensor'] = xr.DataArray(
                    strainTensor,
                    coords=[tt, xx, yy, zz, comp, dims],
                    dims=['time', 'x', 'y', 'z', 'comp', 'dims'],
                    name='strainTensor')

                ds_data['vorticityTensor'] = xr.DataArray(
                    vorticityTensor,
                    coords=[tt, xx, yy, zz, comp, dims],
                    dims=['time', 'x', 'y', 'z', 'comp', 'dims'],
                    name='vorticityTensor')

                ds_data['vorticityNorm'] = xr.DataArray(
                    vorticityTensorNorm,
                    coords=[tt, xx, yy, zz],
                    dims=['time', 'x', 'y', 'z'],
                    name='vorticityNorm')

                ds_data['strainNorm'] = xr.DataArray(
                    strainTensorNorm,
                    coords=[tt, xx, yy, zz],
                    dims=['time', 'x', 'y', 'z'],
                    name='strainNorm')

                ds_data['divergence'] = xr.DataArray(
                    divergence,
                    coords=[tt, xx, yy, zz],
                    dims=['time', 'x', 'y', 'z'],
                    name='divergence')

                ds_data['omx'] = xr.DataArray(omx,
                                              coords=[tt, xx, yy, zz],
                                              dims=['time', 'x', 'y', 'z'],
                                              name='omx')

                ds_data['omy'] = xr.DataArray(omy,
                                              coords=[tt, xx, yy, zz],
                                              dims=['time', 'x', 'y', 'z'],
                                              name='omy')

                ds_data['omz'] = xr.DataArray(omz,
                                              coords=[tt, xx, yy, zz],
                                              dims=['time', 'x', 'y', 'z'],
                                              name='omz')

                ds_data['divNorm'] = xr.DataArray(divNorm,
                                                  coords=[tt, xx, yy, zz],
                                                  dims=['time', 'x', 'y', 'z'],
                                                  name='divNorm')

                #                ds_data['divNorm_mean'] = xr.DataArray(divNorm_mean)
                #                ds_data['divNorm_std'] = xr.DataArray(divNorm_std)

                ds = xr.Dataset(ds_data)
                #                if self._from_nc:
                #                    for k,v in ds_temp.attrs.items():
                #                        ds.attrs[k]=v
                #ds = ds.chunk({'time': 20})

                self._append_CF_attrs(ds)
                self._append_attrs(ds)
                ds.attrs['filename'] = self._result_file

                if self._norm_dims:

                    KC = U0_bl * T / D
                    delta = (2. * np.pi * d_s) / h
                    S = delta / KC

                    ds.attrs['T'] = T
                    ds.attrs['h'] = h
                    ds.attrs['D'] = D
                    ds.attrs['U0_bl'] = U0_bl
                    ds.attrs['U0_bar'] = U0_bar
                    ds.attrs['KC'] = KC
                    ds.attrs['S'] = S
                    ds.attrs['Delta+'] = ((1E-6 * T)**0.5) / h
                    ds.attrs['Delta_l'] = 2 * np.pi * d_s
                    ds.attrs['Delta_s'] = d_s
                    ds.attrs['Re_D'] = U0_bl * D / 1E-6
                    ds.attrs['Beta'] = D**2. / (1E-6 * T)

                delta = (ds.attrs['dx'] * ds.attrs['dy'] *
                         ds.attrs['dz'])**(1. / 3.)
                dpx = (ds.attrs['pdx'] * ds.attrs['pdy'] *
                       ds.attrs['pdz'])**(1. / 3.)
                delta_px = delta / dpx
                dt = ds.attrs['piv_step_ensemble']

                #                divRMS = da.sqrt(da.nanmean((divergence * dt) ** 2.))
                #                divRMS = divRMS.persist()
                #                vorticityTensorNorm.persist()
                #                velocityError = divRMS/((3./(2.*delta_px**2.))**0.5)
                # print(da.percentile(ds_new['vorticityTensorNorm'].data.ravel(),99.))
                # print(ds_new['divRMS'])
                # print(ds_new['divNorm_mean'])
                #                vorticityError = divRMS/dt/da.percentile(vorticityTensorNorm.ravel(),99.)

                #                divNorm_mean = da.nanmean(divNorm)
                #                divNorm_std = da.nanstd(divNorm)

                # print("initial save")
                #ds.to_zarr(self._result_file,compute=False)
                #ds = xr.open_zarr(self._result_file)

                #                xstart = np.argmax(xx > 0.05)
                #                ystart = np.argmax(yy > 0.07)

                divRMS = da.sqrt(da.nanmean(
                    (divergence * dt)**2.))  #.compute()
                #divNorm = divergence / jacobianNorm
                #divNorm = divNorm.compute()
                #divNorm_mean = da.nanmean(divNorm).compute()
                #divNorm_std = da.nanstd(divNorm).compute()
                velocityError = divRMS / ((3. / (2. * delta_px**2.))**0.5)
                vortNorm = vorticityTensorNorm  #.compute()

                vorticityError = divRMS / dt / np.percentile(
                    vortNorm.ravel(), 99.)

                velocityError, vorticityError = da.compute(
                    velocityError, vorticityError)

                #ds.attrs['divNorm_mean'] = divNorm_mean
                #ds.attrs['divNorm_std'] = divNorm_std
                ds.attrs['velocityError'] = velocityError
                ds.attrs['vorticityError'] = vorticityError

                if self._norm_dims:
                    xInds = (xn > 0.5) & (xn < 2.65)
                    yInds = (yn > -0.75) & (yn < 0.75)
                else:
                    xInds = range(len(ds['x']))
                    yInds = range(len(ds['y']))
                vrms = (ds['V'][:, xInds, yInds, :]**2.).mean(
                    dim=['time', 'x', 'y', 'z'])**0.5
                wrms = (ds['W'][:, xInds, yInds, :]**2.).mean(
                    dim=['time', 'x', 'y', 'z'])**0.5
                ds.attrs['Vrms'] = float(vrms.compute())
                ds.attrs['Wrms'] = float(wrms.compute())

                #fig,ax = pl.subplots()
                #ax.plot(ds.ws,ds.Ue_spl/U0_bl,color='k')
                #ax.plot(ds.ws,ds.Ue_bl/U0_bl,color='g')
                #ax.set_xlabel(r'$t/T$')
                #ax.set_ylabel(r'$U_{bl}/U_0$')
                #fig.savefig(self._result_file[:-4] + 'png',dpi=125)
                #pl.close(fig)
                # print("second save")
                #ds.to_netcdf(self._result_file)
                ds.to_zarr(self._result_file, mode='w')

                print('Cached ' + self._result_file)

                #ds = xr.open_dataset(self._result_file,chunks={'time':20})
                ds = xr.open_zarr(self._result_file)
                ds.attrs['filename'] = self._result_file
            else:
                #ds = xr.open_dataset(self._result_file,chunks={'time':20})
                ds = xr.open_zarr(self._result_file)
                ds.attrs['filename'] = self._result_file

            self._ds = ds

        return self._ds
Example #19
def identity_by_state(
    ds: Dataset,
    *,
    call_allele_frequency: Hashable = variables.call_allele_frequency,
    merge: bool = True,
) -> Dataset:
    """Compute identity by state (IBS) probabilities between
    all pairs of samples.

    The IBS probability between a pair of individuals is the
    probability that a randomly drawn allele from the first individual
    is identical in state with a randomly drawn allele from the second
    individual at a single random locus.

    Parameters
    ----------
    ds
        Dataset containing call genotype alleles.
    call_allele_frequency
        Input variable name holding call_allele_frequency as defined by
        :data:`sgkit.variables.call_allele_frequency_spec`.
        If the variable is not present in ``ds``, it will be computed
        using :func:`call_allele_frequencies`.
    merge
        If True (the default), merge the input dataset and the computed
        output variables into a single dataset, otherwise return only
        the computed output variables.
        See :ref:`dataset_merge` for more details.

    Returns
    -------
    A dataset containing :data:`sgkit.variables.stat_identity_by_state_spec`
    which is a matrix of pairwise IBS probabilities among all samples.
    The dimensions are named ``samples_0`` and ``samples_1``.

    Raises
    ------
    NotImplementedError
        If the variable holding call_allele_frequency is chunked along the
        samples dimension.

    Warnings
    --------
    This method does not currently support datasets that are chunked along the
    samples dimension.

    Examples
    --------

    >>> import sgkit as sg
    >>> ds = sg.simulate_genotype_call_dataset(n_variant=2, n_sample=3, seed=2)
    >>> sg.display_genotypes(ds) # doctest: +NORMALIZE_WHITESPACE
    samples    S0   S1   S2
    variants
    0         0/0  1/1  1/0
    1         1/1  1/1  1/0
    >>> sg.identity_by_state(ds)["stat_identity_by_state"].values # doctest: +NORMALIZE_WHITESPACE
    array([[1. , 0.5, 0.5],
           [0.5, 1. , 0.5],
           [0.5, 0.5, 0.5]])
    """
    ds = define_variable_if_absent(
        ds,
        variables.call_allele_frequency,
        call_allele_frequency,
        call_allele_frequencies,
    )
    variables.validate(
        ds, {call_allele_frequency: variables.call_allele_frequency_spec}
    )
    af = da.asarray(ds[call_allele_frequency])
    if len(af.chunks[1]) > 1:
        raise NotImplementedError(
            "identity_by_state does not support chunking in the samples dimension"
        )
    af0 = da.where(da.isnan(af), 0.0, af)
    num = da.einsum("ixj,iyj->xy", af0, af0)
    called = da.nansum(af, axis=-1)
    count = da.einsum("ix,iy->xy", called, called)
    denom = da.where(count == 0, np.nan, count)
    new_ds = create_dataset(
        {
            variables.stat_identity_by_state: (
                ("samples_0", "samples_1"),
                num / denom,
            )
        }
    )
    return conditional_merge_datasets(ds, new_ds, merge)
Example #20
def Weir_Goudet_beta(
    ds: Dataset,
    *,
    stat_identity_by_state: Hashable = variables.stat_identity_by_state,
    merge: bool = True,
) -> Dataset:
    """Estimate pairwise beta between all pairs of samples as described
    in Weir and Goudet 2017 [1].

    Beta is the kinship scaled by the average kinship of all pairs of
    individuals in the dataset such that the non-diagonal (non-self) values
    sum to zero.

    Beta may be corrected to more accurately reflect pedigree based kinship
    estimates using the formula
    :math:`\\hat{\\beta}^c=\\frac{\\hat{\\beta}-\\hat{\\beta}_0}{1-\\hat{\\beta}_0}`
    where :math:`\\hat{\\beta}_0` is the estimated beta between samples which are
    known to be unrelated [1].

    Parameters
    ----------
    ds
        Genotype call dataset.
    stat_identity_by_state
        Input variable name holding stat_identity_by_state as defined
        by :data:`sgkit.variables.stat_identity_by_state_spec`.
        If the variable is not present in ``ds``, it will be computed
        using :func:`identity_by_state`.
    merge
        If True (the default), merge the input dataset and the computed
        output variables into a single dataset, otherwise return only
        the computed output variables.
        See :ref:`dataset_merge` for more details.

    Returns
    -------
    A dataset containing :data:`sgkit.variables.stat_Weir_Goudet_beta_spec`
    which is a matrix of estimated pairwise kinship relative to the average
    kinship of all pairs of individuals in the dataset.
    The dimensions are named ``samples_0`` and ``samples_1``.

    Examples
    --------

    >>> import sgkit as sg
    >>> ds = sg.simulate_genotype_call_dataset(n_variant=3, n_sample=3, n_allele=10, seed=3)
    >>> # sample 2 "inherits" alleles from samples 0 and 1
    >>> ds.call_genotype.data[:, 2, 0] = ds.call_genotype.data[:, 0, 0]
    >>> ds.call_genotype.data[:, 2, 1] = ds.call_genotype.data[:, 1, 0]
    >>> sg.display_genotypes(ds) # doctest: +NORMALIZE_WHITESPACE
    samples    S0   S1   S2
    variants
    0         7/1  8/6  7/8
    1         9/5  3/6  9/3
    2         8/8  8/3  8/8
    >>> # estimate beta
    >>> ds = sg.Weir_Goudet_beta(ds).compute()
    >>> ds.stat_Weir_Goudet_beta.values # doctest: +NORMALIZE_WHITESPACE
    array([[ 0.5 , -0.25,  0.25],
           [-0.25,  0.25,  0.  ],
           [ 0.25,  0.  ,  0.5 ]])
    >>> # correct beta assuming least related samples are unrelated
    >>> beta = ds.stat_Weir_Goudet_beta
    >>> beta0 = beta.min()
    >>> beta_corrected = (beta - beta0) / (1 - beta0)
    >>> beta_corrected.values # doctest: +NORMALIZE_WHITESPACE
    array([[0.6, 0. , 0.4],
           [0. , 0.4, 0.2],
           [0.4, 0.2, 0.6]])

    References
    ----------
    [1] - Weir, Bruce S., and Jérôme Goudet. 2017.
    "A Unified Characterization of Population Structure and Relatedness."
    Genetics 206 (4): 2085-2103.
    """
    ds = define_variable_if_absent(
        ds, variables.stat_identity_by_state, stat_identity_by_state, identity_by_state
    )
    variables.validate(
        ds, {stat_identity_by_state: variables.stat_identity_by_state_spec}
    )
    ibs = ds[stat_identity_by_state].data
    # average matching is the mean of non-diagonal elements
    num = da.nansum(da.tril(ibs, -1))
    denom = da.nansum(da.tril(~da.isnan(ibs), -1))
    avg = num / denom
    beta = (ibs - avg) / (1 - avg)
    new_ds = create_dataset(
        {
            variables.stat_Weir_Goudet_beta: (
                ("samples_0", "samples_1"),
                beta,
            )
        }
    )
    return conditional_merge_datasets(ds, new_ds, merge)
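The da.tril(..., -1) pair averages only the strictly-lower-triangle (non-self) IBS values. A small check of that step, using the IBS matrix from Example #19's docstring as hypothetical input:

import numpy as np
import dask.array as da

ibs = da.from_array(np.array([[1.0, 0.5, 0.5],
                              [0.5, 1.0, 0.5],
                              [0.5, 0.5, 0.5]]), chunks=3)
num = da.nansum(da.tril(ibs, -1))
denom = da.nansum(da.tril(~da.isnan(ibs), -1))
print((num / denom).compute())  # 0.5, the mean of the three below-diagonal entries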
Example #21
def interm(x, y, axis=None):
    n = da.nansum((x > 0.1) & (y > 0.1) & ~da.isnan(x) & ~da.isnan(y),
                  axis=axis)
    o = da.nansum(((x > 0.1) | (y > 0.1)) & ~da.isnan(x) & ~da.isnan(y),
                  axis=axis)
    return n / o
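A minimal usage sketch with hypothetical values; the ratio is the count of cells where both fields exceed 0.1 over the count where either does, with NaN cells excluded from both counts:

import numpy as np
import dask.array as da

x = da.from_array(np.array([0.2, 0.05, np.nan, 0.3]), chunks=2)
y = da.from_array(np.array([0.15, 0.2, 0.4, 0.05]), chunks=2)
print(interm(x, y).compute())  # 1 joint exceedance / 3 single exceedances ~= 0.333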
Example #22
def potential_dask(cluster):
    d2 = distances(cluster)
    energy = da.nansum(lj(d2))/2.
    return energy
Example #23
import rasterio
import glob
from dask_rasterio import read_raster, write_raster
import dask.array as da

earthstat_dir  = "C:/Users/angel/DATA/Earthstat/HarvestedAreaYield175Crops_Geotiff/HarvestedAreaYield175Crops_Geotiff/"
layer = "Production"
ext = ".tif"
selected_files = list(glob.iglob(earthstat_dir + '**/*' + layer + ext, recursive=True))
map2array=[]
for raster in selected_files:
    map2array.append(read_raster(raster))

ds_stack = da.stack(map2array)
with rasterio.open(selected_files[0]) as src:
    profile = src.profile
    profile.update(compress='lzw')

write_raster(earthstat_dir + "Sum" + layer + ".tif", da.nansum(ds_stack,0), **profile)