def nanaverage(a, weights, **kwargs):
    """ compute the weighted average with nans ignored """
    avg = da.nansum(a * weights, **kwargs)
    tot = da.nansum(weights, **kwargs)
    return nandiv(avg, tot)
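A minimal usage sketch (my addition, not from the source): `nandiv` is undefined above, so this example assumes it is a small divide helper that returns NaN where the weight total is zero.

import numpy as np
import dask.array as da

def nandiv(num, den):
    # hypothetical stand-in for the undefined helper used by nanaverage above
    return num / da.where(den == 0, np.nan, den)

a = da.from_array(np.array([[1.0, np.nan], [3.0, 4.0]]), chunks=1)
w = da.from_array(np.array([[1.0, 1.0], [2.0, 2.0]]), chunks=1)
print(nanaverage(a, w, axis=0).compute())  # weighted column averages; NaN terms drop out of the numerator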
def _weighted_spatial_average(data, cosfield):
    """ Calculate weighted spatial average. """
    if isinstance(data, xr.DataArray):
        data = data.data
    if isinstance(data, np.ndarray):
        data = da.from_array(data, chunks=(1000, 1000))
    return da.nansum(data * cosfield) / da.nansum(cosfield)
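A short usage sketch (my own, under the assumption that `cosfield` is a cos(latitude) weight grid matching the data shape):

import numpy as np
import dask.array as da
import xarray as xr

lat = np.linspace(-89.5, 89.5, 180)
lon = np.linspace(0.5, 359.5, 360)
field = xr.DataArray(np.random.rand(180, 360), coords=[lat, lon], dims=['lat', 'lon'])
field[0, 0] = np.nan  # missing values are skipped by the nansum-based average
cosfield = da.from_array(np.cos(np.deg2rad(lat))[:, None] * np.ones((180, 360)),
                         chunks=(1000, 1000))
print(float(_weighted_spatial_average(field, cosfield)))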
def test_nan():
    x = np.array([[1, np.nan, 3, 4],
                  [5, 6, 7, np.nan],
                  [9, 10, 11, 12]])
    d = da.from_array(x, chunks=(2, 2))
    assert_eq(np.nansum(x), da.nansum(d))
    assert_eq(np.nansum(x, axis=0), da.nansum(d, axis=0))
    assert_eq(np.nanmean(x, axis=1), da.nanmean(d, axis=1))
    assert_eq(np.nanmin(x, axis=1), da.nanmin(d, axis=1))
    assert_eq(np.nanmax(x, axis=(0, 1)), da.nanmax(d, axis=(0, 1)))
    assert_eq(np.nanvar(x), da.nanvar(d))
    assert_eq(np.nanstd(x, axis=0), da.nanstd(d, axis=0))
    assert_eq(np.nanargmin(x, axis=0), da.nanargmin(d, axis=0))
    assert_eq(np.nanargmax(x, axis=0), da.nanargmax(d, axis=0))
    assert_eq(np.nanprod(x), da.nanprod(d))
def lj(cluster, do_forces=True, *parameters):
    if cluster.ndim == 1:
        cluster = cluster.reshape(-1, 3)
    diff = distance_matrix(cluster)
    r2 = (diff**2).sum(-1)
    energy = da.nansum(potential(r2, *parameters)) / 2.
    if do_forces:
        forces = da.nansum(gradient(r2, *parameters)[:, :, np.newaxis] * diff, axis=0)
        return energy, forces
    else:
        return energy
def test_nan():
    x = np.array([[1, np.nan, 3, 4],
                  [5, 6, 7, np.nan],
                  [9, 10, 11, 12]])
    d = da.from_array(x, blockshape=(2, 2))
    assert eq(np.nansum(x), da.nansum(d))
    assert eq(np.nansum(x, axis=0), da.nansum(d, axis=0))
    assert eq(np.nanmean(x, axis=1), da.nanmean(d, axis=1))
    assert eq(np.nanmin(x, axis=1), da.nanmin(d, axis=1))
    assert eq(np.nanmax(x, axis=(0, 1)), da.nanmax(d, axis=(0, 1)))
    assert eq(np.nanvar(x), da.nanvar(d))
    assert eq(np.nanstd(x, axis=0), da.nanstd(d, axis=0))
    assert eq(np.nanargmin(x, axis=0), da.nanargmin(d, axis=0))
    assert eq(np.nanargmax(x, axis=0), da.nanargmax(d, axis=0))
    with ignoring(AttributeError):
        assert eq(np.nanprod(x), da.nanprod(d))
def test_nan():
    x = np.array([[1, np.nan, 3, 4],
                  [5, 6, 7, np.nan],
                  [9, 10, 11, 12]])
    d = da.from_array(x, chunks=(2, 2))
    assert_eq(np.nansum(x), da.nansum(d))
    assert_eq(np.nansum(x, axis=0), da.nansum(d, axis=0))
    assert_eq(np.nanmean(x, axis=1), da.nanmean(d, axis=1))
    assert_eq(np.nanmin(x, axis=1), da.nanmin(d, axis=1))
    assert_eq(np.nanmax(x, axis=(0, 1)), da.nanmax(d, axis=(0, 1)))
    assert_eq(np.nanvar(x), da.nanvar(d))
    assert_eq(np.nanstd(x, axis=0), da.nanstd(d, axis=0))
    assert_eq(np.nanargmin(x, axis=0), da.nanargmin(d, axis=0))
    assert_eq(np.nanargmax(x, axis=0), da.nanargmax(d, axis=0))
    assert_eq(nanprod(x), da.nanprod(d))
def compute_maf(X):
    r"""Compute minor allele frequencies.

    It assumes that ``X`` encodes 0, 1, and 2 representing the number
    of alleles (or dosage), or ``NaN`` to represent missing values.

    Parameters
    ----------
    X : array_like
        Genotype matrix.

    Returns
    -------
    array_like
        Minor allele frequencies.

    Examples
    --------
    .. doctest::

        >>> from numpy.random import RandomState
        >>> from limix.qc import compute_maf
        >>>
        >>> random = RandomState(0)
        >>> X = random.randint(0, 3, size=(100, 10))
        >>>
        >>> print(compute_maf(X)) # doctest: +FLOAT_CMP
        [0.49 0.49 0.445 0.495 0.5 0.45 0.48 0.48 0.47 0.435]
    """
    import dask.array as da
    import xarray as xr
    from pandas import DataFrame
    from numpy import isnan, logical_not, minimum, nansum

    if isinstance(X, da.Array):
        s0 = da.nansum(X, axis=0).compute()
        denom = 2 * (X.shape[0] - da.isnan(X).sum(axis=0)).compute()
    elif isinstance(X, DataFrame):
        s0 = X.sum(axis=0, skipna=True)
        denom = 2 * logical_not(X.isna()).sum(axis=0)
    elif isinstance(X, xr.DataArray):
        if "sample" in X.dims:
            kwargs = {"dim": "sample"}
        else:
            kwargs = {"axis": 0}
        s0 = X.sum(skipna=True, **kwargs)
        denom = 2 * logical_not(isnan(X)).sum(**kwargs)
    else:
        s0 = nansum(X, axis=0)
        denom = 2 * logical_not(isnan(X)).sum(axis=0)

    s0 = s0 / denom
    s1 = 1 - s0
    maf = minimum(s0, s1)
    if hasattr(maf, "name"):
        maf.name = "maf"
    return maf
def test_reduction_names():
    x = da.ones(5, chunks=(2,))
    assert x.sum().name.startswith('sum')
    assert 'max' in x.max().name.split('-')[0]
    assert x.var().name.startswith('var')
    assert x.all().name.startswith('all')
    assert any(k[0].startswith('nansum') for k in da.nansum(x).dask)
    assert x.mean().name.startswith('mean')
def test_reduction_names():
    x = da.ones(5, chunks=(2, ))
    assert x.sum().name.startswith("sum")
    assert "max" in x.max().name.split("-")[0]
    assert x.var().name.startswith("var")
    assert x.all().name.startswith("all")
    assert any(k[0].startswith("nansum") for k in da.nansum(x).dask)
    assert x.mean().name.startswith("mean")
def evaluate(cluster, do_forces=True):
    if cluster.ndim == 1:
        cluster = cluster.reshape(-1, 3)
    if NCPUS > cluster.shape[0]:
        chunks = 1
    else:
        chunks = cluster.shape[0] // NCPUS
    darr = da.from_array(cluster, chunks=chunks)
    diff, r2, _ = distance_matrix(darr)
    energy = da.nansum(potential(r2)) / 2.
    if do_forces:
        forces = da.nansum(gradient(r2)[:, :, np.newaxis] * diff, axis=0)
        return energy.compute(), forces.compute()
    else:
        return energy.compute()
def update_velocities(position, velocity, mass, G, epsilon):
    """Calculate the interactions between all particles and update the velocities.

    Args:
        position (dask array): dask array of all particle positions in cartesian coordinates.
        velocity (dask array): dask array of all particle velocities in cartesian coordinates.
        mass (dask array): dask array of all particle masses.
        G (float): gravitational constant.
        epsilon (float): softening parameter.

    Returns:
        velocity: updated particle velocities in cartesian coordinates.
    """
    dx = da.subtract.outer(position[:, 0], position[:, 0])
    dy = da.subtract.outer(position[:, 1], position[:, 1])
    dz = da.subtract.outer(position[:, 2], position[:, 2])
    r2 = da.square(dx) + da.square(dy) + da.square(dz) + da.square(epsilon)
    #
    coef = -G * mass[:]
    ax = coef * dx
    ay = coef * dy
    az = coef * dz
    #
    ax_scaled = da.divide(ax, r2)
    ay_scaled = da.divide(ay, r2)
    az_scaled = da.divide(az, r2)
    #
    total_ax = da.nansum(ax_scaled, axis=1)
    total_ay = da.nansum(ay_scaled, axis=1)
    total_az = da.nansum(az_scaled, axis=1)
    #
    velocity_x = da.diag(da.add.outer(da.transpose(velocity)[0], total_ax))
    velocity_y = da.diag(da.add.outer(da.transpose(velocity)[1], total_ay))
    velocity_z = da.diag(da.add.outer(da.transpose(velocity)[2], total_az))
    #
    velocity = np.column_stack((velocity_x.compute(),
                                velocity_y.compute(),
                                velocity_z.compute()))
    return velocity
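A small driver sketch (my addition; toy constants, not from the original project) showing the expected shapes for a three-particle system:

import numpy as np
import dask.array as da

# toy inputs: 3 particles in 3D; values are placeholders, not physical data
position = da.from_array(np.random.rand(3, 3), chunks=(3, 3))
velocity = da.from_array(np.zeros((3, 3)), chunks=(3, 3))
mass = da.from_array(np.ones(3), chunks=3)

new_velocity = update_velocities(position, velocity, mass, G=6.674e-11, epsilon=1e-3)
print(new_velocity.shape)  # (3, 3): updated velocities returned as a NumPy array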
def do_compute(seed, size=int(4e4), radius=300):
    with dask.set_options(get=dask.threaded.get):
        #da.random.seed(seed)
        #arr = (da.random.normal(0.01, 1, (size,3), chunks=size//24)-0.5)*radius
        np.random.seed(seed)
        c = (np.random.normal(0.01, 1, (size, 3)) - 0.5) * radius
        arr = da.from_array(c, chunks=c.shape[0] // NCPUS)
        diff = arr[:, np.newaxis, :] - arr[np.newaxis, :, :]
        mat = da.sqrt((diff * diff).sum(-1))
        inv6 = (1. / mat)**6
        pot = 4. * (inv6 * inv6 - inv6)
        e = da.nansum(pot) / 2.
        return e.compute(num_workers=NCPUS)
def test_reductions():
    x = np.arange(5).astype('f4')
    a = da.from_array(x, chunks=(2,))
    assert eq(da.all(a), np.all(x))
    assert eq(da.any(a), np.any(x))
    assert eq(da.argmax(a, axis=0), np.argmax(x, axis=0))
    assert eq(da.argmin(a, axis=0), np.argmin(x, axis=0))
    assert eq(da.max(a), np.max(x))
    assert eq(da.mean(a), np.mean(x))
    assert eq(da.min(a), np.min(x))
    assert eq(da.nanargmax(a, axis=0), np.nanargmax(x, axis=0))
    assert eq(da.nanargmin(a, axis=0), np.nanargmin(x, axis=0))
    assert eq(da.nanmax(a), np.nanmax(x))
    assert eq(da.nanmin(a), np.nanmin(x))
    assert eq(da.nansum(a), np.nansum(x))
    assert eq(da.nanvar(a), np.nanvar(x))
    assert eq(da.nanstd(a), np.nanstd(x))
def test_reductions():
    x = np.arange(5).astype('f4')
    a = da.from_array(x, blockshape=(2, ))
    assert eq(da.all(a), np.all(x))
    assert eq(da.any(a), np.any(x))
    assert eq(da.argmax(a, axis=0), np.argmax(x, axis=0))
    assert eq(da.argmin(a, axis=0), np.argmin(x, axis=0))
    assert eq(da.max(a), np.max(x))
    assert eq(da.mean(a), np.mean(x))
    assert eq(da.min(a), np.min(x))
    assert eq(da.nanargmax(a, axis=0), np.nanargmax(x, axis=0))
    assert eq(da.nanargmin(a, axis=0), np.nanargmin(x, axis=0))
    assert eq(da.nanmax(a), np.nanmax(x))
    assert eq(da.nanmin(a), np.nanmin(x))
    assert eq(da.nansum(a), np.nansum(x))
    assert eq(da.nanvar(a), np.nanvar(x))
    assert eq(da.nanstd(a), np.nanstd(x))
def compute_maf(X):
    r"""Compute minor allele frequencies.

    It assumes that ``X`` encodes 0, 1, and 2 representing the number
    of alleles (or dosage), or ``NaN`` to represent missing values.

    Parameters
    ----------
    X : array_like
        Genotype matrix.

    Returns
    -------
    array_like
        Minor allele frequencies.

    Examples
    --------
    .. doctest::

        >>> from numpy.random import RandomState
        >>> from limix.qc import compute_maf
        >>>
        >>> random = RandomState(0)
        >>> X = random.randint(0, 3, size=(100, 10))
        >>>
        >>> print(compute_maf(X)) # doctest: +FLOAT_CMP
        [0.49 0.49 0.445 0.495 0.5 0.45 0.48 0.48 0.47 0.435]
    """
    import dask.array as da
    from numpy import isnan, logical_not, minimum, nansum

    if isinstance(X, da.Array):
        s0 = da.nansum(X, axis=0).compute()
        denom = 2 * (X.shape[0] - da.isnan(X).sum(axis=0)).compute()
    else:
        s0 = nansum(X, axis=0)
        denom = 2 * logical_not(isnan(X)).sum(axis=0)

    s0 = s0 / denom
    s1 = 1 - s0
    return minimum(s0, s1)
def ds(self):
    if self._ds is None:
        file_exists = os.path.exists(self._result_file)
        reprocess = not file_exists or self._reprocess
        if reprocess:
            if file_exists:
                print('Old file exists ' + self._result_file)
                #print('Removing old file ' + self._result_file)
                #shutil.rmtree(self._result_file)
            ds_data = OrderedDict()
            to_seconds = np.vectorize(lambda x: x.seconds + x.microseconds / 1E6)
            print('Processing binary data...')
            xx, yy, zz = self._loadgrid()
            if xx is None:
                if self._from_nc:
                    print('Processing existing netcdf...')
                    fn = self._result_file[:-5] + '_QC_raw.nc'
                    if os.path.exists(fn):
                        ds_temp = xr.open_dataset(self._result_file[:-5] + '_QC_raw.nc',
                                                  chunks={'time': 50})
                        u = da.transpose(ds_temp['U'].data, axes=[3, 0, 1, 2])
                        v = da.transpose(ds_temp['V'].data, axes=[3, 0, 1, 2])
                        w = da.transpose(ds_temp['W'].data, axes=[3, 0, 1, 2])
                        tt = ds_temp['time']
                        te = (tt - tt[0]) / np.timedelta64(1, 's')
                        xx = ds_temp['x'].values
                        yy = ds_temp['y'].values
                        zz = ds_temp['z'].values
                    else:
                        print('USING OLD ZARR DATA')
                        ds_temp = xr.open_zarr(self._result_file)
                        u = da.transpose(ds_temp['U'].data, axes=[3, 0, 1, 2])
                        v = da.transpose(ds_temp['V'].data, axes=[3, 0, 1, 2])
                        w = da.transpose(ds_temp['W'].data, axes=[3, 0, 1, 2])
                        tt = ds_temp['time']
                        te = (tt - tt[0]) / np.timedelta64(1, 's')
                        xx = ds_temp['x'].values
                        yy = ds_temp['y'].values
                        zz = ds_temp['z'].values
                print('ERROR: No NetCDF data found for ' + self._xml_file)
                #return None
                # print(u.shape)
            else:
                tt, uvw = self._loaddata(xx, yy, zz)
                if tt is None:
                    print('ERROR: No binary data found for ' + self._xml_file)
                    return None
                # calculate the elapsed time from the Timestamp objects and then convert to datetime64 datatype
                te = to_seconds(tt - tt[0])
                tt = pd.to_datetime(tt)
                uvw = uvw.persist()
                u = uvw[:, :, :, :, 0]
                v = uvw[:, :, :, :, 1]
                w = uvw[:, :, :, :, 2]
                # u = xr.DataArray(uvw[:,:,:,:,0], coords=[tt, xx, yy, zz], dims=['time','x', 'y', 'z'],
                #                  name='U', attrs={'standard_name': 'sea_water_x_velocity', 'units': 'm s-1'})
                # v = xr.DataArray(uvw[:,:,:,:,1], coords=[tt, xx, yy, zz], dims=['time', 'x', 'y', 'z'],
                #                  name='V', attrs={'standard_name': 'sea_water_x_velocity', 'units': 'm s-1'})
                # w = xr.DataArray(uvw[:,:,:,:,2], coords=[tt, xx, yy, zz], dims=['time', 'x', 'y', 'z'],
                #                  name='W', attrs={'standard_name': 'upward_sea_water_velocity', 'units': 'm s-1'})
            if xx is None:
                print('No data found')
                return None
            u = u.persist()
            v = v.persist()
            w = w.persist()
            dx = float(xx[1] - xx[0])
            dy = float(yy[1] - yy[0])
            dz = float(zz[1] - zz[0])
            if self._norm_dims:
                exp = self._result_root.split('/')[4]
                runSheet = pd.read_csv('~/RunSheet-%s.csv' % exp)
                runSheet = runSheet.set_index('RunID')
                runDetails = runSheet.ix[int(self.run_id[-2:])]
                T = runDetails['T (s)']
                h = runDetails['h (m)']
                D = runDetails['D (m)']
                ww = te / T
                om = 2. * np.pi / T
                d_s = (2. * 1E-6 / om)**0.5
                bl = 3. * np.pi / 4. * d_s
                if exp == 'Exp6':
                    if D == 0.1:
                        dy_c = (188. + 82.) / 2
                        dx_c = 39.25
                        cx = dx_c / 1000.
                        cy = dy_c / 1000.
                    else:
                        dy_c = (806. + 287.) / 2. * 0.22
                        dx_c = 113 * 0.22
                        cx = dx_c / 1000.
                        cy = dy_c / 1000.
                elif exp == 'Exp8':
                    dy_c = 624 * 0.22
                    dx_c = 15
                    cx = dx_c / 1000.
                    cy = dy_c / 1000.
                xn = (xx + (D / 2. - cx)) / D
                yn = (yy - cy) / D
                zn = zz / h
                xnm, ynm = np.meshgrid(xn, yn)
                rr = np.sqrt(xnm**2. + ynm**2)
                cylMask = rr < 0.5
                nanPlane = np.ones(cylMask.shape)
                nanPlane[cylMask] = np.nan
                nanPlane = nanPlane.T
                nanPlane = nanPlane[np.newaxis, :, :, np.newaxis]
                u = u * nanPlane
                v = v * nanPlane
                w = w * nanPlane
                if D == 0.1:
                    xInds = xn > 3.
                else:
                    xInds = xn > 2.
                blInd = np.argmax(zn > bl / h)
                blPlane = int(round(blInd))
                Ue = u[:, xInds, :, :]
                Ue_bar = da.nanmean(Ue, axis=(1, 2, 3)).compute()
                Ue_bl = da.nanmean(Ue[:, :, :, blPlane], axis=(1, 2)).compute()
                inds = ~np.isnan(Ue_bl)
                xv = ww[inds] % 1.
                xv = xv + np.random.normal(scale=1E-6, size=xv.shape)
                yv = Ue_bl[inds]
                xy = np.stack([
                    np.concatenate([xv - 1., xv, xv + 1.]),
                    np.concatenate([yv, yv, yv])
                ]).T
                xy = xy[xy[:, 0].argsort(), :]
                xi = np.linspace(-0.5, 1.5, len(xv) // 8)
                n = np.nanmax(xy[:, 1])
                # print(n)
                # fig,ax = pl.subplots()
                # ax.scatter(xy[:,0],xy[:,1]/n)
                # print(xy)
                spl = si.LSQUnivariateSpline(xy[:, 0], xy[:, 1] / n, t=xi, k=3)
                roots = spl.roots()
                der = spl.derivative()
                slope = der(roots)
                inds = np.min(np.where(slope > 0))
                dt = (roots[inds] % 1.).mean() - 0.5
                tpx = np.arange(0, 0.5, 0.001)
                U0_bl = np.abs(spl(tpx + dt).min() * n)
                ws = ww - dt
                Ue_spl = spl((ws - 0.5) % 1.0 + dt) * n * -1.0
                #maxima = spl.derivative().roots()
                #Umax = spl(maxima)
                #UminIdx = np.argmin(Umax)
                #U0_bl = np.abs(Umax[UminIdx]*n)
                #ww_at_min = maxima[UminIdx]
                #ws = ww - ww_at_min + 0.25
                inds = ~np.isnan(Ue_bar)
                xv = ww[inds] % 1.
                xv = xv + np.random.normal(scale=1E-6, size=xv.shape)
                yv = Ue_bar[inds]
                xy = np.stack([
                    np.concatenate([xv - 1., xv, xv + 1.]),
                    np.concatenate([yv, yv, yv])
                ]).T
                xy = xy[xy[:, 0].argsort(), :]
                xi = np.linspace(-0.5, 1.5, len(xv) // 8)
                n = np.nanmax(xy[:, 1])
                spl = si.LSQUnivariateSpline(xy[:, 0], xy[:, 1] / n, t=xi, k=4)
                maxima = spl.derivative().roots()
                Umax = spl(maxima)
                UminIdx = np.argmin(Umax)
                U0_bar = np.abs(Umax[UminIdx] * n)
                ww = xr.DataArray(ww, coords=[tt, ], dims=['time', ])
                ws = xr.DataArray(ws - 0.5, coords=[tt, ], dims=['time', ])
                xn = xr.DataArray(xn, coords=[xx, ], dims=['x', ])
                yn = xr.DataArray(yn, coords=[yy, ], dims=['y', ])
                zn = xr.DataArray(zn, coords=[zz, ], dims=['z', ])
                Ue_bar = xr.DataArray(Ue_bar, coords=[tt, ], dims=['time', ])
                Ue_bl = xr.DataArray(Ue_bl, coords=[tt, ], dims=['time', ])
                Ue_spl = xr.DataArray(Ue_spl, coords=[tt, ], dims=['time', ])
                ds_data['ww'] = ww
                ds_data['ws'] = ws
                ds_data['xn'] = xn
                ds_data['yn'] = yn
                ds_data['zn'] = zn
                ds_data['Ue_bar'] = Ue_bar
                ds_data['Ue_bl'] = Ue_bl
                ds_data['Ue_spl'] = Ue_spl
            te = xr.DataArray(te, coords=[tt, ], dims=['time', ])
            dims = ['time', 'x', 'y', 'z']
            coords = [tt, xx, yy, zz]
            ds_data['U'] = xr.DataArray(u, coords=coords, dims=dims, name='U',
                                        attrs={'standard_name': 'sea_water_x_velocity',
                                               'units': 'm s-1'})
            ds_data['V'] = xr.DataArray(v, coords=coords, dims=dims, name='V',
                                        attrs={'standard_name': 'sea_water_x_velocity',
                                               'units': 'm s-1'})
            ds_data['W'] = xr.DataArray(w, coords=coords, dims=dims, name='W',
                                        attrs={'standard_name': 'sea_water_x_velocity',
                                               'units': 'm s-1'})
            ds_data['te'] = te
            # stdV = da.nanstd(v)
            # stdW = da.nanstd(w)
            # thres=7.
            if 'U0_bl' in locals():
                condition = (da.fabs(v) / U0_bl > 1.5) | (da.fabs(w) / U0_bl > 0.6)
                for var in ['U', 'V', 'W']:
                    ds_data[var].data = da.where(condition, np.nan, ds_data[var].data)
            piv_step_frame = float(self._xml_root.findall('piv/stepFrame')[0].text)
            print('Calculating tensor')
            # j = jacobianConv(ds.U, ds.V, ds.W, dx, dy, dz, sigma=1.5)
            j = jacobianDask(u, v, w, piv_step_frame, dx, dy, dz)
            print('Done')
            #j = da.from_array(j,chunks=(20,-1,-1,-1,-1,-1))
            # j = jacobianDask(uvw[:,:,:,:,0],uvw[:,:,:,:,1], uvw[:,:,:,:,2], piv_step_frame, dx, dy, dz)
            jT = da.transpose(j, axes=[0, 1, 2, 3, 5, 4])
            # j = j.persist()
            # jT = jT.persist()
            jacobianNorm = da.sqrt(da.nansum(da.nansum(j**2., axis=-1), axis=-1))
            strainTensor = (j + jT) / 2.
            vorticityTensor = (j - jT) / 2.
            strainTensorNorm = da.sqrt(da.nansum(da.nansum(strainTensor**2., axis=-1), axis=-1))
            vorticityTensorNorm = da.sqrt(da.nansum(da.nansum(vorticityTensor**2., axis=-1), axis=-1))
            divergence = j[:, :, :, :, 0, 0] + j[:, :, :, :, 1, 1] + j[:, :, :, :, 2, 2]
            # print(divergence)
            omx = vorticityTensor[:, :, :, :, 2, 1] * 2.
            omy = vorticityTensor[:, :, :, :, 0, 2] * 2.
            omz = vorticityTensor[:, :, :, :, 1, 0] * 2.
            divNorm = divergence / jacobianNorm
            # divNorm = divNorm.persist()
            # divNorm_mean = da.nanmean(divNorm)
            # divNorm_std = da.nanstd(divNorm)
            dims = ['x', 'y', 'z']
            comp = ['u', 'v', 'w']
            ds_data['jacobian'] = xr.DataArray(j, coords=[tt, xx, yy, zz, comp, dims],
                                               dims=['time', 'x', 'y', 'z', 'comp', 'dims'],
                                               name='jacobian')
            ds_data['jacobianNorm'] = xr.DataArray(jacobianNorm, coords=[tt, xx, yy, zz],
                                                   dims=['time', 'x', 'y', 'z'],
                                                   name='jacobianNorm')
            ds_data['strainTensor'] = xr.DataArray(strainTensor, coords=[tt, xx, yy, zz, comp, dims],
                                                   dims=['time', 'x', 'y', 'z', 'comp', 'dims'],
                                                   name='strainTensor')
            ds_data['vorticityTensor'] = xr.DataArray(vorticityTensor, coords=[tt, xx, yy, zz, comp, dims],
                                                      dims=['time', 'x', 'y', 'z', 'comp', 'dims'],
                                                      name='vorticityTensor')
            ds_data['vorticityNorm'] = xr.DataArray(vorticityTensorNorm, coords=[tt, xx, yy, zz],
                                                    dims=['time', 'x', 'y', 'z'],
                                                    name='vorticityNorm')
            ds_data['strainNorm'] = xr.DataArray(strainTensorNorm, coords=[tt, xx, yy, zz],
                                                 dims=['time', 'x', 'y', 'z'],
                                                 name='strainNorm')
            ds_data['divergence'] = xr.DataArray(divergence, coords=[tt, xx, yy, zz],
                                                 dims=['time', 'x', 'y', 'z'],
                                                 name='divergence')
            ds_data['omx'] = xr.DataArray(omx, coords=[tt, xx, yy, zz],
                                          dims=['time', 'x', 'y', 'z'], name='omx')
            ds_data['omy'] = xr.DataArray(omy, coords=[tt, xx, yy, zz],
                                          dims=['time', 'x', 'y', 'z'], name='omy')
            ds_data['omz'] = xr.DataArray(omz, coords=[tt, xx, yy, zz],
                                          dims=['time', 'x', 'y', 'z'], name='omz')
            ds_data['divNorm'] = xr.DataArray(divNorm, coords=[tt, xx, yy, zz],
                                              dims=['time', 'x', 'y', 'z'], name='divNorm')
            # ds_data['divNorm_mean'] = xr.DataArray(divNorm_mean)
            # ds_data['divNorm_std'] = xr.DataArray(divNorm_std)
            ds = xr.Dataset(ds_data)
            # if self._from_nc:
            #     for k,v in ds_temp.attrs.items():
            #         ds.attrs[k]=v
            #ds = ds.chunk({'time': 20})
            self._append_CF_attrs(ds)
            self._append_attrs(ds)
            ds.attrs['filename'] = self._result_file
            if self._norm_dims:
                KC = U0_bl * T / D
                delta = (2. * np.pi * d_s) / h
                S = delta / KC
                ds.attrs['T'] = T
                ds.attrs['h'] = h
                ds.attrs['D'] = D
                ds.attrs['U0_bl'] = U0_bl
                ds.attrs['U0_bar'] = U0_bar
                ds.attrs['KC'] = KC
                ds.attrs['S'] = S
                ds.attrs['Delta+'] = ((1E-6 * T)**0.5) / h
                ds.attrs['Delta_l'] = 2 * np.pi * d_s
                ds.attrs['Delta_s'] = d_s
                ds.attrs['Re_D'] = U0_bl * D / 1E-6
                ds.attrs['Beta'] = D**2. / (1E-6 * T)
            delta = (ds.attrs['dx'] * ds.attrs['dy'] * ds.attrs['dz'])**(1. / 3.)
            dpx = (ds.attrs['pdx'] * ds.attrs['pdy'] * ds.attrs['pdz'])**(1. / 3.)
            delta_px = delta / dpx
            dt = ds.attrs['piv_step_ensemble']
            # divRMS = da.sqrt(da.nanmean((divergence * dt) ** 2.))
            # divRMS = divRMS.persist()
            # vorticityTensorNorm.persist()
            # velocityError = divRMS/((3./(2.*delta_px**2.))**0.5)
            # print(da.percentile(ds_new['vorticityTensorNorm'].data.ravel(),99.))
            # print(ds_new['divRMS'])
            # print(ds_new['divNorm_mean'])
            # vorticityError = divRMS/dt/da.percentile(vorticityTensorNorm.ravel(),99.)
            # divNorm_mean = da.nanmean(divNorm)
            # divNorm_std = da.nanstd(divNorm)
            # print("initial save")
            #ds.to_zarr(self._result_file,compute=False)
            #ds = xr.open_zarr(self._result_file)
            # xstart = np.argmax(xx > 0.05)
            # ystart = np.argmax(yy > 0.07)
            divRMS = da.sqrt(da.nanmean((divergence * dt)**2.))  #.compute()
            #divNorm = divergence / jacobianNorm
            #divNorm = divNorm.compute()
            #divNorm_mean = da.nanmean(divNorm).compute()
            #divNorm_std = da.nanstd(divNorm).compute()
            velocityError = divRMS / ((3. / (2. * delta_px**2.))**0.5)
            vortNorm = vorticityTensorNorm  #.compute()
            vorticityError = divRMS / dt / np.percentile(vortNorm.ravel(), 99.)
            velocityError, vorticityError = da.compute(velocityError, vorticityError)
            #ds.attrs['divNorm_mean'] = divNorm_mean
            #ds.attrs['divNorm_std'] = divNorm_std
            ds.attrs['velocityError'] = velocityError
            ds.attrs['vorticityError'] = vorticityError
            if self._norm_dims:
                xInds = (xn > 0.5) & (xn < 2.65)
                yInds = (yn > -0.75) & (yn < 0.75)
            else:
                xInds = range(len(ds['x']))
                yInds = range(len(ds['y']))
            vrms = (ds['V'][:, xInds, yInds, :]**2.).mean(dim=['time', 'x', 'y', 'z'])**0.5
            wrms = (ds['W'][:, xInds, yInds, :]**2.).mean(dim=['time', 'x', 'y', 'z'])**0.5
            ds.attrs['Vrms'] = float(vrms.compute())
            ds.attrs['Wrms'] = float(wrms.compute())
            #fig,ax = pl.subplots()
            #ax.plot(ds.ws,ds.Ue_spl/U0_bl,color='k')
            #ax.plot(ds.ws,ds.Ue_bl/U0_bl,color='g')
            #ax.set_xlabel(r'$t/T$')
            #ax.set_ylabel(r'$U_{bl}/U_0$')
            #fig.savefig(self._result_file[:-4] + 'png',dpi=125)
            #pl.close(fig)
            # print("second save")
            #ds.to_netcdf(self._result_file)
            ds.to_zarr(self._result_file, mode='w')
            print('Cached ' + self._result_file)
            #ds = xr.open_dataset(self._result_file,chunks={'time':20})
            ds = xr.open_zarr(self._result_file)
            ds.attrs['filename'] = self._result_file
        else:
            #ds = xr.open_dataset(self._result_file,chunks={'time':20})
            ds = xr.open_zarr(self._result_file)
            ds.attrs['filename'] = self._result_file
        self._ds = ds
    return self._ds
def identity_by_state(
    ds: Dataset,
    *,
    call_allele_frequency: Hashable = variables.call_allele_frequency,
    merge: bool = True,
) -> Dataset:
    """Compute identity by state (IBS) probabilities between
    all pairs of samples.

    The IBS probability between a pair of individuals is the
    probability that a randomly drawn allele from the first individual
    is identical in state with a randomly drawn allele from the second
    individual at a single random locus.

    Parameters
    ----------
    ds
        Dataset containing call genotype alleles.
    call_allele_frequency
        Input variable name holding call_allele_frequency as defined by
        :data:`sgkit.variables.call_allele_frequency_spec`.
        If the variable is not present in ``ds``, it will be computed
        using :func:`call_allele_frequencies`.
    merge
        If True (the default), merge the input dataset and the computed
        output variables into a single dataset, otherwise return only
        the computed output variables.
        See :ref:`dataset_merge` for more details.

    Returns
    -------
    A dataset containing :data:`sgkit.variables.stat_identity_by_state_spec`
    which is a matrix of pairwise IBS probabilities among all samples.
    The dimensions are named ``samples_0`` and ``samples_1``.

    Raises
    ------
    NotImplementedError
        If the variable holding call_allele_frequency is chunked along the
        samples dimension.

    Warnings
    --------
    This method does not currently support datasets that are chunked along the
    samples dimension.

    Examples
    --------

    >>> import sgkit as sg
    >>> ds = sg.simulate_genotype_call_dataset(n_variant=2, n_sample=3, seed=2)
    >>> sg.display_genotypes(ds) # doctest: +NORMALIZE_WHITESPACE
    samples    S0   S1   S2
    variants
    0         0/0  1/1  1/0
    1         1/1  1/1  1/0
    >>> sg.identity_by_state(ds)["stat_identity_by_state"].values # doctest: +NORMALIZE_WHITESPACE
    array([[1. , 0.5, 0.5],
           [0.5, 1. , 0.5],
           [0.5, 0.5, 0.5]])
    """
    ds = define_variable_if_absent(
        ds,
        variables.call_allele_frequency,
        call_allele_frequency,
        call_allele_frequencies,
    )
    variables.validate(
        ds, {call_allele_frequency: variables.call_allele_frequency_spec}
    )
    af = da.asarray(ds[call_allele_frequency])
    if len(af.chunks[1]) > 1:
        raise NotImplementedError(
            "identity_by_state does not support chunking in the samples dimension"
        )
    af0 = da.where(da.isnan(af), 0.0, af)
    num = da.einsum("ixj,iyj->xy", af0, af0)
    called = da.nansum(af, axis=-1)
    count = da.einsum("ix,iy->xy", called, called)
    denom = da.where(count == 0, np.nan, count)
    new_ds = create_dataset(
        {
            variables.stat_identity_by_state: (
                ("samples_0", "samples_1"),
                num / denom,
            )
        }
    )
    return conditional_merge_datasets(ds, new_ds, merge)
def Weir_Goudet_beta(
    ds: Dataset,
    *,
    stat_identity_by_state: Hashable = variables.stat_identity_by_state,
    merge: bool = True,
) -> Dataset:
    """Estimate pairwise beta between all pairs of samples as described
    in Weir and Goudet 2017 [1].

    Beta is the kinship scaled by the average kinship of all pairs of
    individuals in the dataset such that the non-diagonal (non-self) values
    sum to zero.

    Beta may be corrected to more accurately reflect pedigree based kinship
    estimates using the formula
    :math:`\\hat{\\beta}^c=\\frac{\\hat{\\beta}-\\hat{\\beta}_0}{1-\\hat{\\beta}_0}`
    where :math:`\\hat{\\beta}_0` is the estimated beta between samples which
    are known to be unrelated [1].

    Parameters
    ----------
    ds
        Genotype call dataset.
    stat_identity_by_state
        Input variable name holding stat_identity_by_state as defined
        by :data:`sgkit.variables.stat_identity_by_state_spec`.
        If the variable is not present in ``ds``, it will be computed
        using :func:`identity_by_state`.
    merge
        If True (the default), merge the input dataset and the computed
        output variables into a single dataset, otherwise return only
        the computed output variables.
        See :ref:`dataset_merge` for more details.

    Returns
    -------
    A dataset containing :data:`sgkit.variables.stat_Weir_Goudet_beta_spec`
    which is a matrix of estimated pairwise kinship relative to the average
    kinship of all pairs of individuals in the dataset.
    The dimensions are named ``samples_0`` and ``samples_1``.

    Examples
    --------

    >>> import sgkit as sg
    >>> ds = sg.simulate_genotype_call_dataset(n_variant=3, n_sample=3, n_allele=10, seed=3)
    >>> # sample 2 "inherits" alleles from samples 0 and 1
    >>> ds.call_genotype.data[:, 2, 0] = ds.call_genotype.data[:, 0, 0]
    >>> ds.call_genotype.data[:, 2, 1] = ds.call_genotype.data[:, 1, 0]
    >>> sg.display_genotypes(ds) # doctest: +NORMALIZE_WHITESPACE
    samples    S0   S1   S2
    variants
    0         7/1  8/6  7/8
    1         9/5  3/6  9/3
    2         8/8  8/3  8/8
    >>> # estimate beta
    >>> ds = sg.Weir_Goudet_beta(ds).compute()
    >>> ds.stat_Weir_Goudet_beta.values # doctest: +NORMALIZE_WHITESPACE
    array([[ 0.5 , -0.25,  0.25],
           [-0.25,  0.25,  0.  ],
           [ 0.25,  0.  ,  0.5 ]])
    >>> # correct beta assuming least related samples are unrelated
    >>> beta = ds.stat_Weir_Goudet_beta
    >>> beta0 = beta.min()
    >>> beta_corrected = (beta - beta0) / (1 - beta0)
    >>> beta_corrected.values # doctest: +NORMALIZE_WHITESPACE
    array([[0.6, 0. , 0.4],
           [0. , 0.4, 0.2],
           [0.4, 0.2, 0.6]])

    References
    ----------
    [1] - Bruce, S. Weir, and Jérôme Goudet 2017.
    "A Unified Characterization of Population Structure and Relatedness."
    Genetics 206 (4): 2085-2103.
    """
    ds = define_variable_if_absent(
        ds, variables.stat_identity_by_state, stat_identity_by_state, identity_by_state
    )
    variables.validate(
        ds, {stat_identity_by_state: variables.stat_identity_by_state_spec}
    )
    ibs = ds[stat_identity_by_state].data
    # average matching is the mean of non-diagonal elements
    num = da.nansum(da.tril(ibs, -1))
    denom = da.nansum(da.tril(~da.isnan(ibs), -1))
    avg = num / denom
    beta = (ibs - avg) / (1 - avg)
    new_ds = create_dataset(
        {
            variables.stat_Weir_Goudet_beta: (
                ("samples_0", "samples_1"),
                beta,
            )
        }
    )
    return conditional_merge_datasets(ds, new_ds, merge)
def interm(x, y, axis=None):
    n = da.nansum((x > 0.1) & (y > 0.1) & ~da.isnan(x) & ~da.isnan(y), axis=axis)
    o = da.nansum(((x > 0.1) | (y > 0.1)) & ~da.isnan(x) & ~da.isnan(y), axis=axis)
    return n / o
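A quick usage sketch (my addition): the function returns the fraction of points where both inputs exceed 0.1 among points where at least one does, with NaN positions excluded from both counts.

import numpy as np
import dask.array as da

x = da.from_array(np.array([0.0, 0.2, 0.3, np.nan, 0.5]), chunks=2)
y = da.from_array(np.array([0.3, 0.05, 0.4, 0.2, np.nan]), chunks=2)
print(float(interm(x, y)))  # 1 joint exceedance out of 3 points where either exceeds -> 0.333...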
def potential_dask(cluster):
    d2 = distances(cluster)
    energy = da.nansum(lj(d2)) / 2.
    return energy
import rasterio
import glob
from dask_rasterio import read_raster, write_raster
import dask.array as da

earthstat_dir = "C:/Users/angel/DATA/Earthstat/HarvestedAreaYield175Crops_Geotiff/HarvestedAreaYield175Crops_Geotiff/"
layer = "Production"
ext = ".tif"
selected_files = [file for file in glob.iglob(earthstat_dir + '**/*' + layer + ext, recursive=True)]

map2array = []
for raster in selected_files:
    map2array.append(read_raster(raster))

ds_stack = da.stack(map2array)

with rasterio.open(selected_files[0]) as src:
    profile = src.profile
    profile.update(compress='lzw')

write_raster(earthstat_dir + "Sum" + layer + ".tif", da.nansum(ds_stack, 0), **profile)