def dask_gd2_nanfill(xx, yy, z_array, algorithm='cubic', **kwargs):
    """! @brief 2d interpolation using dask and griddata
    @param xx np_2darray x coord array
    @param yy np_2darray y coord array
    @param z_array np_2darray response vals
    """
    n_jobs = kwargs.pop("n_jobs", 4)
    chunk_size = kwargs.get("chunk_size", int(xx.size / (n_jobs - 1)))

    # make dask arrays
    dask_xyz = da.from_array((xx, yy, z_array), chunks=(3, chunk_size, "auto"), name="dask_all")
    dask_xx = dask_xyz[0, :, :]
    dask_yy = dask_xyz[1, :, :]
    dask_zz = dask_xyz[2, :, :]

    # select only valid values
    dask_valid_x1 = dask_xx[~da.isnan(dask_zz)]
    dask_valid_y1 = dask_yy[~da.isnan(dask_zz)]
    dask_valid_z1 = dask_zz[~da.isnan(dask_zz)]

    # interpolate for missing values
    return dask_interpolate(dask_valid_x1, dask_valid_y1, dask_valid_z1, dask_xx, dask_yy,
                            algorithm=algorithm, **kwargs)
def _get_ts_dask(pt_1, pt_2, pt_3, pt_4, out_x, out_y):
    """Calculate vertical and horizontal fractional distances t and s"""
    # General case, i.e. where the corners form an irregular rectangle
    t__, s__ = _get_ts_irregular_dask(pt_1, pt_2, pt_3, pt_4, out_y, out_x)

    # Cases where verticals are parallel
    idxs = da.isnan(t__) | da.isnan(s__)
    # Remove extra dimensions
    idxs = da.ravel(idxs)

    if da.any(idxs):
        t_new, s_new = _get_ts_uprights_parallel_dask(pt_1, pt_2, pt_3, pt_4, out_y, out_x)
        t__ = da.where(idxs, t_new, t__)
        s__ = da.where(idxs, s_new, s__)

    # Cases where both verticals and horizontals are parallel
    idxs = da.isnan(t__) | da.isnan(s__)
    # Remove extra dimensions
    idxs = da.ravel(idxs)

    if da.any(idxs):
        t_new, s_new = _get_ts_parallellogram_dask(pt_1, pt_2, pt_3, out_y, out_x)
        t__ = da.where(idxs, t_new, t__)
        s__ = da.where(idxs, s_new, s__)

    idxs = (t__ < 0) | (t__ > 1) | (s__ < 0) | (s__ > 1)
    t__ = da.where(idxs, np.nan, t__)
    s__ = da.where(idxs, np.nan, s__)

    return t__, s__
def pearson_1xn(
    x: da.Array,
    data: da.Array,
    value_range: Optional[Tuple[float, float]] = None,
    k: Optional[int] = None,
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Parameters
    ----------
    x : da.Array
    data : da.Array
    value_range : Optional[Tuple[float, float]] = None
    k : Optional[int] = None
    """
    _, ncols = data.shape

    corrs = []
    for j in range(ncols):
        mask = ~(da.isnan(x) | da.isnan(data[:, j]))
        _, (corr, _) = da.corrcoef(np.array(x)[mask], np.array(data[:, j])[mask])
        corrs.append(corr)

    (corrs,) = da.compute(corrs)
    corrs = np.asarray(corrs)
    return corr_filter(corrs, value_range, k)
def kendall_tau_1xn(
    x: da.Array,
    data: da.Array,
    value_range: Optional[Tuple[float, float]] = None,
    k: Optional[int] = None,
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Parameters
    ----------
    x : da.Array
    data : da.Array
    value_range : Optional[Tuple[float, float]] = None
    k : Optional[int] = None
    """
    _, ncols = data.shape

    corrs = []
    for j in range(ncols):
        mask = ~(da.isnan(x) | da.isnan(data[:, j]))
        corr = dask.delayed(lambda a, b: kendalltau(a, b)[0])(
            np.array(x)[mask], np.array(data[:, j])[mask])
        corrs.append(corr)

    (corrs,) = da.compute(corrs)
    corrs = np.asarray(corrs)
    return corr_filter(corrs, value_range, k)
def weight_data(data, flags, weights):
    """Return flagged, weighted data and flagged weights.

    Data that are zero, weights that are zero or unfeasibly high are all set
    to zero in the output arrays.

    Parameters
    ----------
    data : array of complex
    flags : array of uint8 or boolean
    weights : array of floats

    Returns
    -------
    weighted_data : array of complex
    flagged_weights : array of floats
    """
    # Suppress comparison with nan warnings by replacing nans with zeros
    flagged_weights = where(calprocs.asbool(flags) | da.isnan(weights),
                            weights.dtype.type(0), weights)
    weighted_data = data * flagged_weights

    # Clear all invalid elements, i.e. nans, zeros and high weights
    # High weights may occur due to certain corner cases when performing excision in ingest.
    # https://skaafrica.atlassian.net/browse/SPR1-291 should ensure these no longer occur, but
    # retain this check to be cautious.
    invalid = (da.isnan(weighted_data) | (weighted_data == 0) |
               (flagged_weights > calprocs.HIGH_WEIGHT))
    weighted_data = where(invalid, weighted_data.dtype.type(0), weighted_data)
    flagged_weights = where(invalid, flagged_weights.dtype.type(0), flagged_weights)
    return weighted_data, flagged_weights
def just_score(index_snp, sumstats, pheno, geno):
    clump = sumstats[sumstats.snp.isin(index_snp)]
    idx = clump.i.values.astype(int)
    boole = da.isnan(geno[:, idx]).any(axis=0)
    idx = idx[~boole]
    try:
        genclump = geno[:, idx]
    except ValueError:
        print(type(idx), idx.shape, geno.shape)
        print(idx)
        print(geno)
        raise
    aclump = clump[clump.i.isin(idx.tolist())]
    assert not np.isnan(aclump.slope).any()
    try:
        assert not da.isnan(genclump).any()
    except AssertionError:
        print(da.isnan(genclump).sum())
    prs = genclump.dot(aclump.slope)
    assert not da.isnan(prs).any()
    assert not pd.isna(pheno.PHENO).any()
    est = np.corrcoef(prs, pheno.PHENO)[1, 0] ** 2
    if np.isnan(est):
        print(genclump[0:10, :])
        print(prs.compute(), pheno.PHENO)
        print(prs.shape, pheno.shape)
        print(pheno.columns)
        raise Exception
    return est
def kendall_tau_nxn(data: da.Array) -> da.Array:
    """
    Kendall Tau correlation calculation of an n x n correlation matrix for n columns
    """
    _, ncols = data.shape

    corrmat = np.zeros(shape=(ncols, ncols))
    corr_list = []
    for i in range(ncols):
        for j in range(i + 1, ncols):
            mask = ~(da.isnan(data[:, i]) | da.isnan(data[:, j]))
            tmp = dask.delayed(lambda a, b: kendalltau(a, b).correlation)(
                data[:, i][mask], data[:, j][mask])
            corr_list.append(tmp)

    corr_comp = dask.compute(*corr_list)  # TODO: avoid explicit compute
    idx = 0
    for i in range(ncols):  # TODO: optimize by using numpy api
        for j in range(i + 1, ncols):
            corrmat[i][j] = corr_comp[idx]
            idx = idx + 1

    corrmat2 = corrmat + corrmat.T
    np.fill_diagonal(corrmat2, 1)
    corrmat = da.from_array(corrmat2)
    return corrmat
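# A minimal usage sketch (hypothetical data; assumes dask, numpy, and scipy.stats.kendalltau are
# imported as in the snippet above): build a small dask array with a few NaNs injected and check
# that kendall_tau_nxn returns a symmetric matrix with a unit diagonal.
import numpy as np
import dask.array as da

rng = np.random.default_rng(0)
values = rng.normal(size=(200, 4))
values[rng.integers(0, 200, size=10), rng.integers(0, 4, size=10)] = np.nan
data = da.from_array(values, chunks=(50, 4))

corrmat = kendall_tau_nxn(data)
print(corrmat.compute())  # 4 x 4, symmetric, ones on the diagonal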
def fill_year_month(inputfile, bucket_name, input_folder, keys, key_idx, cores):
    s3 = boto3.resource('s3')
    bucket = s3.Bucket(bucket_name)
    key_name_header = 's3://' + bucket_name + '/' + input_folder + '/'
    lst_already_downloaded = list(
        bucket.objects.filter(Prefix=input_folder + '/final_panoids_' + inputfile))
    if not (len(lst_already_downloaded) > 0 and
            lst_already_downloaded[0].key == input_folder + '/final_panoids_' + inputfile):
        pts = dd.read_csv(key_name_header + 'panoids_' + os.path.basename(inputfile),
                          blocksize=2000000, header=0)
        print("Filling missing month and year values with " + str(pts.npartitions) + " data chunks")
        pts_empty = pts[da.isnan(pts.YEAR) | da.isnan(pts.MONTH)]
        pts_full = pts[~(da.isnan(pts.YEAR) | da.isnan(pts.MONTH))]
        pts_filled = pts_empty.map_partitions(get_month_and_year_from_api, keys, 0)
        pts = dd.concat([pts_full, pts_filled], axis=0, interleave_partitions=True).compute()
        with fs.open(key_name_header + 'final_panoids_' + inputfile, 'wb') as f:
            pts.to_csv(f)
        # pts.to_csv(os.path.join(os.path.join(DATA_FOLDER, os.path.basename(input_folder),
        #                                      'final_panoids_' + inputfile)), index=False, header=True)
        pts = pts.loc[~(pd.isnull(pts.YEAR) | pd.isnull(pts.MONTH))]
        with fs.open(key_name_header + 'final_panoids_' + inputfile, 'wb') as f:
            pts.to_csv(f)
def process_pair(gwast, geno, pheno, pv, ld):
    print('Computing PRS with R2 of', ld, 'and pvalue threshold of', pv)
    index = gwast[gwast.loc[:, 'pvthr_%.2f' % pv].values]
    index = index.sort_values(by='pvalue', ascending=True).groupby(
        'clumps_%.2f' % ld).first()
    if not index.empty:
        curr_mem = available_memory - psutil.virtual_memory().available
        with ProgressBar(), dask.config.set(memory=curr_mem, scheduler="single-threaded"):
            sub = geno[:, sorted(index.i.values)]
            genotype = da.ma.masked_array(sub, mask=da.isnan(sub))
            eff_size = da.ma.masked_array(index.slope, mask=da.isnan(index.slope))
            prs = genotype.dot(eff_size).compute()
            # geno[:, index.i.values].dot(index.slope)
        print('PRS done for', ld, 'R2 and a pvalue threshold of', pv)
        pheno = pheno.copy()
        pheno['prs'] = prs
        print('Computing R2 with true phenotype')
        r2 = pheno.reindex(columns=['pheno', 'prs']).corr().loc['pheno', 'prs'] ** 2
        print(r2)
        return pheno, index, ld, pv, r2
    else:
        print('\tNo variant left after pruning... Skipping')
def get_counts(cost_coverage=False):
    """Get cell counts for each category."""
    code_dict = get_codes(cost_coverage)

    # Read in code and conus rasters
    chunks = {"band": 1, "x": 5000, "y": 5000}
    code_path = DP.join("rasters/albers/acre/cost_codes.tif")
    cost_path = DP.join("rasters/albers/acre/rent_map.tif")
    conus_path = DP.join("rasters/albers/acre/masks/conus.tif")
    codes = xr.open_rasterio(code_path, chunks=chunks)[0].data
    costs = xr.open_rasterio(cost_path, chunks=chunks)[0].data
    conus = xr.open_rasterio(conus_path, chunks=chunks)[0].data

    # Dask array's `count_nonzero` counts na values
    codes[da.isnan(codes)] = 0
    conus[da.isnan(conus)] = 0

    # If calculating costs
    if cost_coverage:
        coverage = codes[(costs > 0) | (codes == 9999)]  # No exclusion in cost
    else:
        coverage = codes.copy()

    # Extract codes from dictionary
    blm_codes = code_dict["blm"]
    tribal_codes = code_dict["tribal"]
    state_codes = code_dict["state"]
    private_codes = code_dict["private"]

    # Arrays
    developable = conus[codes != 9999]
    dev_covered = coverage[coverage != 9999]
    excl = coverage[coverage == 9999]
    blm = coverage[da.isin(coverage, blm_codes)]
    tribal = coverage[da.isin(coverage, tribal_codes)]
    state = coverage[da.isin(coverage, state_codes)]
    private = coverage[da.isin(coverage, private_codes)]
    arrays = {"excl": excl, "blm": blm, "tribal": tribal, "state": state,
              "private": private, "covered": coverage, "total": conus,
              "developable": developable, "dev_covered": dev_covered}

    # Collect counts
    counts = {}
    with Client():
        for key, item in tqdm(arrays.items(), position=0):
            counts["n" + key] = da.count_nonzero(item).compute()

    return counts
def _kendall_tau_1xn(x: da.Array, data: da.Array) -> da.Array:
    _, ncols = data.shape

    datamask = da.isnan(data)
    xmask = da.isnan(x)[:, 0]

    corrs = []
    for j in range(ncols):
        y = data[:, [j]]
        mask = ~(xmask | datamask[:, j])
        corr = da.from_delayed(kendalltau(x[mask], y[mask]), dtype=np.float, shape=())
        corrs.append(corr)

    return da.stack(corrs)
def haversines(x1, x2, y1, y2, z1=None, z2=None):
    x1, x2 = da.deg2rad(x1), da.deg2rad(x2)
    y1, y2 = da.deg2rad(y1), da.deg2rad(y2)
    x = (x2 - x1) * da.cos((y1 + y2) * 0.5) * cst.r_earth
    y = (y2 - y1) * cst.r_earth * da.ones_like(x1) * da.ones_like(x2)
    if z1 is None or z2 is None:
        return da.stack((x, y), axis=-1)
    else:
        z1 = da.where(da.isnan(z1), 0, z1)
        z2 = da.where(da.isnan(z2), 0, z2)
        z = (z2 - z1) * da.ones_like(x)
        return da.stack((x, y, z), axis=-1)
def cov(*args, axis=None, **kwargs):
    """ covariance """
    if axis is None:
        args = [x.flatten() for x in args]
        axis = 0
    X = da.stack(args, axis=-1).rechunk(com.CHUNKSIZE)
    cond = da.any(da.isnan(X), axis=-1)
    X = da.where(cond[..., None], np.nan, X)
    X -= da.nanmean(X, axis=axis, keepdims=True)
    X = da.where(da.isnan(X), 0, X)
    return X.swapaxes(axis, -1) @ X.swapaxes(axis, -2).conj() / (X.shape[axis] - 1)
def phase_rotation(self, darray, rotation, preview=None):
    """
    Description
    -----------
    Rotate the phase of the seismic data by a specified angle

    Parameters
    ----------
    darray : Array-like, acceptable inputs include Numpy, HDF5, or Dask Arrays
    rotation : Number (degrees), angle of rotation

    Keyword Arguments
    -----------------
    preview : str, enables or disables preview mode and specifies direction
        Acceptable inputs are (None, 'inline', 'xline', 'z')
        Optimizes chunk size in different orientations to facilitate rapid
        screening of algorithm output

    Returns
    -------
    result : Dask Array
    """
    phi = np.deg2rad(rotation)
    kernel = (1, 1, 25)
    darray, chunks_init = self.create_array(darray, kernel, preview=preview)
    analytical_trace = darray.map_blocks(signal.hilbert, dtype=darray.dtype)
    result = analytical_trace.real * da.cos(phi) - analytical_trace.imag * da.sin(phi)
    result = util.trim_dask_array(result, kernel)
    result[da.isnan(result)] = 0

    return result
def gradient_magnitude(self, darray, sigmas=(1, 1, 1), preview=None):
    """
    Description
    -----------
    Compute the 3D Gradient Magnitude using a Gaussian Operator

    Parameters
    ----------
    darray : Array-like, acceptable inputs include Numpy, HDF5, or Dask Arrays

    Keyword Arguments
    -----------------
    sigmas : tuple (len 3), gaussian operator in I, J, K
    preview : str, enables or disables preview mode and specifies direction
        Acceptable inputs are (None, 'inline', 'xline', 'z')
        Optimizes chunk size in different orientations to facilitate rapid
        screening of algorithm output

    Returns
    -------
    result : Dask Array
    """
    kernel = tuple(2 * (4 * np.array(sigmas) + 0.5).astype(int) + 1)
    darray, chunks_init = self.create_array(darray, kernel, preview=preview)
    result = darray.map_blocks(ndi.gaussian_gradient_magnitude, sigma=sigmas,
                               dtype=darray.dtype)
    result = util.trim_dask_array(result, kernel)
    result[da.isnan(result)] = 0

    return result
def trace_agc(self, darray, kernel=(1, 1, 9), preview=None):
    """
    Description
    -----------
    Apply an adaptive trace gain to input seismic

    Parameters
    ----------
    darray : Array-like, acceptable inputs include Numpy, HDF5, or Dask Arrays

    Keyword Arguments
    -----------------
    kernel : tuple (len 3), operator size
    preview : str, enables or disables preview mode and specifies direction
        Acceptable inputs are (None, 'inline', 'xline', 'z')
        Optimizes chunk size in different orientations to facilitate rapid
        screening of algorithm output

    Returns
    -------
    result : Dask Array
    """
    darray, chunks_init = self.create_array(darray, kernel, preview=preview)
    rms = self.rms(darray, kernel)
    rms_max = rms.max()
    result = darray * (1.5 - (rms / rms_max))
    result[da.isnan(result)] = 0

    return result
def eig_complex(self, darray, kernel=(3, 3, 9), preview=None):
    """
    Description
    -----------
    Compute multi-trace semblance from 3D seismic incorporating the analytic trace

    Parameters
    ----------
    darray : Array-like, acceptable inputs include Numpy, HDF5, or Dask Arrays

    Keyword Arguments
    -----------------
    kernel : tuple (len 3), operator size
    preview : str, enables or disables preview mode and specifies direction
        Acceptable inputs are (None, 'inline', 'xline', 'z')
        Optimizes chunk size in different orientations to facilitate rapid
        screening of algorithm output

    Returns
    -------
    result : Dask Array
    """
    # Function to compute the COV
    def cov(x, ki, kj, kk):
        x = x.reshape((ki * kj, kk))
        x = np.hstack([x.real, x.imag])
        return x.dot(x.T)

    # Function to extract patches and perform algorithm
    def operation(chunk, kernel):
        np.seterr(all='ignore')
        ki, kj, kk = kernel
        patches = util.extract_patches(chunk, kernel)
        out_data = []
        for i in range(0, patches.shape[0]):
            traces = patches[i]
            traces = traces.reshape(-1, ki * kj * kk)
            # Use a distinct name so the cov() helper above is not shadowed
            cova = np.apply_along_axis(cov, 1, traces, ki, kj, kk)
            vals = np.linalg.eigvals(cova)
            vals = np.abs(vals.max(axis=1) / vals.sum(axis=1))
            out_data.append(vals)
        out_data = np.asarray(out_data).reshape(patches.shape[:3])
        return out_data

    # Generate Dask Array as necessary and perform algorithm
    darray, chunks_init = self.create_array(darray, kernel, preview)
    hilbert = darray.map_blocks(util.hilbert, dtype=darray.dtype)
    result = hilbert.map_blocks(operation, kernel=kernel, dtype=darray.dtype)
    result = util.trim_dask_array(result, kernel)
    result[da.isnan(result)] = 0

    return result
def pod(self, threshold=1, inplace=True):
    """
    Method to change rainfall accumulation arrays to a boolean variable of whether it is dry
    or not. Unit is mm / day. Because we still like to incorporate np.NaN on the unobserved
    areas, the array has to be floats of 0 and 1.
    Default threshold taken from official definition:
    https://www.ecad.eu/indicesextremes/indicesdictionary.php#3
    """
    if hasattr(self, 'obsd'):
        data = da.where(da.isnan(self.obsd.array.data), self.obsd.array.data,
                        self.obsd.array.data < threshold)
    else:
        data = np.where(np.isnan(self.obs.array), self.obs.array,
                        self.obs.array.data < threshold)
    result = xr.DataArray(data=data,
                          coords=self.obs.array.coords,
                          dims=self.obs.array.dims,
                          attrs={'long_name': 'probability_of_dryness',
                                 'threshold_mm_day': threshold,
                                 'units': self.old_units,
                                 'new_units': ''},
                          name='-'.join([self.obs.basevar, 'pod']))
    if inplace:
        self.obs.array = result
        self.obs.newvar = 'pod'
    else:
        return result
def reflection_intensity(self, darray, kernel=(1, 1, 9), preview=None):
    """
    Description
    -----------
    Compute reflection intensity by integrating the trace over a specified window

    Parameters
    ----------
    darray : Array-like, acceptable inputs include Numpy, HDF5, or Dask Arrays

    Keyword Arguments
    -----------------
    kernel : tuple (len 3), operator size
    preview : str, enables or disables preview mode and specifies direction
        Acceptable inputs are (None, 'inline', 'xline', 'z')
        Optimizes chunk size in different orientations to facilitate rapid
        screening of algorithm output

    Returns
    -------
    result : Dask Array
    """
    # Function to extract patches and perform algorithm
    def operation(chunk, kernel):
        x = util.extract_patches(chunk, (1, 1, kernel[-1]))
        out = np.trapz(x).reshape(x.shape[:3])
        return out

    darray, chunks_init = self.create_array(darray, kernel, preview=preview)
    result = darray.map_blocks(operation, kernel=kernel, dtype=darray.dtype,
                               chunks=chunks_init)
    result[da.isnan(result)] = 0

    return result
def gaussian(self, darray, sigmas=(1, 1, 1), preview=None):
    """
    Description
    -----------
    Perform gaussian smoothing of input seismic

    Parameters
    ----------
    darray : Array-like, acceptable inputs include Numpy, HDF5, or Dask Arrays

    Keyword Arguments
    -----------------
    sigmas : tuple (len 3), smoothing parameters in I, J, K
    preview : str, enables or disables preview mode and specifies direction
        Acceptable inputs are (None, 'inline', 'xline', 'z')
        Optimizes chunk size in different orientations to facilitate rapid
        screening of algorithm output

    Returns
    -------
    result : Dask Array
    """
    # Generate Dask Array as necessary and perform algorithm
    kernel = tuple((np.array(sigmas) * 2.5).astype(int))
    darray, chunks_init = self.create_array(darray, kernel, preview=preview)
    result = darray.map_blocks(ndi.gaussian_filter, sigma=sigmas, dtype=darray.dtype)
    result = util.trim_dask_array(result, kernel)
    result[da.isnan(result)] = 0

    return result
def convolution(self, darray, kernel=(3, 3, 3), preview=None):
    """
    Description
    -----------
    Perform convolution smoothing of input seismic data

    Parameters
    ----------
    darray : Array-like, acceptable inputs include Numpy, HDF5, or Dask Arrays

    Keyword Arguments
    -----------------
    kernel : tuple (len 3), operator size in I, J, K
    preview : str, enables or disables preview mode and specifies direction
        Acceptable inputs are (None, 'inline', 'xline', 'z')
        Optimizes chunk size in different orientations to facilitate rapid
        screening of algorithm output

    Returns
    -------
    result : Dask Array
    """
    # Generate Dask Array as necessary and perform algorithm
    darray, chunks_init = self.create_array(darray, kernel, preview=preview)
    result = darray.map_blocks(ndi.uniform_filter, size=kernel, dtype=darray.dtype)
    result = util.trim_dask_array(result, kernel)
    result[da.isnan(result)] = 0

    return result
def compute_maf(X):
    r"""Compute minor allele frequencies.

    It assumes that ``X`` encodes 0, 1, and 2 representing the number of alleles
    (or dosage), or ``NaN`` to represent missing values.

    Parameters
    ----------
    X : array_like
        Genotype matrix.

    Returns
    -------
    array_like
        Minor allele frequencies.

    Examples
    --------
    .. doctest::

        >>> from numpy.random import RandomState
        >>> from limix.qc import compute_maf
        >>>
        >>> random = RandomState(0)
        >>> X = random.randint(0, 3, size=(100, 10))
        >>>
        >>> print(compute_maf(X)) # doctest: +FLOAT_CMP
        [0.49 0.49 0.445 0.495 0.5 0.45 0.48 0.48 0.47 0.435]
    """
    import dask.array as da
    import xarray as xr
    from pandas import DataFrame
    from numpy import isnan, logical_not, minimum, nansum

    if isinstance(X, da.Array):
        s0 = da.nansum(X, axis=0).compute()
        denom = 2 * (X.shape[0] - da.isnan(X).sum(axis=0)).compute()
    elif isinstance(X, DataFrame):
        s0 = X.sum(axis=0, skipna=True)
        denom = 2 * logical_not(X.isna()).sum(axis=0)
    elif isinstance(X, xr.DataArray):
        if "sample" in X.dims:
            kwargs = {"dim": "sample"}
        else:
            kwargs = {"axis": 0}
        s0 = X.sum(skipna=True, **kwargs)
        denom = 2 * logical_not(isnan(X)).sum(**kwargs)
    else:
        s0 = nansum(X, axis=0)
        denom = 2 * logical_not(isnan(X)).sum(axis=0)

    s0 = s0 / denom
    s1 = 1 - s0
    maf = minimum(s0, s1)

    if hasattr(maf, "name"):
        maf.name = "maf"

    return maf
def redistribute(pop, target, default):
    # Redistribute population proportionally at the target locations
    redistribution = target / target.sum()
    # If there are no target locations, all redistribution values will be inf.
    # In this case, use the default
    return da.where(
        da.isinf(redistribution) | da.isnan(redistribution),
        default,
        pop * redistribution)
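# A minimal usage sketch (hypothetical numbers): population is spread proportionally over the
# target weights; when the targets are all zero the division produces nan/inf and the given
# default is returned instead.
import numpy as np
import dask.array as da

pop = da.from_array(np.array([10.0, 20.0, 30.0]), chunks=3)
target = da.from_array(np.array([0.0, 1.0, 3.0]), chunks=3)   # weights sum to 4
no_target = da.zeros(3, chunks=3)                             # no target locations at all

print(redistribute(pop, target, default=0.0).compute())       # [ 0.   5.  22.5]
print(redistribute(pop, no_target, default=0.0).compute())    # [0. 0. 0.]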
def scatter_with_regression(
    x: da.Array, y: da.Array, sample_size: int, k: Optional[int] = None
) -> Tuple[Tuple[da.Array, da.Array], Tuple[da.Array, da.Array], Optional[da.Array]]:
    """Calculate pearson correlation on 2 given arrays.

    Parameters
    ----------
    xarr : da.Array
    yarr : da.Array
    sample_size : int
    k : Optional[int] = None
        Highlight k points which influence pearson correlation most
    """
    if k == 0:
        raise ValueError("k should be larger than 0")

    xp1 = da.vstack([x, da.ones_like(x)]).T
    xp1 = xp1.rechunk((xp1.chunks[0], -1))

    mask = ~(da.isnan(x) | da.isnan(y))
    # if chunk size in the first dimension is 1, lstsq will use sfqr instead of tsqr,
    # where the former does not support nan in shape.
    if len(xp1.chunks[0]) == 1:
        xp1 = xp1.rechunk((2, -1))
        y = y.rechunk((2, -1))
        mask = mask.rechunk((2, -1))

    (coeffa, coeffb), _, _, _ = da.linalg.lstsq(xp1[mask], y[mask])

    if sample_size < x.shape[0]:
        samplesel = da.random.choice(x.shape[0], int(sample_size), chunks=x.chunksize)
        x = x[samplesel]
        y = y[samplesel]

    if k is None:
        return (coeffa, coeffb), (x, y), None

    influences = pearson_influence(x, y)
    return (coeffa, coeffb), (x, y), influences
def build_exclusions():
    """Build exclusion file for the WETO rent map."""
    # Using the 'core exclusions raster' that Anthony put together.
    excl = xr.open_rasterio(EXL_PATH, chunks=CHUNKS)
    roads = xr.open_rasterio(ROAD_PATH, chunks=CHUNKS)
    conus = xr.open_rasterio(CONUS_PATH, chunks=CHUNKS)
    rails = xr.open_rasterio(RAIL_PATH, chunks=CHUNKS)

    # Different na values every time :/
    roadsna = roads.attrs["nodatavals"][0]
    conusna = conus.attrs["nodatavals"][0]
    railsna = rails.attrs["nodatavals"][0]

    # Get just the data arrays
    excl = excl[0].data
    roads = roads[0].data
    conus = conus[0].data
    rails = rails[0].data

    # Set nodata values to 0
    excl[da.isnan(excl)] = 0
    roads[roads == roadsna] = 0
    conus[conus == conusna] = 0
    rails[rails == railsna] = 0

    # We need to reverse the original exclusions
    excl = (excl - 1) * -1

    # Combine roads
    excl = da.stack([excl, roads, rails], axis=0).max(axis=0)

    # And let's make exclusion values 9999 since 1 will be a code
    excl[excl == 1] = 9999

    # And cut out just CONUS for mapping
    excl = excl * conus

    # Compute
    print("Combining exclusion layers...")
    with Client():
        excl = excl.compute()

    # Save to raster
    print("Saving to 90 meter reV grid...")
    to_raster(excl, DP.join("rasters", "rent_exclusions.tif"), template=EXL_PATH,
              compress="deflate")

    # Warp to acre grid in North American Albers Equal Area Conic
    print("Warping to acre grid...")
    res = 63.614907234075254
    warp(DP.join("rasters", "rent_exclusions.tif"),
         DP.join("rasters", "albers", "acre", "rent_exclusions.tif"),
         xRes=res, yRes=res, overwrite=True)

    print("Done.")
def scatter_with_regression(
    xarr: da.Array, yarr: da.Array, sample_size: int, k: Optional[int] = None
) -> Tuple[Tuple[float, float], dd.DataFrame, Optional[np.ndarray]]:
    """
    Calculate pearson correlation on 2 given arrays.

    Parameters
    ----------
    xarr : da.Array
    yarr : da.Array
    sample_size : int
    k : Optional[int] = None
        Highlight k points which influence pearson correlation most

    Returns
    -------
    Intermediate
    """
    if k == 0:
        raise ValueError("k should be larger than 0")

    mask = ~(da.isnan(xarr) | da.isnan(yarr))
    xarr = da.from_array(np.array(xarr)[mask])
    yarr = da.from_array(np.array(yarr)[mask])
    xarrp1 = da.vstack([xarr, da.ones_like(xarr)]).T
    xarrp1 = xarrp1.rechunk((xarrp1.chunks[0], -1))
    (coeffa, coeffb), _, _, _ = da.linalg.lstsq(xarrp1, yarr)

    if sample_size < len(xarr):
        samplesel = np.random.choice(len(xarr), int(sample_size))
        xarr = xarr[samplesel]
        yarr = yarr[samplesel]

    df = dd.concat([dd.from_dask_array(arr) for arr in [xarr, yarr]], axis=1)
    df.columns = ["x", "y"]

    if k is None:
        return (coeffa, coeffb), df, None

    influences = pearson_influence(xarr, yarr)
    return (coeffa, coeffb), df, influences
def _unequal_var_ttest_denom(v1, n1, v2, n2):
    vn1 = v1 / n1
    vn2 = v2 / n2
    with np.errstate(divide="ignore", invalid="ignore"):
        df = (vn1 + vn2)**2 / (vn1**2 / (n1 - 1) + vn2**2 / (n2 - 1))

    # If df is undefined, variances are zero (assumes n1 > 0 & n2 > 0).
    # Hence it doesn't matter what df is as long as it's not NaN.
    df = da.where(da.isnan(df), 1, df)  # XXX: np -> da
    denom = da.sqrt(vn1 + vn2)
    return df, denom
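# A minimal usage sketch (hypothetical data): feed the Welch denominator per-column variances
# and sample sizes from two dask arrays, then form the t statistic with the returned denom.
import numpy as np
import dask.array as da

rng = np.random.default_rng(0)
a = da.from_array(rng.normal(0.0, 1.0, size=(1000, 5)), chunks=(250, 5))
b = da.from_array(rng.normal(0.1, 1.5, size=(800, 5)), chunks=(200, 5))

v1, n1 = a.var(axis=0, ddof=1), a.shape[0]
v2, n2 = b.var(axis=0, ddof=1), b.shape[0]

df, denom = _unequal_var_ttest_denom(v1, n1, v2, n2)
t = (a.mean(axis=0) - b.mean(axis=0)) / denom
print(da.compute(df, t))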
def _true_color_dask(r, g, b, nodata, c, th):
    pixel_max = 255

    alpha = da.where(da.logical_or(da.isnan(r), r <= nodata), 0, pixel_max).astype(np.uint8)

    red = (_normalize_data(r, pixel_max, c, th)).astype(np.uint8)
    green = (_normalize_data(g, pixel_max, c, th)).astype(np.uint8)
    blue = (_normalize_data(b, pixel_max, c, th)).astype(np.uint8)

    out = da.stack([red, green, blue, alpha], axis=-1)
    return out
def response_frequency(self, darray, sample_rate=4, preview=None):
    """
    Description
    -----------
    Compute the Response Frequency of the input data

    Parameters
    ----------
    darray : Array-like, acceptable inputs include Numpy, HDF5, or Dask Arrays

    Keyword Arguments
    -----------------
    sample_rate : Number, sample rate in milliseconds (ms)
    preview : str, enables or disables preview mode and specifies direction
        Acceptable inputs are (None, 'inline', 'xline', 'z')
        Optimizes chunk size in different orientations to facilitate rapid
        screening of algorithm output

    Returns
    -------
    result : Dask Array
    """
    def operation(chunk1, chunk2, chunk3):
        out = np.zeros(chunk1.shape)
        for i, j in np.ndindex(out.shape[:-1]):
            ints = np.unique(chunk3[i, j, :])
            for ii in ints:
                idx = np.where(chunk3[i, j, :] == ii)[0]
                peak = idx[chunk1[i, j, idx].argmax()]
                out[i, j, idx] = chunk2[i, j, peak]
        return out

    darray, chunks_init = self.create_array(darray, preview=preview)
    env = self.envelope(darray)
    inst_freq = self.instantaneous_frequency(darray, sample_rate)
    troughs = env.map_blocks(util.local_events, comparator=np.less, dtype=darray.dtype)
    troughs = troughs.cumsum(axis=-1)
    result = da.map_blocks(operation, env, inst_freq, troughs, dtype=darray.dtype)
    result[da.isnan(result)] = 0

    return result
def _method_c(self, sr, il, cos_z, nodata_samps, min_samples, n_jobs, robust, band_coeffs, band):
    r"""
    Normalizes terrain using the C-correction method

    Args:
        sr (Dask Array): The surface reflectance data.
        il (Dask Array): The solar illumination.
        cos_z (Dask Array): The cosine of the solar zenith angle.
        nodata_samps (Dask Array): Samples where 1='no data' and 0='valid data'.
        min_samples (Optional[int]): The minimum number of samples required to fit a regression.
        n_jobs (Optional[int]): The number of parallel workers for ``LinearRegression.fit``
            or ``TheilSenRegressor.fit``.
        robust (Optional[bool]): Whether to fit a robust regression.
        band_coeffs (dict): Slope and intercept coefficients for each band.
        band (int | str): The band.

    References:
        See :cite:`teillet_etal_1982` for the C-correction method.

    Returns:
        ``dask.array``
    """
    nodata = nodata_samps.compute().flatten()
    idx = np.where(nodata == 0)[0]

    if idx.shape[0] < min_samples:
        return sr

    X = il.compute().flatten()[idx][:, np.newaxis]

    if band_coeffs:
        slope_m, intercept_b = band_coeffs[band]
    else:
        y = sr.compute().flatten()[idx]
        slope_m, intercept_b = self._regress_a(X, y, robust, n_jobs)

    c = intercept_b / slope_m

    # Get the A-factor
    a_factor = (cos_z + c) / (il + c)
    a_factor = da.where(da.isnan(a_factor), 1, a_factor)

    sr_a = sr * a_factor

    return da.where((sr_a > 1) | (nodata_samps == 1), sr, sr_a).clip(0, 1)
def test_dtype_complex():
    x = np.arange(24).reshape((4, 6)).astype('f4')
    y = np.arange(24).reshape((4, 6)).astype('i8')
    z = np.arange(24).reshape((4, 6)).astype('i2')

    a = da.from_array(x, chunks=(2, 3))
    b = da.from_array(y, chunks=(2, 3))
    c = da.from_array(z, chunks=(2, 3))

    def eq(a, b):
        return (isinstance(a, np.dtype) and
                isinstance(b, np.dtype) and
                str(a) == str(b))

    assert eq(a._dtype, x.dtype)
    assert eq(b._dtype, y.dtype)

    assert eq((a + 1)._dtype, (x + 1).dtype)
    assert eq((a + b)._dtype, (x + y).dtype)
    assert eq(a.T._dtype, x.T.dtype)
    assert eq(a[:3]._dtype, x[:3].dtype)
    assert eq((a.dot(b.T))._dtype, (x.dot(y.T)).dtype)

    assert eq(stack([a, b])._dtype, np.vstack([x, y]).dtype)
    assert eq(concatenate([a, b])._dtype, np.concatenate([x, y]).dtype)

    assert eq(b.std()._dtype, y.std().dtype)
    assert eq(c.sum()._dtype, z.sum().dtype)
    assert eq(a.min()._dtype, a.min().dtype)
    assert eq(b.std()._dtype, b.std().dtype)
    assert eq(a.argmin(axis=0)._dtype, a.argmin(axis=0).dtype)

    assert eq(da.sin(c)._dtype, np.sin(z).dtype)
    assert eq(da.exp(b)._dtype, np.exp(y).dtype)
    assert eq(da.floor(a)._dtype, np.floor(x).dtype)
    assert eq(da.isnan(b)._dtype, np.isnan(y).dtype)
    with ignoring(ImportError):
        assert da.isnull(b)._dtype == 'bool'
        assert da.notnull(b)._dtype == 'bool'

    x = np.array([('a', 1)], dtype=[('text', 'S1'), ('numbers', 'i4')])
    d = da.from_array(x, chunks=(1,))

    assert eq(d['text']._dtype, x['text'].dtype)
    assert eq(d[['numbers', 'text']]._dtype, x[['numbers', 'text']].dtype)
def test_arithmetic():
    x = np.arange(5).astype('f4') + 2
    y = np.arange(5).astype('i8') + 2
    z = np.arange(5).astype('i4') + 2
    a = da.from_array(x, chunks=(2,))
    b = da.from_array(y, chunks=(2,))
    c = da.from_array(z, chunks=(2,))

    assert eq(a + b, x + y)
    assert eq(a * b, x * y)
    assert eq(a - b, x - y)
    assert eq(a / b, x / y)
    assert eq(b & b, y & y)
    assert eq(b | b, y | y)
    assert eq(b ^ b, y ^ y)
    assert eq(a // b, x // y)
    assert eq(a ** b, x ** y)
    assert eq(a % b, x % y)
    assert eq(a > b, x > y)
    assert eq(a < b, x < y)
    assert eq(a >= b, x >= y)
    assert eq(a <= b, x <= y)
    assert eq(a == b, x == y)
    assert eq(a != b, x != y)

    assert eq(a + 2, x + 2)
    assert eq(a * 2, x * 2)
    assert eq(a - 2, x - 2)
    assert eq(a / 2, x / 2)
    assert eq(b & True, y & True)
    assert eq(b | True, y | True)
    assert eq(b ^ True, y ^ True)
    assert eq(a // 2, x // 2)
    assert eq(a ** 2, x ** 2)
    assert eq(a % 2, x % 2)
    assert eq(a > 2, x > 2)
    assert eq(a < 2, x < 2)
    assert eq(a >= 2, x >= 2)
    assert eq(a <= 2, x <= 2)
    assert eq(a == 2, x == 2)
    assert eq(a != 2, x != 2)

    assert eq(2 + b, 2 + y)
    assert eq(2 * b, 2 * y)
    assert eq(2 - b, 2 - y)
    assert eq(2 / b, 2 / y)
    assert eq(True & b, True & y)
    assert eq(True | b, True | y)
    assert eq(True ^ b, True ^ y)
    assert eq(2 // b, 2 // y)
    assert eq(2 ** b, 2 ** y)
    assert eq(2 % b, 2 % y)
    assert eq(2 > b, 2 > y)
    assert eq(2 < b, 2 < y)
    assert eq(2 >= b, 2 >= y)
    assert eq(2 <= b, 2 <= y)
    assert eq(2 == b, 2 == y)
    assert eq(2 != b, 2 != y)

    assert eq(-a, -x)
    assert eq(abs(a), abs(x))
    assert eq(~(a == b), ~(x == y))
    assert eq(~(a == b), ~(x == y))

    assert eq(da.logaddexp(a, b), np.logaddexp(x, y))
    assert eq(da.logaddexp2(a, b), np.logaddexp2(x, y))
    assert eq(da.exp(b), np.exp(y))
    assert eq(da.log(a), np.log(x))
    assert eq(da.log10(a), np.log10(x))
    assert eq(da.log1p(a), np.log1p(x))
    assert eq(da.expm1(b), np.expm1(y))
    assert eq(da.sqrt(a), np.sqrt(x))
    assert eq(da.square(a), np.square(x))

    assert eq(da.sin(a), np.sin(x))
    assert eq(da.cos(b), np.cos(y))
    assert eq(da.tan(a), np.tan(x))
    assert eq(da.arcsin(b / 10), np.arcsin(y / 10))
    assert eq(da.arccos(b / 10), np.arccos(y / 10))
    assert eq(da.arctan(b / 10), np.arctan(y / 10))
    assert eq(da.arctan2(b * 10, a), np.arctan2(y * 10, x))
    assert eq(da.hypot(b, a), np.hypot(y, x))
    assert eq(da.sinh(a), np.sinh(x))
    assert eq(da.cosh(b), np.cosh(y))
    assert eq(da.tanh(a), np.tanh(x))
    assert eq(da.arcsinh(b * 10), np.arcsinh(y * 10))
    assert eq(da.arccosh(b * 10), np.arccosh(y * 10))
    assert eq(da.arctanh(b / 10), np.arctanh(y / 10))
    assert eq(da.deg2rad(a), np.deg2rad(x))
    assert eq(da.rad2deg(a), np.rad2deg(x))

    assert eq(da.logical_and(a < 1, b < 4), np.logical_and(x < 1, y < 4))
    assert eq(da.logical_or(a < 1, b < 4), np.logical_or(x < 1, y < 4))
    assert eq(da.logical_xor(a < 1, b < 4), np.logical_xor(x < 1, y < 4))
    assert eq(da.logical_not(a < 1), np.logical_not(x < 1))
    assert eq(da.maximum(a, 5 - a), np.maximum(a, 5 - a))
    assert eq(da.minimum(a, 5 - a), np.minimum(a, 5 - a))
    assert eq(da.fmax(a, 5 - a), np.fmax(a, 5 - a))
    assert eq(da.fmin(a, 5 - a), np.fmin(a, 5 - a))

    assert eq(da.isreal(a + 1j * b), np.isreal(x + 1j * y))
    assert eq(da.iscomplex(a + 1j * b), np.iscomplex(x + 1j * y))
    assert eq(da.isfinite(a), np.isfinite(x))
    assert eq(da.isinf(a), np.isinf(x))
    assert eq(da.isnan(a), np.isnan(x))
    assert eq(da.signbit(a - 3), np.signbit(x - 3))
    assert eq(da.copysign(a - 3, b), np.copysign(x - 3, y))
    assert eq(da.nextafter(a - 3, b), np.nextafter(x - 3, y))
    assert eq(da.ldexp(c, c), np.ldexp(z, z))
    assert eq(da.fmod(a * 12, b), np.fmod(x * 12, y))
    assert eq(da.floor(a * 0.5), np.floor(x * 0.5))
    assert eq(da.ceil(a), np.ceil(x))
    assert eq(da.trunc(a / 2), np.trunc(x / 2))

    assert eq(da.degrees(b), np.degrees(y))
    assert eq(da.radians(a), np.radians(x))

    assert eq(da.rint(a + 0.3), np.rint(x + 0.3))
    assert eq(da.fix(a - 2.5), np.fix(x - 2.5))

    assert eq(da.angle(a + 1j), np.angle(x + 1j))
    assert eq(da.real(a + 1j), np.real(x + 1j))
    assert eq((a + 1j).real, np.real(x + 1j))
    assert eq(da.imag(a + 1j), np.imag(x + 1j))
    assert eq((a + 1j).imag, np.imag(x + 1j))
    assert eq(da.conj(a + 1j * b), np.conj(x + 1j * y))
    assert eq((a + 1j * b).conj(), (x + 1j * y).conj())

    assert eq(da.clip(b, 1, 4), np.clip(y, 1, 4))
    assert eq(da.fabs(b), np.fabs(y))
    assert eq(da.sign(b - 2), np.sign(y - 2))

    l1, l2 = da.frexp(a)
    r1, r2 = np.frexp(x)
    assert eq(l1, r1)
    assert eq(l2, r2)

    l1, l2 = da.modf(a)
    r1, r2 = np.modf(x)
    assert eq(l1, r1)
    assert eq(l2, r2)

    assert eq(da.around(a, -1), np.around(x, -1))