def dask_gd2_nanfill(xx, yy, z_array, algorithm='cubic', **kwargs):
    """!
    @brief 2d interpolation using dask and griddata
    @param xx np_2darray x coord array
    @param yy np_2darray y coord array
    @param z_array np_2darray response vals
    @param algorithm str interpolation method passed through to griddata
    """
    n_jobs = kwargs.pop("n_jobs", 4)
    # guard against ZeroDivisionError when n_jobs == 1
    chunk_size = kwargs.get("chunk_size", int(xx.size / max(n_jobs - 1, 1)))
    # make dask arrays
    dask_xyz = da.from_array((xx, yy, z_array),
                             chunks=(3, chunk_size, "auto"),
                             name="dask_all")
    dask_xx = dask_xyz[0, :, :]
    dask_yy = dask_xyz[1, :, :]
    dask_zz = dask_xyz[2, :, :]

    # select only valid values
    dask_valid_x1 = dask_xx[~da.isnan(dask_zz)]
    dask_valid_y1 = dask_yy[~da.isnan(dask_zz)]
    dask_valid_z1 = dask_zz[~da.isnan(dask_zz)]

    # interpolate for missing values
    return dask_interpolate(dask_valid_x1,
                            dask_valid_y1,
                            dask_valid_z1,
                            dask_xx,
                            dask_yy,
                            algorithm=algorithm,
                            **kwargs)
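
# A minimal, self-contained sketch of the NaN-masking pattern used above
# (dask_interpolate itself is external to this example):
import numpy as np
import dask.array as da

zz = da.from_array(np.array([[1.0, np.nan], [3.0, 4.0]]), chunks=1)
valid = ~da.isnan(zz)          # lazy boolean mask
print(zz[valid].compute())     # -> [1. 3. 4.]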
Example No. 2
def _get_ts_dask(pt_1, pt_2, pt_3, pt_4, out_x, out_y):
    """Calculate vertical and horizontal fractional distances t and s"""

    # General case, i.e. where the corners form an irregular rectangle
    t__, s__ = _get_ts_irregular_dask(pt_1, pt_2, pt_3, pt_4, out_y, out_x)

    # Cases where verticals are parallel
    idxs = da.isnan(t__) | da.isnan(s__)
    # Remove extra dimensions
    idxs = da.ravel(idxs)

    if da.any(idxs):
        t_new, s_new = _get_ts_uprights_parallel_dask(pt_1, pt_2,
                                                      pt_3, pt_4,
                                                      out_y, out_x)

        t__ = da.where(idxs, t_new, t__)
        s__ = da.where(idxs, s_new, s__)

    # Cases where both verticals and horizontals are parallel
    idxs = da.isnan(t__) | da.isnan(s__)
    # Remove extra dimensions
    idxs = da.ravel(idxs)
    if da.any(idxs):
        t_new, s_new = _get_ts_parallellogram_dask(pt_1, pt_2, pt_3,
                                                   out_y, out_x)
        t__ = da.where(idxs, t_new, t__)
        s__ = da.where(idxs, s_new, s__)

    idxs = (t__ < 0) | (t__ > 1) | (s__ < 0) | (s__ > 1)
    t__ = da.where(idxs, np.nan, t__)
    s__ = da.where(idxs, np.nan, s__)

    return t__, s__
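
# Self-contained sketch of the fallback pattern above: wherever the first
# pass produced NaN, substitute values from a second pass via da.where.
import numpy as np
import dask.array as da

t_first = da.from_array(np.array([0.2, np.nan, 0.7]), chunks=2)
t_fallback = da.from_array(np.array([9.0, 0.5, 9.0]), chunks=2)
print(da.where(da.isnan(t_first), t_fallback, t_first).compute())  # -> [0.2 0.5 0.7]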
Example No. 4
def pearson_1xn(
    x: da.Array,
    data: da.Array,
    value_range: Optional[Tuple[float, float]] = None,
    k: Optional[int] = None,
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Parameters
    ----------
    x : da.Array
    data : da.Array
    value_range : Optional[Tuple[float, float]] = None
    k : Optional[int] = None
    """
    _, ncols = data.shape

    corrs = []
    for j in range(ncols):
        mask = ~(da.isnan(x) | da.isnan(data[:, j]))
        _, (corr,
            _) = da.corrcoef(np.array(x)[mask],
                             np.array(data[:, j])[mask])
        corrs.append(corr)

    (corrs, ) = da.compute(corrs)
    corrs = np.asarray(corrs)

    return corr_filter(corrs, value_range, k)
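
# A minimal sketch of the pairwise masking above, on toy data; boolean
# selection leaves dask chunk sizes unknown, so they are resolved before
# calling corrcoef.
import numpy as np
import dask.array as da

x = da.from_array(np.array([1.0, 2.0, np.nan, 4.0]), chunks=2)
y = da.from_array(np.array([2.0, 4.0, 6.0, 8.0]), chunks=2)
mask = ~(da.isnan(x) | da.isnan(y))
xm = x[mask].compute_chunk_sizes()
ym = y[mask].compute_chunk_sizes()
print(da.corrcoef(xm, ym)[0, 1].compute())  # -> 1.0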
Example No. 5
def kendall_tau_1xn(
    x: da.Array,
    data: da.Array,
    value_range: Optional[Tuple[float, float]] = None,
    k: Optional[int] = None,
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Parameters
    ----------
    x : da.Array
    data : da.Array
    value_range : Optional[Tuple[float, float]] = None
    k : Optional[int] = None
    """

    _, ncols = data.shape

    corrs = []
    for j in range(ncols):
        mask = ~(da.isnan(x) | da.isnan(data[:, j]))
        corr = dask.delayed(lambda a, b: kendalltau(a, b)[0])(
            np.array(x)[mask], np.array(data[:, j])[mask])
        corrs.append(corr)

    (corrs, ) = da.compute(corrs)
    corrs = np.asarray(corrs)
    return corr_filter(corrs, value_range, k)
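
# Sketch of wrapping a SciPy statistic in dask.delayed, as above;
# assumes scipy is installed.
import numpy as np
import dask
from scipy.stats import kendalltau

a = np.array([1.0, 2.0, 3.0, 4.0])
b = np.array([1.0, 3.0, 2.0, 4.0])
tau = dask.delayed(lambda u, v: kendalltau(u, v)[0])(a, b)
print(tau.compute())  # Kendall's tau for the two samples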
Example No. 6
def weight_data(data, flags, weights):
    """Return flagged, weighted data and flagged weights.

    Data that are zero, weights that are zero or unfeasibly high
    are all set to zero in the output arrays

    Parameters
    ----------
    data    : array of complex
    flags   : array of uint8 or boolean
    weights : array of floats

    Returns
    -------
    weighted_data : array of complex
    flagged_weights : array of floats
    """
    # Suppress comparison with nan warnings by replacing nans with zeros
    flagged_weights = where(
        calprocs.asbool(flags) | da.isnan(weights), weights.dtype.type(0),
        weights)
    weighted_data = data * flagged_weights
    # Clear all invalid elements, ie. nans, zeros and high weights
    # High weights may occur due to certain corner cases when performing excision in ingest.
    # https://skaafrica.atlassian.net/browse/SPR1-291 should ensure these no longer occur, but
    # retain this check to be cautious.
    invalid = (da.isnan(weighted_data) | (weighted_data == 0) |
               (flagged_weights > calprocs.HIGH_WEIGHT))
    weighted_data = where(invalid, weighted_data.dtype.type(0), weighted_data)
    flagged_weights = where(invalid, flagged_weights.dtype.type(0),
                            flagged_weights)
    return weighted_data, flagged_weights
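
# Self-contained sketch of the invalid-element clearing above, using
# dask.array.where directly (calprocs is external to this example):
import numpy as np
import dask.array as da

weights = da.from_array(np.array([0.5, np.nan, 2.0]), chunks=2)
data = da.from_array(np.array([1.0 + 1j, 2.0 + 0j, 0.0 + 0j]), chunks=2)
weighted = data * da.where(da.isnan(weights), 0.0, weights)
invalid = da.isnan(weighted) | (weighted == 0)
print(da.where(invalid, 0.0, weighted).compute())  # -> [0.5+0.5j 0.+0.j 0.+0.j]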
Example No. 7
def just_score(index_snp, sumstats, pheno, geno):
    clump = sumstats[sumstats.snp.isin(index_snp)]
    idx = clump.i.values.astype(int)
    boole = da.isnan(geno[:, idx]).any(axis=0)
    idx = idx[~boole]
    try:
        genclump = geno[:, idx]
    except ValueError:
        print(type(idx), idx.shape, geno.shape)
        print(idx)
        print(geno)
        raise
    aclump = clump[clump.i.isin(idx.tolist())]
    assert not np.isnan(aclump.slope).any()
    try:
        assert not da.isnan(genclump).any()
    except AssertionError:
        print(da.isnan(genclump).sum())
    prs = genclump.dot(aclump.slope)
    assert not da.isnan(prs).any()
    assert not pd.isna(pheno.PHENO).any()
    est = np.corrcoef(prs, pheno.PHENO)[1, 0]**2
    if np.isnan(est):
        print(genclump[0:10, :])
        print(prs.compute(), pheno.PHENO)
        print(prs.shape, pheno.shape)
        print(pheno.columns)
        raise Exception
    return est
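
# Sketch of the column filter used above: drop any genotype column that
# still contains a NaN.
import numpy as np
import dask.array as da

geno = da.from_array(np.array([[1.0, np.nan], [2.0, 3.0]]), chunks=1)
keep = ~da.isnan(geno).any(axis=0).compute()
print(geno[:, keep].compute())  # only the first column survives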
Example No. 8
def kendall_tau_nxn(data: da.Array) -> da.Array:
    """
    Kendall Tau correlation calculation of an n x n correlation matrix for n columns
    """
    _, ncols = data.shape

    corrmat = np.zeros(shape=(ncols, ncols))
    corr_list = []
    for i in range(ncols):
        for j in range(i + 1, ncols):
            mask = ~(da.isnan(data[:, i]) | da.isnan(data[:, j]))
            tmp = dask.delayed(lambda a, b: kendalltau(a, b).correlation)(
                data[:, i][mask], data[:, j][mask])
            corr_list.append(tmp)
    corr_comp = dask.compute(*corr_list)  # TODO avoid explicitly compute
    idx = 0
    for i in range(ncols):  # TODO: Optimize by using numpy api
        for j in range(i + 1, ncols):
            corrmat[i][j] = corr_comp[idx]
            idx = idx + 1

    corrmat2 = corrmat + corrmat.T
    np.fill_diagonal(corrmat2, 1)
    corrmat = da.from_array(corrmat2)

    return corrmat
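
# Sketch of the symmetrize-and-fill step above on a toy upper triangle.
import numpy as np
import dask.array as da

upper = np.zeros((2, 2))
upper[0, 1] = 0.3                     # upper-triangle correlation
full = upper + upper.T
np.fill_diagonal(full, 1)
print(da.from_array(full).compute())  # [[1.  0.3] [0.3 1. ]]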
Example No. 9
def fill_year_month(inputfile, bucket_name, input_folder, keys, key_idx,
                    cores):
    s3 = boto3.resource('s3')
    bucket = s3.Bucket(bucket_name)
    key_name_header = 's3://' + bucket_name + '/' + input_folder + '/'
    lst_already_downloaded = list(
        bucket.objects.filter(Prefix=input_folder + '/final_panoids_' +
                              inputfile))
    if not (len(lst_already_downloaded) > 0 and lst_already_downloaded[0].key
            == input_folder + '/final_panoids_' + inputfile):
        pts = dd.read_csv(key_name_header + 'panoids_' +
                          os.path.basename(inputfile),
                          blocksize=2000000,
                          header=0)

        print("Filling missing month and year values with " +
              str(pts.npartitions) + "data chunks")
        pts_empty = pts[da.isnan(pts.YEAR) | da.isnan(pts.MONTH)]
        pts_full = pts[~(da.isnan(pts.YEAR) | da.isnan(pts.MONTH))]

        pts_filled = pts_empty.map_partitions(get_month_and_year_from_api,
                                              keys, 0)
        pts = dd.concat([pts_full, pts_filled],
                        axis=0,
                        interleave_partitions=True).compute()
        with fs.open(key_name_header + 'final_panoids_' + inputfile,
                     'wb') as f:
            pts.to_csv(f)
        #pts.to_csv(os.path.join(os.path.join(DATA_FOLDER, os.path.basename(input_folder), 'final_panoids_' + inputfile)), index=False, header=True)
        pts = pts.loc[~(pd.isnull(pts.YEAR) | pd.isnull(pts.MONTH))]

        with fs.open(key_name_header + 'final_panoids_' + inputfile,
                     'wb') as f:
            pts.to_csv(f)
Example No. 10
    def process_pair(gwast, geno, pheno,  pv, ld):
        print('Computing PRS with R2 of', ld, 'and pvalue threshold of', pv)
        index = gwast[gwast.loc[:, 'pvthr_%.2f' % pv].values]
        index = index.sort_values(by='pvalue', ascending=True).groupby(
            'clumps_%.2f' % ld).first()
        if not index.empty:
            curr_mem = available_memory - psutil.virtual_memory().available
            with ProgressBar(), dask.config.set(memory=curr_mem,
                                                scheduler="single-threaded"):
                sub = geno[:, sorted(index.i.values)]
                genotype = da.ma.masked_array(sub, mask=da.isnan(sub))
                eff_size = da.ma.masked_array(index.slope, mask=da.isnan(
                    index.slope))
                prs = genotype.dot(eff_size).compute()
                # geno[:, index.i.values].dot(index.slope)
            print('PRS done for', ld, 'R2 and a pvalue threshold of', pv)
            pheno = pheno.copy()
            pheno['prs'] = prs
            print('Computing R2 with true phenotype')
            r2 = pheno.reindex(columns=['pheno', 'prs']).corr().loc[
                     'pheno', 'prs'] ** 2
            print(r2)
            return pheno, index, ld, pv, r2

        else:
            print('\tNo variant left after pruning... Skipping')
Example No. 11
def get_counts(cost_coverage=False):
    """Get cell counts for each category."""

    code_dict = get_codes(cost_coverage)

    # Read in code and conus rasters
    chunks = {"band": 1, "x": 5000, "y": 5000}
    code_path = DP.join("rasters/albers/acre/cost_codes.tif")
    cost_path = DP.join("rasters/albers/acre/rent_map.tif")
    conus_path = DP.join("rasters/albers/acre/masks/conus.tif")
    codes = xr.open_rasterio(code_path, chunks=chunks)[0].data
    costs = xr.open_rasterio(cost_path, chunks=chunks)[0].data
    conus = xr.open_rasterio(conus_path, chunks=chunks)[0].data

    # Dask array's `count_nonzero` counts na values
    codes[da.isnan(codes)] = 0
    conus[da.isnan(conus)] = 0

    # If calculating costs
    if cost_coverage:
        coverage = codes[(costs > 0) | (codes == 9999)]  # No exclusion in cost
    else:
        coverage = codes.copy()

    # Extract code from dictionary
    blm_codes = code_dict["blm"]
    tribal_codes = code_dict["tribal"]
    state_codes = code_dict["state"]
    private_codes = code_dict["private"]

    # Arrays
    developable = conus[codes != 9999]
    dev_covered = coverage[coverage != 9999]
    excl = coverage[coverage == 9999]
    blm = coverage[da.isin(coverage, blm_codes)]
    tribal = coverage[da.isin(coverage, tribal_codes)]
    state = coverage[da.isin(coverage, state_codes)]
    private = coverage[da.isin(coverage, private_codes)]
    arrays = {"excl": excl, "blm": blm, "tribal": tribal, "state": state,
              "private": private, "covered": coverage, "total": conus, 
              "developable": developable, "dev_covered": dev_covered}

    # Collect counts
    counts = {}
    with Client():
        for key, item in tqdm(arrays.items(), position=0):
            counts["n" + key] = da.count_nonzero(item).compute()

    return counts
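
# Sketch of the zero-fill and category counting above; da.where is used
# here in place of in-place mask assignment.
import numpy as np
import dask.array as da

codes = da.from_array(np.array([1.0, np.nan, 9999.0, 2.0]), chunks=2)
codes = da.where(da.isnan(codes), 0, codes)
print(da.count_nonzero(codes[da.isin(codes, [1, 2])]).compute())  # -> 2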
Example No. 12
def _kendall_tau_1xn(x: da.Array, data: da.Array) -> da.Array:
    _, ncols = data.shape

    datamask = da.isnan(data)
    xmask = da.isnan(x)[:, 0]

    corrs = []
    for j in range(ncols):
        y = data[:, [j]]

        mask = ~(xmask | datamask[:, j])
        # builtin float replaces the removed np.float alias
        corr = da.from_delayed(kendalltau(x[mask], y[mask]), dtype=float, shape=())
        corrs.append(corr)

    return da.stack(corrs)
Example No. 13
def haversines(x1, x2, y1, y2, z1=None, z2=None):

    x1, x2 = da.deg2rad(x1), da.deg2rad(x2)
    y1, y2 = da.deg2rad(y1), da.deg2rad(y2)

    x = (x2 - x1) * da.cos((y1 + y2) * 0.5) * cst.r_earth
    y = (y2 - y1) * cst.r_earth * da.ones_like(x1) * da.ones_like(x2)

    if z1 is None or z2 is None:
        return da.stack((x, y), axis=-1)
    else:
        z1 = da.where(da.isnan(z1), 0, z1)
        z2 = da.where(da.isnan(z2), 0, z2)
        z = (z2 - z1) * da.ones_like(x)
        return da.stack((x, y, z), axis=-1)
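
# Sketch of the vertical-component guard above: NaN depths are treated as 0.
import numpy as np
import dask.array as da

z1 = da.from_array(np.array([np.nan, 10.0]), chunks=1)
z2 = da.from_array(np.array([5.0, np.nan]), chunks=1)
dz = da.where(da.isnan(z2), 0, z2) - da.where(da.isnan(z1), 0, z1)
print(dz.compute())  # -> [  5. -10.]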
Example No. 14
def cov(*args, axis=None, **kwargs):
    """
    covariance
    """
    if axis is None:
        args = [x.flatten() for x in args]
        axis = 0

    X = da.stack(args, axis=-1).rechunk(com.CHUNKSIZE)
    cond = da.any(da.isnan(X), axis=-1)
    X = da.where(cond[..., None], np.nan, X)

    X -= da.nanmean(X, axis=axis, keepdims=True)
    X = da.where(da.isnan(X), 0, X)
    return X.swapaxes(axis, -1) @ X.swapaxes(axis,
                                             -2).conj() / (X.shape[axis] - 1)
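
# Self-contained sketch of the NaN handling above for the 2-D case: rows
# containing a NaN are zeroed after centering, so they drop out of the sums.
import numpy as np
import dask.array as da

X = da.from_array(np.array([[1.0, 2.0], [np.nan, 3.0], [2.0, 4.0]]), chunks=2)
cond = da.any(da.isnan(X), axis=-1)
Xc = da.where(cond[..., None], np.nan, X)
Xc = Xc - da.nanmean(Xc, axis=0, keepdims=True)
Xc = da.where(da.isnan(Xc), 0, Xc)
print((Xc.T @ Xc / (X.shape[0] - 1)).compute())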
Example No. 15
    def phase_rotation(self, darray, rotation, preview=None):
        """
        Description
        -----------
        Rotate the phase of the seismic data by a specified angle

        Parameters
        ----------
        darray : Array-like, acceptable inputs include Numpy, HDF5, or Dask Arrays
        rotation : Number (degrees), angle of rotation

        Keyword Arguments
        -----------------
        preview : str, enables or disables preview mode and specifies direction
            Acceptable inputs are (None, 'inline', 'xline', 'z')
            Optimizes chunk size in different orientations to facilitate rapid
            screening of algorithm output

        Returns
        -------
        result : Dask Array
        """

        phi = np.deg2rad(rotation)
        kernel = (1, 1, 25)
        darray, chunks_init = self.create_array(darray, kernel, preview=preview)
        analytical_trace = darray.map_blocks(signal.hilbert, dtype=darray.dtype)
        result = analytical_trace.real * da.cos(phi) - analytical_trace.imag * da.sin(phi)
        result = util.trim_dask_array(result, kernel)
        result[da.isnan(result)] = 0

        return result
Example No. 16
    def gradient_magnitude(self, darray, sigmas=(1, 1, 1), preview=None):
        """
        Description
        -----------
        Compute the 3D Gradient Magnitude using a Gaussian Operator
        
        Parameters
        ----------
        darray : Array-like, acceptable inputs include Numpy, HDF5, or Dask Arrays
        
        Keyword Arguments
        -----------------  
        sigmas : tuple (len 3), gaussian operator in I, J, K
        preview : str, enables or disables preview mode and specifies direction
            Acceptable inputs are (None, 'inline', 'xline', 'z')
            Optimizes chunk size in different orientations to facilitate rapid
            screening of algorithm output
        
        Returns
        -------
        result : Dask Array
        """

        kernel = tuple(2 * (4 * np.array(sigmas) + 0.5).astype(int) + 1)
        darray, chunks_init = self.create_array(darray,
                                                kernel,
                                                preview=preview)
        result = darray.map_blocks(ndi.gaussian_gradient_magnitude,
                                   sigma=sigmas,
                                   dtype=darray.dtype)
        result = util.trim_dask_array(result, kernel)
        result[da.isnan(result)] = 0

        return (result)
Example No. 17
    def trace_agc(self, darray, kernel=(1, 1, 9), preview=None):
        """
        Description
        -----------
        Apply an adaptive trace gain to input seismic
        
        Parameters
        ----------
        darray : Array-like, acceptable inputs include Numpy, HDF5, or Dask Arrays
        
        Keyword Arguments
        -----------------  
        kernel : tuple (len 3), operator size
        preview : str, enables or disables preview mode and specifies direction
            Acceptable inputs are (None, 'inline', 'xline', 'z')
            Optimizes chunk size in different orientations to facilitate rapid
            screening of algorithm output
        
        Returns
        -------
        result : Dask Array
        """

        darray, chunks_init = self.create_array(darray,
                                                kernel,
                                                preview=preview)
        rms = self.rms(darray, kernel)
        rms_max = rms.max()
        result = darray * (1.5 - (rms / rms_max))
        result[da.isnan(result)] = 0

        return (result)
Example No. 18
    def eig_complex(self, darray, kernel=(3, 3, 9), preview=None):
        """
        Description
        -----------
        Compute multi-trace semblance from 3D seismic incorporating the 
            analytic trace
        
        Parameters
        ----------
        darray : Array-like, acceptable inputs include Numpy, HDF5, or Dask Arrays
        
        Keyword Arguments
        -----------------  
        kernel : tuple (len 3), operator size
        preview : str, enables or disables preview mode and specifies direction
            Acceptable inputs are (None, 'inline', 'xline', 'z')
            Optimizes chunk size in different orientations to facilitate rapid
            screening of algorithm output
        
        Returns
        -------
        result : Dask Array
        """

        # Function to compute the COV
        def cov(x, ki, kj, kk):
            x = x.reshape((ki * kj, kk))
            x = np.hstack([x.real, x.imag])
            return (x.dot(x.T))

        # Function to extract patches and perform algorithm
        def operation(chunk, kernel):
            np.seterr(all='ignore')
            ki, kj, kk = kernel
            patches = util.extract_patches(chunk, kernel)

            out_data = []
            for i in range(0, patches.shape[0]):
                traces = patches[i]
                traces = traces.reshape(-1, ki * kj * kk)
                # Renamed from `cov` to avoid shadowing the cov() helper
                # above, which would raise UnboundLocalError here
                cov_mat = np.apply_along_axis(cov, 1, traces, ki, kj, kk)
                vals = np.linalg.eigvals(cov_mat)
                vals = np.abs(vals.max(axis=1) / vals.sum(axis=1))

                out_data.append(vals)

            out_data = np.asarray(out_data).reshape(patches.shape[:3])

            return (out_data)

        # Generate Dask Array as necessary and perform algorithm
        darray, chunks_init = self.create_array(darray, kernel, preview)
        hilbert = darray.map_blocks(util.hilbert, dtype=darray.dtype)
        result = hilbert.map_blocks(operation,
                                    kernel=kernel,
                                    dtype=darray.dtype)
        result = util.trim_dask_array(result, kernel)
        result[da.isnan(result)] = 0

        return (result)
Example No. 19
    def pod(self, threshold=1, inplace=True):
        """
        Convert rainfall accumulation arrays to a boolean variable indicating whether it is dry or not. Unit is mm / day.
        Because we still want to incorporate np.NaN on the unobserved areas, the array has to be floats of 0 and 1.
        Default threshold taken from the official definition: https://www.ecad.eu/indicesextremes/indicesdictionary.php#3
        """
        if hasattr(self, 'obsd'):
            data = da.where(da.isnan(self.obsd.array.data),
                            self.obsd.array.data,
                            self.obsd.array.data < threshold)
        else:
            data = np.where(np.isnan(self.obs.array), self.obs.array,
                            self.obs.array.data < threshold)

        result = xr.DataArray(data=data,
                              coords=self.obs.array.coords,
                              dims=self.obs.array.dims,
                              attrs={
                                  'long_name': 'probability_of_dryness',
                                  'threshold_mm_day': threshold,
                                  'units': self.old_units,
                                  'new_units': ''
                              },
                              name='-'.join([self.obs.basevar, 'pod']))

        if inplace:
            self.obs.array = result
            self.obs.newvar = 'pod'
        else:
            return (result)
Example No. 20
    def reflection_intensity(self, darray, kernel=(1, 1, 9), preview=None):
        """
        Description
        -----------
        Compute reflection intensity by integrating the trace over a specified window

        Parameters
        ----------
        darray : Array-like, acceptable inputs include Numpy, HDF5, or Dask Arrays

        Keyword Arguments
        -----------------
        kernel : tuple (len 3), operator size
        preview : str, enables or disables preview mode and specifies direction
            Acceptable inputs are (None, 'inline', 'xline', 'z')
            Optimizes chunk size in different orientations to facilitate rapid
            screening of algorithm output

        Returns
        -------
        result : Dask Array
        """

        # Function to extract patches and perform algorithm
        def operation(chunk, kernel):
            x = util.extract_patches(chunk, (1, 1, kernel[-1]))
            out = np.trapz(x).reshape(x.shape[:3])

            return out

        darray, chunks_init = self.create_array(darray, kernel, preview=preview)
        result = darray.map_blocks(operation, kernel=kernel, dtype=darray.dtype,
                                   chunks=chunks_init)
        result[da.isnan(result)] = 0

        return result
Example No. 21
    def gaussian(self, darray, sigmas=(1, 1, 1), preview=None):
        """
        Description
        -----------
        Perform gaussian smoothing of input seismic

        Parameters
        ----------
        darray : Array-like, acceptable inputs include Numpy, HDF5, or Dask Arrays

        Keyword Arguments
        -----------------
        sigmas : tuple (len 3), smoothing parameters in I, J, K
        preview : str, enables or disables preview mode and specifies direction
            Acceptable inputs are (None, 'inline', 'xline', 'z')
            Optimizes chunk size in different orientations to facilitate rapid
            screening of algorithm output

        Returns
        -------
        result : Dask Array
        """

        # Generate Dask Array as necessary and perform algorithm
        kernel = tuple((np.array(sigmas) * 2.5).astype(int))
        darray, chunks_init = self.create_array(darray, kernel, preview=preview)
        result = darray.map_blocks(ndi.gaussian_filter, sigma=sigmas,
                                   dtype=darray.dtype)
        result = util.trim_dask_array(result, kernel)
        result[da.isnan(result)] = 0

        return result
Example No. 22
    def convolution(self, darray, kernel=(3, 3, 3), preview=None):
        """
        Description
        -----------
        Perform convolution smoothing of input seismic data

        Parameters
        ----------
        darray : Array-like, acceptable inputs include Numpy, HDF5, or Dask Arrays

        Keyword Arguments
        -----------------
        kernel : tuple (len 3), operator size in I, J, K
        preview : str, enables or disables preview mode and specifies direction
            Acceptable inputs are (None, 'inline', 'xline', 'z')
            Optimizes chunk size in different orientations to facilitate rapid
            screening of algorithm output

        Returns
        -------
        result : Dask Array
        """

        # Generate Dask Array as necessary and perform algorithm
        darray, chunks_init = self.create_array(darray, kernel, preview=preview)
        result = darray.map_blocks(ndi.uniform_filter, size=kernel,
                                   dtype=darray.dtype)
        result = util.trim_dask_array(result, kernel)
        result[da.isnan(result)] = 0

        return result
Example No. 23
def compute_maf(X):
    r"""Compute minor allele frequencies.

    It assumes that ``X`` encodes 0, 1, and 2 representing the number
    of alleles (or dosage), or ``NaN`` to represent missing values.

    Parameters
    ----------
    X : array_like
        Genotype matrix.

    Returns
    -------
    array_like
        Minor allele frequencies.

    Examples
    --------
    .. doctest::

        >>> from numpy.random import RandomState
        >>> from limix.qc import compute_maf
        >>>
        >>> random = RandomState(0)
        >>> X = random.randint(0, 3, size=(100, 10))
        >>>
        >>> print(compute_maf(X)) # doctest: +FLOAT_CMP
        [0.49  0.49  0.445 0.495 0.5   0.45  0.48  0.48  0.47  0.435]
    """
    import dask.array as da
    import xarray as xr
    from pandas import DataFrame
    from numpy import isnan, logical_not, minimum, nansum

    if isinstance(X, da.Array):
        s0 = da.nansum(X, axis=0).compute()
        denom = 2 * (X.shape[0] - da.isnan(X).sum(axis=0)).compute()
    elif isinstance(X, DataFrame):
        s0 = X.sum(axis=0, skipna=True)
        denom = 2 * logical_not(X.isna()).sum(axis=0)
    elif isinstance(X, xr.DataArray):
        if "sample" in X.dims:
            kwargs = {"dim": "sample"}
        else:
            kwargs = {"axis": 0}
        s0 = X.sum(skipna=True, **kwargs)
        denom = 2 * logical_not(isnan(X)).sum(**kwargs)
    else:
        s0 = nansum(X, axis=0)
        denom = 2 * logical_not(isnan(X)).sum(axis=0)

    s0 = s0 / denom
    s1 = 1 - s0
    maf = minimum(s0, s1)

    if hasattr(maf, "name"):
        maf.name = "maf"

    return maf
Example No. 24
def redistribute(pop, target, default):
    # Redistribute population proportionally at the target locations
    redistribution = target / target.sum()
    # If there are no target locations, all redistribution values will be
    # inf or NaN. In this case, use the default
    return da.where(
        da.isinf(redistribution) | da.isnan(redistribution), default,
        pop * redistribution)
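
# A minimal usage sketch of redistribute above: a zero target sum yields
# NaN/inf ratios, so every cell falls back to the default.
import numpy as np
import dask.array as da

pop = da.from_array(np.array([10.0, 20.0]), chunks=1)
target = da.from_array(np.array([0.0, 0.0]), chunks=1)
print(redistribute(pop, target, default=5.0).compute())  # -> [5. 5.]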
Example No. 25
def scatter_with_regression(
    x: da.Array,
    y: da.Array,
    sample_size: int,
    k: Optional[int] = None
) -> Tuple[Tuple[da.Array, da.Array], Tuple[da.Array, da.Array],
           Optional[da.Array]]:
    """Calculate pearson correlation on 2 given arrays.

    Parameters
    ----------
    xarr : da.Array
    yarr : da.Array
    sample_size : int
    k : Optional[int] = None
        Highlight k points which influence pearson correlation most
    """
    if k == 0:
        raise ValueError("k should be larger than 0")

    xp1 = da.vstack([x, da.ones_like(x)]).T
    xp1 = xp1.rechunk((xp1.chunks[0], -1))

    mask = ~(da.isnan(x) | da.isnan(y))
    # if chunk size in the first dimension is 1, lstsq will use sfqr instead of tsqr,
    # where the former does not support nan in shape.

    if len(xp1.chunks[0]) == 1:
        xp1 = xp1.rechunk((2, -1))
        y = y.rechunk((2, -1))
        mask = mask.rechunk((2, -1))

    (coeffa, coeffb), _, _, _ = da.linalg.lstsq(xp1[mask], y[mask])

    if sample_size < x.shape[0]:
        samplesel = da.random.choice(x.shape[0],
                                     int(sample_size),
                                     chunks=x.chunksize)
        x = x[samplesel]
        y = y[samplesel]

    if k is None:
        return (coeffa, coeffb), (x, y), None

    influences = pearson_influence(x, y)
    return (coeffa, coeffb), (x, y), influences
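
# A toy sketch of the masked least-squares fit above; boolean masking leaves
# chunk sizes unknown, so they are resolved before calling lstsq.
import numpy as np
import dask.array as da

x = da.from_array(np.array([0.0, 1.0, 2.0, np.nan]), chunks=2)
y = da.from_array(np.array([1.0, 3.0, 5.0, 7.0]), chunks=2)
mask = ~(da.isnan(x) | da.isnan(y))
xp1 = da.vstack([x, da.ones_like(x)]).T.rechunk((2, -1))
A = xp1[mask].compute_chunk_sizes()
b = y[mask].compute_chunk_sizes()
coeffs, _, _, _ = da.linalg.lstsq(A, b)
print(coeffs.compute())  # -> approximately [2. 1.] (slope, intercept)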
Example No. 26
def build_exclusions():
    """ Build exclusion file for the WETO rent map."""

    # Using the 'core exclusions raster' that Anthony put together.
    excl = xr.open_rasterio(EXL_PATH, chunks=CHUNKS)
    roads = xr.open_rasterio(ROAD_PATH, chunks=CHUNKS)
    conus = xr.open_rasterio(CONUS_PATH, chunks=CHUNKS)
    rails = xr.open_rasterio(RAIL_PATH, chunks=CHUNKS)

    # Different na values every time :/
    roadsna = roads.attrs["nodatavals"][0]
    conusna = conus.attrs["nodatavals"][0]
    railsna = rails.attrs["nodatavals"][0]

    # Get just the data arrays
    excl = excl[0].data
    roads = roads[0].data
    conus = conus[0].data
    rails = rails[0].data

    # Set nodata values to 0
    excl[da.isnan(excl)] = 0
    roads[roads == roadsna] = 0
    conus[conus == conusna] = 0
    rails[rails == railsna] = 0

    # We need to reverse the original exclusions
    excl = (excl - 1) * -1

    # Combine roads
    excl = da.stack([excl, roads, rails], axis=0).max(axis=0)

    # And let's make exclusion values 9999 since 1 will be a code
    excl[excl == 1] = 9999

    # And cut out just CONUS for mapping
    excl = excl * conus

    # Compute
    print("Combining exclusion layers...")
    with Client():
        excl = excl.compute()

    # save to raster
    print("Saving to 90 meter reV grid...")
    to_raster(excl, DP.join("rasters", "rent_exclusions.tif"),
              template=EXL_PATH, compress="deflate")

    # warp to acre grid in north american albers equal area conic
    print("Warping to acre grid...")
    res = 63.614907234075254
    warp(DP.join("rasters", "rent_exclusions.tif"),
         DP.join("rasters", "albers", "acre", "rent_exclusions.tif"),
         xRes=res,
         yRes=res,
         overwrite=True)

    print("Done.")
Example No. 27
def scatter_with_regression(
    xarr: da.Array,
    yarr: da.Array,
    sample_size: int,
    k: Optional[int] = None
) -> Tuple[Tuple[float, float], dd.DataFrame, Optional[np.ndarray]]:
    """
    Calculate pearson correlation on 2 given arrays.

    Parameters
    ----------
    xarr : da.Array
    yarr : da.Array
    sample_size : int
    k : Optional[int] = None
        Highlight k points which influence pearson correlation most

    Returns
    -------
    Intermediate
    """
    if k == 0:
        raise ValueError("k should be larger than 0")

    mask = ~(da.isnan(xarr) | da.isnan(yarr))
    xarr = da.from_array(np.array(xarr)[mask])
    yarr = da.from_array(np.array(yarr)[mask])
    xarrp1 = da.vstack([xarr, da.ones_like(xarr)]).T
    xarrp1 = xarrp1.rechunk((xarrp1.chunks[0], -1))
    (coeffa, coeffb), _, _, _ = da.linalg.lstsq(xarrp1, yarr)

    if sample_size < len(xarr):
        samplesel = np.random.choice(len(xarr), int(sample_size))
        xarr = xarr[samplesel]
        yarr = yarr[samplesel]

    df = dd.concat([dd.from_dask_array(arr) for arr in [xarr, yarr]], axis=1)
    df.columns = ["x", "y"]

    if k is None:
        return (coeffa, coeffb), df, None

    influences = pearson_influence(xarr, yarr)
    return (coeffa, coeffb), df, influences
Example No. 28
def _unequal_var_ttest_denom(v1, n1, v2, n2):
    vn1 = v1 / n1
    vn2 = v2 / n2
    with np.errstate(divide="ignore", invalid="ignore"):
        df = (vn1 + vn2)**2 / (vn1**2 / (n1 - 1) + vn2**2 / (n2 - 1))

    # If df is undefined, variances are zero (assumes n1 > 0 & n2 > 0).
    # Hence it doesn't matter what df is as long as it's not NaN.
    df = da.where(da.isnan(df), 1, df)  # XXX: np -> da
    denom = da.sqrt(vn1 + vn2)
    return df, denom
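
# A minimal usage sketch of the Welch denominator above: where both variances
# are zero, df comes out NaN and is replaced by 1.
import numpy as np
import dask.array as da

v1 = da.from_array(np.array([0.0, 2.0]), chunks=1)
v2 = da.from_array(np.array([0.0, 3.0]), chunks=1)
df, denom = _unequal_var_ttest_denom(v1, 5, v2, 7)
print(df.compute(), denom.compute())  # df[0] == 1.0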
Example No. 30
def _true_color_dask(r, g, b, nodata, c, th):
    pixel_max = 255

    alpha = da.where(da.logical_or(da.isnan(r), r <= nodata), 0,
                     pixel_max).astype(np.uint8)

    red = (_normalize_data(r, pixel_max, c, th)).astype(np.uint8)
    green = (_normalize_data(g, pixel_max, c, th)).astype(np.uint8)
    blue = (_normalize_data(b, pixel_max, c, th)).astype(np.uint8)

    out = da.stack([red, green, blue, alpha], axis=-1)
    return out
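
# Sketch of the alpha-band rule above (_normalize_data is external):
# NaN or nodata pixels become fully transparent.
import numpy as np
import dask.array as da

r = da.from_array(np.array([np.nan, -9999.0, 0.25]), chunks=1)
alpha = da.where(da.logical_or(da.isnan(r), r <= -9999.0), 0, 255).astype(np.uint8)
print(alpha.compute())  # -> [  0   0 255]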
Example No. 31
    def response_frequency(self, darray, sample_rate=4, preview=None):
        """
        Description
        -----------
        Compute the Response Frequency of the input data
        
        Parameters
        ----------
        darray : Array-like, acceptable inputs include Numpy, HDF5, or Dask Arrays
        
        Keyword Arguments
        -----------------  
        sample_rate : Number, sample rate in milliseconds (ms)
        preview : str, enables or disables preview mode and specifies direction
            Acceptable inputs are (None, 'inline', 'xline', 'z')
            Optimizes chunk size in different orientations to facilitate rapid
            screening of algorithm output
        
        Returns
        -------
        result : Dask Array
        """
        def operation(chunk1, chunk2, chunk3):

            out = np.zeros(chunk1.shape)
            for i, j in np.ndindex(out.shape[:-1]):

                ints = np.unique(chunk3[i, j, :])

                for ii in ints:

                    idx = np.where(chunk3[i, j, :] == ii)[0]
                    peak = idx[chunk1[i, j, idx].argmax()]
                    out[i, j, idx] = chunk2[i, j, peak]

            return (out)

        darray, chunks_init = self.create_array(darray, preview=preview)
        env = self.envelope(darray)
        inst_freq = self.instantaneous_frequency(darray, sample_rate)
        troughs = env.map_blocks(util.local_events,
                                 comparator=np.less,
                                 dtype=darray.dtype)
        troughs = troughs.cumsum(axis=-1)
        result = da.map_blocks(operation,
                               env,
                               inst_freq,
                               troughs,
                               dtype=darray.dtype)
        result[da.isnan(result)] = 0

        return (result)
Example No. 32
    def _method_c(self, sr, il, cos_z, nodata_samps, min_samples, n_jobs, robust, band_coeffs, band):

        r"""
        Normalizes terrain using the C-correction method

        Args:
            sr (Dask Array): The surface reflectance data.
            il (Dask Array): The solar illumination.
            cos_z (Dask Array): The cosine of the solar zenith angle.
            nodata_samps (Dask Array): Samples where 1='no data' and 0='valid data'.
            min_samples (Optional[int]): The minimum number of samples required to fit a regression.
            n_jobs (Optional[int]): The number of parallel workers for ``LinearRegression.fit`` or
                ``TheilSenRegressor.fit``.
            robust (Optional[bool]): Whether to fit a robust regression.
            band_coeffs (dict): Slope and intercept coefficients for each band.
            band (int | str): The band.

        References:

            See :cite:`teillet_etal_1982` for the C-correction method.

        Returns:
            ``dask.array``
        """

        nodata = nodata_samps.compute().flatten()
        idx = np.where(nodata == 0)[0]

        if idx.shape[0] < min_samples:
            return sr

        X = il.compute().flatten()[idx][:, np.newaxis]

        if band_coeffs:
            slope_m, intercept_b = band_coeffs[band]
        else:

            y = sr.compute().flatten()[idx]

            slope_m, intercept_b = self._regress_a(X, y, robust, n_jobs)

        c = intercept_b / slope_m

        # Get the A-factor
        a_factor = (cos_z + c) / (il + c)

        a_factor = da.where(da.isnan(a_factor), 1, a_factor)

        sr_a = sr * a_factor

        return da.where((sr_a > 1) | (nodata_samps == 1), sr, sr_a).clip(0, 1)
Example No. 33
def test_dtype_complex():
    x = np.arange(24).reshape((4, 6)).astype('f4')
    y = np.arange(24).reshape((4, 6)).astype('i8')
    z = np.arange(24).reshape((4, 6)).astype('i2')

    a = da.from_array(x, chunks=(2, 3))
    b = da.from_array(y, chunks=(2, 3))
    c = da.from_array(z, chunks=(2, 3))

    def eq(a, b):
        return (isinstance(a, np.dtype) and
                isinstance(b, np.dtype) and
                str(a) == str(b))

    assert eq(a._dtype, x.dtype)
    assert eq(b._dtype, y.dtype)

    assert eq((a + 1)._dtype, (x + 1).dtype)
    assert eq((a + b)._dtype, (x + y).dtype)
    assert eq(a.T._dtype, x.T.dtype)
    assert eq(a[:3]._dtype, x[:3].dtype)
    assert eq((a.dot(b.T))._dtype, (x.dot(y.T)).dtype)

    assert eq(stack([a, b])._dtype, np.vstack([x, y]).dtype)
    assert eq(concatenate([a, b])._dtype, np.concatenate([x, y]).dtype)

    assert eq(b.std()._dtype, y.std().dtype)
    assert eq(c.sum()._dtype, z.sum().dtype)
    assert eq(a.min()._dtype, a.min().dtype)
    assert eq(b.std()._dtype, b.std().dtype)
    assert eq(a.argmin(axis=0)._dtype, a.argmin(axis=0).dtype)

    assert eq(da.sin(c)._dtype, np.sin(z).dtype)
    assert eq(da.exp(b)._dtype, np.exp(y).dtype)
    assert eq(da.floor(a)._dtype, np.floor(x).dtype)
    assert eq(da.isnan(b)._dtype, np.isnan(y).dtype)
    with ignoring(ImportError):
        assert da.isnull(b)._dtype == 'bool'
        assert da.notnull(b)._dtype == 'bool'

    x = np.array([('a', 1)], dtype=[('text', 'S1'), ('numbers', 'i4')])
    d = da.from_array(x, chunks=(1,))

    assert eq(d['text']._dtype, x['text'].dtype)
    assert eq(d[['numbers', 'text']]._dtype, x[['numbers', 'text']].dtype)
Example No. 34
def test_arithmetic():
    x = np.arange(5).astype('f4') + 2
    y = np.arange(5).astype('i8') + 2
    z = np.arange(5).astype('i4') + 2
    a = da.from_array(x, chunks=(2,))
    b = da.from_array(y, chunks=(2,))
    c = da.from_array(z, chunks=(2,))
    assert eq(a + b, x + y)
    assert eq(a * b, x * y)
    assert eq(a - b, x - y)
    assert eq(a / b, x / y)
    assert eq(b & b, y & y)
    assert eq(b | b, y | y)
    assert eq(b ^ b, y ^ y)
    assert eq(a // b, x // y)
    assert eq(a ** b, x ** y)
    assert eq(a % b, x % y)
    assert eq(a > b, x > y)
    assert eq(a < b, x < y)
    assert eq(a >= b, x >= y)
    assert eq(a <= b, x <= y)
    assert eq(a == b, x == y)
    assert eq(a != b, x != y)

    assert eq(a + 2, x + 2)
    assert eq(a * 2, x * 2)
    assert eq(a - 2, x - 2)
    assert eq(a / 2, x / 2)
    assert eq(b & True, y & True)
    assert eq(b | True, y | True)
    assert eq(b ^ True, y ^ True)
    assert eq(a // 2, x // 2)
    assert eq(a ** 2, x ** 2)
    assert eq(a % 2, x % 2)
    assert eq(a > 2, x > 2)
    assert eq(a < 2, x < 2)
    assert eq(a >= 2, x >= 2)
    assert eq(a <= 2, x <= 2)
    assert eq(a == 2, x == 2)
    assert eq(a != 2, x != 2)

    assert eq(2 + b, 2 + y)
    assert eq(2 * b, 2 * y)
    assert eq(2 - b, 2 - y)
    assert eq(2 / b, 2 / y)
    assert eq(True & b, True & y)
    assert eq(True | b, True | y)
    assert eq(True ^ b, True ^ y)
    assert eq(2 // b, 2 // y)
    assert eq(2 ** b, 2 ** y)
    assert eq(2 % b, 2 % y)
    assert eq(2 > b, 2 > y)
    assert eq(2 < b, 2 < y)
    assert eq(2 >= b, 2 >= y)
    assert eq(2 <= b, 2 <= y)
    assert eq(2 == b, 2 == y)
    assert eq(2 != b, 2 != y)

    assert eq(-a, -x)
    assert eq(abs(a), abs(x))
    assert eq(~(a == b), ~(x == y))
    assert eq(~(a == b), ~(x == y))

    assert eq(da.logaddexp(a, b), np.logaddexp(x, y))
    assert eq(da.logaddexp2(a, b), np.logaddexp2(x, y))
    assert eq(da.exp(b), np.exp(y))
    assert eq(da.log(a), np.log(x))
    assert eq(da.log10(a), np.log10(x))
    assert eq(da.log1p(a), np.log1p(x))
    assert eq(da.expm1(b), np.expm1(y))
    assert eq(da.sqrt(a), np.sqrt(x))
    assert eq(da.square(a), np.square(x))

    assert eq(da.sin(a), np.sin(x))
    assert eq(da.cos(b), np.cos(y))
    assert eq(da.tan(a), np.tan(x))
    assert eq(da.arcsin(b/10), np.arcsin(y/10))
    assert eq(da.arccos(b/10), np.arccos(y/10))
    assert eq(da.arctan(b/10), np.arctan(y/10))
    assert eq(da.arctan2(b*10, a), np.arctan2(y*10, x))
    assert eq(da.hypot(b, a), np.hypot(y, x))
    assert eq(da.sinh(a), np.sinh(x))
    assert eq(da.cosh(b), np.cosh(y))
    assert eq(da.tanh(a), np.tanh(x))
    assert eq(da.arcsinh(b*10), np.arcsinh(y*10))
    assert eq(da.arccosh(b*10), np.arccosh(y*10))
    assert eq(da.arctanh(b/10), np.arctanh(y/10))
    assert eq(da.deg2rad(a), np.deg2rad(x))
    assert eq(da.rad2deg(a), np.rad2deg(x))

    assert eq(da.logical_and(a < 1, b < 4), np.logical_and(x < 1, y < 4))
    assert eq(da.logical_or(a < 1, b < 4), np.logical_or(x < 1, y < 4))
    assert eq(da.logical_xor(a < 1, b < 4), np.logical_xor(x < 1, y < 4))
    assert eq(da.logical_not(a < 1), np.logical_not(x < 1))
    assert eq(da.maximum(a, 5 - a), np.maximum(a, 5 - a))
    assert eq(da.minimum(a, 5 - a), np.minimum(a, 5 - a))
    assert eq(da.fmax(a, 5 - a), np.fmax(a, 5 - a))
    assert eq(da.fmin(a, 5 - a), np.fmin(a, 5 - a))

    assert eq(da.isreal(a + 1j * b), np.isreal(x + 1j * y))
    assert eq(da.iscomplex(a + 1j * b), np.iscomplex(x + 1j * y))
    assert eq(da.isfinite(a), np.isfinite(x))
    assert eq(da.isinf(a), np.isinf(x))
    assert eq(da.isnan(a), np.isnan(x))
    assert eq(da.signbit(a - 3), np.signbit(x - 3))
    assert eq(da.copysign(a - 3, b), np.copysign(x - 3, y))
    assert eq(da.nextafter(a - 3, b), np.nextafter(x - 3, y))
    assert eq(da.ldexp(c, c), np.ldexp(z, z))
    assert eq(da.fmod(a * 12, b), np.fmod(x * 12, y))
    assert eq(da.floor(a * 0.5), np.floor(x * 0.5))
    assert eq(da.ceil(a), np.ceil(x))
    assert eq(da.trunc(a / 2), np.trunc(x / 2))

    assert eq(da.degrees(b), np.degrees(y))
    assert eq(da.radians(a), np.radians(x))

    assert eq(da.rint(a + 0.3), np.rint(x + 0.3))
    assert eq(da.fix(a - 2.5), np.fix(x - 2.5))

    assert eq(da.angle(a + 1j), np.angle(x + 1j))
    assert eq(da.real(a + 1j), np.real(x + 1j))
    assert eq((a + 1j).real, np.real(x + 1j))
    assert eq(da.imag(a + 1j), np.imag(x + 1j))
    assert eq((a + 1j).imag, np.imag(x + 1j))
    assert eq(da.conj(a + 1j * b), np.conj(x + 1j * y))
    assert eq((a + 1j * b).conj(), (x + 1j * y).conj())

    assert eq(da.clip(b, 1, 4), np.clip(y, 1, 4))
    assert eq(da.fabs(b), np.fabs(y))
    assert eq(da.sign(b - 2), np.sign(y - 2))

    l1, l2 = da.frexp(a)
    r1, r2 = np.frexp(x)
    assert eq(l1, r1)
    assert eq(l2, r2)

    l1, l2 = da.modf(a)
    r1, r2 = np.modf(x)
    assert eq(l1, r1)
    assert eq(l2, r2)

    assert eq(da.around(a, -1), np.around(x, -1))