Example #1
def test_gradient_xarray_implicit_axes(test_da_xy):
    """Test the 2D gradient calculation with a 2D DataArray and no axes specified."""
    data = test_da_xy.isel(time=0, isobaric=2)
    deriv_y, deriv_x = gradient(data)

    truth_x = xr.full_like(data, -6.993007e-07)
    truth_x.attrs['units'] = 'kelvin / meter'

    truth_y = xr.full_like(data, -2.797203e-06)
    truth_y.attrs['units'] = 'kelvin / meter'

    xr.testing.assert_allclose(deriv_x, truth_x)
    assert deriv_x.metpy.units == truth_x.metpy.units

    xr.testing.assert_allclose(deriv_y, truth_y)
    assert deriv_y.metpy.units == truth_y.metpy.units
Example #2
def test_first_derivative_xarray_time_and_default_axis(test_da_xy):
    """Test first derivative with an xarray.DataArray over time as default first dimension."""
    deriv = first_derivative(test_da_xy)
    truth = xr.full_like(test_da_xy, -0.000777000777)
    truth.attrs['units'] = 'kelvin / second'

    xr.testing.assert_allclose(deriv, truth)
    assert deriv.metpy.units == truth.metpy.units
Example #3
def test_gradient_xarray(test_da_xy):
    """Test the 3D gradient calculation with a 4D DataArray in each axis usage."""
    deriv_x, deriv_y, deriv_p = gradient(test_da_xy, axes=('x', 'y', 'isobaric'))
    deriv_x_alt1, deriv_y_alt1, deriv_p_alt1 = gradient(test_da_xy,
                                                        axes=('x', 'y', 'vertical'))
    deriv_x_alt2, deriv_y_alt2, deriv_p_alt2 = gradient(test_da_xy, axes=(3, 2, 1))

    truth_x = xr.full_like(test_da_xy, -6.993007e-07)
    truth_x.attrs['units'] = 'kelvin / meter'

    truth_y = xr.full_like(test_da_xy, -2.797203e-06)
    truth_y.attrs['units'] = 'kelvin / meter'

    partial = xr.DataArray(
        np.array([0.04129204, 0.03330003, 0.02264402]),
        coords=(('isobaric', test_da_xy['isobaric']),)
    )
    _, truth_p = xr.broadcast(test_da_xy, partial)
    truth_p.coords['crs'] = test_da_xy['crs']
    truth_p.attrs['units'] = 'kelvin / hectopascal'

    # Assert results match expectations
    xr.testing.assert_allclose(deriv_x, truth_x)
    assert deriv_x.metpy.units == truth_x.metpy.units
    xr.testing.assert_allclose(deriv_y, truth_y)
    assert deriv_y.metpy.units == truth_y.metpy.units
    xr.testing.assert_allclose(deriv_p, truth_p)
    assert deriv_p.metpy.units == truth_p.metpy.units

    # Assert alternative specifications give same results
    xr.testing.assert_identical(deriv_x_alt1, deriv_x)
    xr.testing.assert_identical(deriv_y_alt1, deriv_y)
    xr.testing.assert_identical(deriv_p_alt1, deriv_p)
    xr.testing.assert_identical(deriv_x_alt2, deriv_x)
    xr.testing.assert_identical(deriv_y_alt2, deriv_y)
    xr.testing.assert_identical(deriv_p_alt2, deriv_p)
Example #4
def tx90p(tasmax, t90, freq="YS"):
    r"""Number of days with daily maximum temperature over the 90th percentile.

    Number of days with daily maximum temperature over the 90th percentile.

    Parameters
    ----------
    tasmax : xarray.DataArray
      Maximum daily temperature [℃] or [K]
    t90 : xarray.DataArray
      90th percentile of daily maximum temperature [℃] or [K]
    freq : str, optional
      Resampling frequency

    Returns
    -------
    xarray.DataArray
      Count of days with daily maximum temperature above the 90th percentile [days]

    Notes
    -----
    The 90th percentile should be computed for a 5 day window centered on each calendar day for a reference period.

    Example
    -------
    >>> t90 = percentile_doy(historical_tasmax, per=0.9)
    >>> hot_days = tx90p(tasmax, t90)
    """
    if "dayofyear" not in t90.coords.keys():
        raise AttributeError("t10 should have dayofyear coordinates.")

    t90 = utils.convert_units_to(t90, tasmax)

    # adjustment of t90 to tasmax doy range
    t90 = utils.adjust_doy_calendar(t90, tasmax)

    # create array of percentile with tasmax shape and coords
    thresh = xr.full_like(tasmax, np.nan)
    doy = thresh.time.dt.dayofyear.values
    thresh.data = t90.sel(dayofyear=doy)

    # compute the hot days
    over = tasmax > thresh

    return over.resample(time=freq).sum(dim="time")
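
A note on the pattern above: `xr.full_like` allocates a NaN array with the shape and coordinates of `tasmax`, which is then filled with the percentile value matching each timestamp's day of year. A minimal, self-contained sketch of that pattern on synthetic data (names and values are illustrative, and no xclim helpers are used):

import numpy as np
import pandas as pd
import xarray as xr

# Hypothetical stand-ins for tasmax and t90
time = pd.date_range("2000-01-01", "2000-12-31", freq="D")
tasmax = xr.DataArray(290.0 + 10.0 * np.random.rand(time.size),
                      coords={"time": time}, dims="time")
t90 = xr.DataArray(np.linspace(285.0, 300.0, 366),
                   coords={"dayofyear": np.arange(1, 367)}, dims="dayofyear")

# Allocate a NaN array shaped like the data, then fill it with the threshold
# that matches each timestamp's day of year (same trick as in tx90p above).
thresh = xr.full_like(tasmax, np.nan)
thresh.data = t90.sel(dayofyear=tasmax.time.dt.dayofyear).data

hot_days = (tasmax > thresh).resample(time="YS").sum(dim="time")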
Example #5
    def get_dataset(self, **kwargs):
        """ get a dataset with the goods

        Parameters
        ----------
        kwargs : dict, optional
            passed to xmitgcm.open_mdsdataset

        Returns
        -------
        ds : xarray.Dataset
            with cost, ctrl, and sensitivity fields by iteration
        """
        cost = self.read_cost_function()
        ctrl = self.read_the_xx(**kwargs)
        both = xr.merge([cost, ctrl])

        # flag which simulations correspond to actual optimization iterations
        _, simuls = self.read_m1qn3()
        both['simulIsIter'] = xr.full_like(both.simul, True, dtype=bool)
        for mysimul in both.simul.values:
            if mysimul not in simuls:
                both['simulIsIter'].loc[{'simul': mysimul}] = False
        iters = []
        i = 0
        for k in both.simul.values:
            if both.simulIsIter.sel(simul=k).values:
                myiter = i
                i += 1
            else:
                myiter = np.nan
            iters.append(myiter)
        both['iters'] = xr.DataArray(np.asarray(iters),
                                     coords=both.simul.coords,
                                     dims=both.simul.dims)
        both = both.set_coords('iters')

        # cleanup
        both['simulIsIter'] = xr.where(np.isnan(both['simulIsIter']), False,
                                       both['simulIsIter']).astype(bool)
        both['iters'] = both.iters.where(both.simulIsIter)
        both['simulAtMaxIter'] = both.simul.where(
            both.simulIsIter).max('simul')

        return both
Example #6
def tn10p(tasmin, t10, freq='YS'):
    r"""Number of days with daily minimum temperature below the 10th percentile.

    Number of days with daily minimum temperature below the 10th percentile.

    Parameters
    ----------
    tasmin : xarray.DataArray
      Minimum daily temperature [℃] or [K]
    t10 : xarray.DataArray
      10th percentile of daily minimum temperature [℃] or [K]
    freq : str, optional
      Resampling frequency

    Returns
    -------
    xarray.DataArray
      Count of days with daily minimum temperature below the 10th percentile [days]

    Notes
    -----
    The 10th percentile should be computed for a 5 day window centered on each calendar day for a reference period.

    Example
    -------
    >>> t10 = percentile_doy(historical_tasmin, per=0.1)
    >>> cold_days = tn10p(tasmin, t10)
    """
    if 'dayofyear' not in t10.coords.keys():
        raise AttributeError("t10 should have dayofyear coordinates.")
    t10 = utils.convert_units_to(t10, tasmin)

    # adjustment of t10 to tasmin doy range
    t10 = utils.adjust_doy_calendar(t10, tasmin)

    # create array of percentile with tasmin shape and coords
    thresh = xr.full_like(tasmin, np.nan)
    doy = thresh.time.dt.dayofyear.values
    thresh.data = t10.sel(dayofyear=doy)

    # compute the cold days
    below = (tasmin < thresh)

    return below.resample(time=freq).sum(dim='time')
Example #7
File: stats.py Project: coecms/xmhw
def rank_variable(array):
    """Rank an array assigning values 1,2,3 ... to array elements going
    from biggest to smallest.

    Parameters
    ----------
    array: xarray DataArray
        input variable

    Returns
    -------
    rank : xarray DataArray
        rank-order array, where rank 1 corresponds to the largest value
    """
    rank_values = len(array) - array.values.argsort().argsort()
    rank = xr.full_like(array, np.nan)
    rank[:] = rank_values
    return rank
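
A quick usage check of the ranking logic above, assuming `rank_variable` is importable; the input values are arbitrary:

import xarray as xr

arr = xr.DataArray([0.3, 1.2, -0.5, 0.9], dims="time")
print(rank_variable(arr).values)  # [3. 1. 4. 2.] -- the largest value gets rank 1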
Example #8
    def reduce_chunked(self, xs, output):
        """Computes the maximum across a chunk

        Parameters
        ----------
        xs : iterable
            Iterable of sources
        output : UnitsDataArray
            Output data array, used here as a template for the result

        Returns
        -------
        UnitsDataArray
            Maximum of the source data over dims
        """
        # note: np.fmax ignores NaNs, np.maximum propagates NaNs
        y = xr.full_like(output, np.nan)
        for x in xs:
            y = np.fmax(y, x.max(dim=self._dims))
        return y
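
The `np.fmax` / `np.maximum` distinction noted in the comment is what makes this reduction NaN-tolerant. A small stand-alone illustration:

import numpy as np
import xarray as xr

a = xr.DataArray([1.0, np.nan, 3.0], dims="x")
b = xr.DataArray([np.nan, 2.0, 5.0], dims="x")
print(np.fmax(a, b).values)     # [1. 2. 5.]  -- NaNs are ignored where possible
print(np.maximum(a, b).values)  # [nan nan 5.] -- NaNs propagate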
Example #9
def full_like(other, fill_value=np.nan, add_coords={}, replace_vars=[]):
    data = xr.full_like(other, fill_value)
    if isinstance(data, xr.DataArray):
        data = data.to_dataset()
    for k, v in add_coords.items():
        data[k] = v
        data = data.set_coords(k)  # promote the added variable to a coordinate

    if replace_vars:
        data = data.drop(data.data_vars.keys())
        dims = data.dims.keys()
        shape = tuple(data.dims.values())
        empty = np.zeros(shape=shape)
        empty[empty == 0] = fill_value
        empty = xr.DataArray(empty, coords=data.coords, dims=data.dims)
        for var in replace_vars:
            data[var] = empty.copy()
    return data
Example #10
def align_concat_fill(*args, **kwargs):
    #
    # Put Datasets together
    #
    import numpy as np
    import xarray as xr

    xr.set_options(keep_attrs=True)
    #
    # align dimensions
    #
    args = xr.align(*args, join='outer', copy=False)
    message("Alignment completed", **kwargs)
    #
    # align datavars
    #
    dummy = xr.Dataset()
    for iset in args:
        ivars = list(iset.data_vars)
        for ivar in ivars:
            if ivar not in dummy.data_vars:
                dummy[ivar] = xr.full_like(iset[ivar],
                                           np.nan)  # copies attributes as well

    message("Initialization completed", dummy, **kwargs)
    #
    # add missing variables
    #
    for iset in args:
        for ivar in list(dummy.data_vars):
            if ivar not in iset.data_vars:
                iset[ivar] = dummy[ivar]
    #
    # concat
    #
    out = xr.concat(args, **kwargs)
    message("Concatenation completed", **kwargs)
    #
    # restore some Attributes, as they are removed by concat 'equal'
    #
    for ivar in out.data_vars:
        out[ivar].attrs.update(dict(dummy[ivar].attrs))
    return out
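
A stripped-down sketch of the routine above, without the custom `message` logging: outer-align two datasets, back-fill variables missing from either side with NaN templates built by `xr.full_like`, then concatenate. The datasets here are synthetic:

import numpy as np
import xarray as xr

ds1 = xr.Dataset({"t": ("time", [1.0, 2.0])}, coords={"time": [0, 1]})
ds2 = xr.Dataset({"q": ("time", [0.5, 0.6])}, coords={"time": [1, 2]})

a, b = xr.align(ds1, ds2, join="outer")
for target, source in ((a, b), (b, a)):
    for name in source.data_vars:
        if name not in target.data_vars:
            # copies dims, coords and attributes of the missing variable
            target[name] = xr.full_like(source[name], np.nan)

combined = xr.concat([a, b], dim="member")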
Example #11
def time_discretization(model,
                        max_perlen,
                        endtime,
                        starttime=None,
                        n_timesteps_p1=1,
                        n_timesteps_rest=1,
                        **kwargs):
    """
    Collect all unique times and subdivide. Adapted from the function in imod/wq/model.py
    """

    model.use_cftime = model._use_cftime()

    times = []
    for pkg in model.values():
        if "time" in pkg.coords:
            times.append(pkg["time"].values)

    # TODO: check that endtime is later than all other times.
    times.append(imod.wq.timeutil.to_datetime(endtime, model.use_cftime))
    if starttime is not None:
        times.append(imod.wq.timeutil.to_datetime(starttime, model.use_cftime))

    # np.unique also sorts
    times = np.unique(np.hstack(times))
    duration = imod.wq.timeutil.timestep_duration(times, use_cftime=True)

    # Update times, ensuring that max_perlen is not exceeded.
    nper_extra = [int(d / (max_perlen * 365.25)) for d in duration]
    nu_times = add_timesteps(max_perlen, times, nper_extra)
    nu_duration = imod.wq.timeutil.timestep_duration(nu_times, use_cftime=True)
    # Generate time discretization, just rely on default arguments
    # Probably won't be used that much anyway?
    timestep_duration = xr.DataArray(nu_duration,
                                     coords={"time": np.array(nu_times)[:-1]},
                                     dims=("time", ))

    n_timesteps = xr.full_like(timestep_duration,
                               n_timesteps_rest).astype(np.int64)
    n_timesteps[0] = n_timesteps_p1

    model["time_discretization"] = imod.wq.TimeDiscretization(
        timestep_duration=timestep_duration, n_timesteps=n_timesteps, **kwargs)
Example #12
def add_empty_first_date_array(ds, time_slice, first_query_date):
    def create_new_date_string(first_query_date, time_slice):
        yr = first_query_date.split('-')[0]
        mt = '{:02d}'.format(time_slice)
        dy = first_query_date.split('-')[2]
        return yr + '-' + mt + '-' + dy

    # tell user
    print(
        'Inserting empty first date, if required, to improve resampling. Please wait.'
    )

    # for specific month or season, correct month
    if time_slice == 'q1':
        first_query_date = np.datetime64(
            create_new_date_string(first_query_date, 1))
    elif time_slice == 'q2':
        first_query_date = np.datetime64(
            create_new_date_string(first_query_date, 4))
    elif time_slice == 'q3':
        first_query_date = np.datetime64(
            create_new_date_string(first_query_date, 7))
    elif time_slice == 'q4':
        first_query_date = np.datetime64(
            create_new_date_string(first_query_date, 10))
    elif isinstance(time_slice, int):
        first_query_date = np.datetime64(
            create_new_date_string(first_query_date, time_slice))
    else:
        first_query_date = np.datetime64(first_query_date)

    # check if already exists
    if ds['time'].isel(time=0) == first_query_date:
        return ds

    # generate empty and modify date
    empty_date = xr.full_like(ds.isel(time=0), fill_value=np.nan)
    empty_date['time'] = first_query_date

    # combine and sort
    ds = xr.concat([ds, empty_date], dim='time').sortby('time')

    return ds
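
A minimal sketch of the "empty first date" trick above: copy the first time slice as NaNs, retag its timestamp, then concatenate and sort. Variable names and dates are illustrative:

import numpy as np
import pandas as pd
import xarray as xr

time = pd.date_range("2020-03-01", periods=3, freq="MS")
ds = xr.Dataset({"ndvi": ("time", [0.2, 0.4, 0.6])}, coords={"time": time})

empty = xr.full_like(ds.isel(time=0), fill_value=np.nan)  # all-NaN copy of the first slice
empty["time"] = np.datetime64("2020-01-01")               # move it to the desired start date

ds = xr.concat([ds, empty], dim="time").sortby("time")
# ds.time now starts at 2020-01-01 with an all-NaN entry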
Example #13
    def met_data(self):
        if self._met_data is None:
            if self.domain is None:
                self._domain = io.read_domain(self.params).isel(
                    **self._domain_slice)
            self._met_data = io.read_met_data(self.params, self._domain)
            self._met_data['elev'] = self.domain['elev']
            self._met_data['lat'] = self.domain['lat']
            self._met_data['lon'] = self.domain['lon']

            # process constant_vars
            constant_vars = self.params.get('constant_vars', None)
            if constant_vars:
                da_template = self._met_data[list(self._met_data)[0]]
                for var in constant_vars.keys():
                    self._met_data[var] = xr.full_like(da_template,
                                                       float(constant_vars[var]))

            self._validate_force_times(force_times=self._met_data['time'])
        return self._met_data
Example #14
    def get_variable(self, run: str, varname: Hashable) -> xr.DataArray:
        """Query a collection of diagnostics for a given run and variable

        Args:
            run: name of the run to query
            varname: variable to extract from the expected run

        Returns:
            varname of run if present, otherwise nans with the expected
            metadata

        """
        if varname in self._varnames[run]:
            return self._get_run(run)[varname]
        else:
            for run in self._varnames:
                if varname in self._varnames[run]:
                    template = self._get_run(run)[varname]
                    return xr.full_like(template, np.nan)
            raise ValueError(f"{varname} not found.")
Example #15
def map_property_onto_objects(objects, object_file, property):
    base_name, objects_mask = object_file.split('.objects.')

    object_properties = genesis.objects.get_data(base_name, mask_identifier=objects_mask)
    object_property = object_properties[property]
    N_objects = len(object_properties.object_id)

    properties_mapped = xr.full_like(objects, fill_value=np.nan, dtype=object_property.dtype)
    properties_mapped.attrs.update(object_property.attrs)
    properties_mapped.name = object_property.name

    print("Mapping {} onto {} objects...".format(property, N_objects))

    for object_id in tqdm.tqdm(object_properties.object_id):
        if object_id == 0:
            continue
        v = object_property.sel(object_id=object_id).values
        properties_mapped = properties_mapped.where(objects != object_id, other=v)

    return properties_mapped
Example #16
	def run(self):
		if self.profiles_data is None:
			self.prepare_data()
		self.main_flag = xr.full_like(self.profiles_data['Temperature'], 2,
									  dtype='i8')
		self.main_flag.name = "Main flag"
		indx_fail = np.array([], dtype='i8')
		indx_suspect = np.array([], dtype='i8')
		indx_pass = np.array([], dtype='i8')
		for test in self.qctests:
			self.qctests[test].apply(self.profiles_data)
			flag = self.qctests[test].get_flag()
			self.flag_dict[test] = flag
			indx_fail = np.append(indx_fail, np.argwhere(flag.data == 4))
			indx_suspect = np.append(indx_suspect, np.argwhere(flag.data == 3))
			indx_pass = np.append(indx_pass, np.argwhere(flag.data == 1))

		self.main_flag.data[np.unique(indx_pass)] = 1
		self.main_flag.data[np.unique(indx_suspect)] = 3
		self.main_flag.data[np.unique(indx_fail)] = 4
Example #17
def get_area_domain(dset, directory='./', write=0, radius=6378.137):
    """Input is a VOLCAT dataset. Use after volcat.open_dataset(fname).
       Provide directory for where the area netcdf should be located. Default is cwd.
       Defaults to NOT writing the area array to a netcdf file.
       Output is an xarray of the same size with the corresponding area (km2) of each grid cell.
       Converts degrees to meters using a radius of 6378.137 km."""
    import xarray as xr
    import numpy as np
    from math import pi

    d2r = pi / 180.0  #convert degrees to radians
    d2km = radius * d2r  #convert degree latitude to kilometers.

    ash_mass = dset.ash_mass  #Pulls out ash mass array
    ash_mass = ash_mass[0, :, :]  #Removes time dimension
    lat = ash_mass.latitude
    lon = ash_mass.longitude
    latrad = lat * d2r  #Creating latitude array in radians
    coslat = np.cos(
        latrad
    ) * d2km * d2km  #Grouping constant multiplication outside of loop
    #Creates an array copy of ash_mass filled with the fill value
    area = xr.full_like(ash_mass, ash_mass._FillValue)
    shape = np.shape(area)
    #Begins looping through each element of array
    i = 0
    while i < (shape[0] - 1):
        j = 0
        while j < (shape[1] - 1):
            area[i, j] = abs(lat[i, j] - lat[i + 1, j]) * abs(
                abs(lon[i, j]) - abs(lon[i, j + 1])) * coslat[i, j]
            j += 1
        i += 1
    if write == 1:
        #Reformatting array attributes before writing to netcdf
        area.name = 'area'
        area.attrs['long_name'] = 'area of each lat/lon grid box'
        area.attrs['units'] = 'km^2'
        area.to_netcdf(directory +
                       'area_whole_domain.nc')  #Writes area array to netcdf
    return area
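
The nested while loops above can typically be replaced by array slicing. A vectorized sketch of the same cell-area estimate, assuming the same 2D `lat`/`lon` coordinate arrays and `coslat` factor as in the function (the last row and column stay at the fill value, matching the loop bounds above):

import numpy as np
import xarray as xr

def grid_cell_area(ash_mass, lat, lon, coslat):
    # area[i, j] = |lat[i, j] - lat[i+1, j]| * ||lon[i, j]| - |lon[i, j+1]|| * coslat[i, j]
    area = xr.full_like(ash_mass, ash_mass._FillValue)
    dlat = np.abs(lat[:-1, :-1].values - lat[1:, :-1].values)
    dlon = np.abs(np.abs(lon[:-1, :-1].values) - np.abs(lon[:-1, 1:].values))
    area[:-1, :-1] = dlat * dlon * coslat[:-1, :-1].values
    return area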
Example #18
def first_run_after_date(
    da: xr.DataArray,
    window: int,
    date: str = "07-01",
    dim: str = "time",
    coord: Optional[Union[bool, str]] = "dayofyear",
):
    """Return the index of the first item of the first run after a given date.

    Parameters
    ----------
    da : xr.DataArray
      Input N-dimensional DataArray (boolean)
    window : int
      Minimum duration of consecutive run to accumulate values.
    date : str
      The date after which to look for the run.
    dim : str
      Dimension along which to calculate consecutive run (default: 'time').
    coord : Optional[Union[bool, str]]
      If not False, the function returns values along `dim` instead of indexes.
      If `dim` has a datetime dtype, `coord` can also be a str of the name of the
      DateTimeAccessor object to use (ex: 'dayofyear').

    Returns
    -------
    out : xr.DataArray
      Index (or coordinate if `coord` is not False) of first item in the first valid run. Returns np.nan if there are no valid runs.
    """
    after_date = datetime.strptime(date, "%m-%d").timetuple().tm_yday

    mid_idx = np.where(da.time.dt.dayofyear == after_date)[0]
    if mid_idx.size == 0:  # The date is not within the group. Happens at boundaries.
        return xr.full_like(da.isel(time=0), np.nan, float).drop_vars("time")

    return first_run(
        da.where(da.time >= da.time[mid_idx][0]),
        window=window,
        dim=dim,
        coord=coord,
    )
Example #19
def classify_landform(ds, elevation_levels=[], TYPE='SIMPLE'):
    """Subdivide landform classes by aspect class."""        
    SHAPE = ds['mask'].shape
    lf_cl = np.ma.masked_array(np.ones_like(ds['mask'].values), mask=ds['mask'].values)
    
    # depending on the classification scheme we need different slope classes that
    # have an aspect component
    if TYPE == 'SIMPLE':
        aspect_lf = [3]
    elif TYPE == 'WEISS':
        aspect_lf = [2,3,5]
    else:
        log.error('Currently only classification schemes WEISS, SIMPLE supported.')
    ds.tile.set('classification', TYPE.lower())
    
    aspect_lfs = (ds['aspect_class'].to_masked_array() > 0) & \
                  (np.in1d(ds['landform'].to_masked_array(), aspect_lf).reshape(SHAPE))
    
    lf_cl = np.ma.where(aspect_lfs, ds['landform'] * 10 + ds['aspect_class'],
                                    ds['landform'] * 10).filled(NODATA)
    lf_cl = np.ma.masked_where(ds['mask'] == 0, lf_cl)
    
    # if we have elevation levels subdivide the landform classes
    ele = ds['elevation'].to_masked_array()
    if len(elevation_levels) > 0:
        # add global elevation step attribute (second element, first is lower boundary)
        ds.tile.set('elevation_step', elevation_levels[1])

        for i, (lb, ub) in enumerate(zip(elevation_levels[:-1], elevation_levels[1:])):
            lf_cl = np.ma.where(((ele >= lb) & (ele < ub)), lf_cl + (i+1) * 100, lf_cl)   

    # special encoding (force output as Int16)
    ENCODING_INT = dict(ENCODING)
    ENCODING_INT.update({'dtype': np.int16})    

    lf_cl = np.ma.masked_where(ds['mask'] == 0, lf_cl)
    da_lf_cl = xr.full_like(ds['landform'], np.nan)
    ds['landform_class'] = da_lf_cl
    ds['landform_class'][:] = lf_cl
    ds['landform_class'].tile.update_encoding(ENCODING_INT)
    return ds
Example #20
def _qx_to_lx(qx):
    r"""
    Computes :math:`l_x` based on :math:`q_x`, where :math:`q_x` already
    contains the 95-100 (33) and 100-105 (44) age groups.  Also computes
    :math:`l_x` for 105-110 (45), and then set :math:`l_x` for 110+ to be 0.

    Args:
        qx (xr.DataArray): Probability of dying.

    Returns:
        (xr.DataArray): lx.
    """
    if tuple(qx["age_group_id"].values[-2:]) != (33, 44):
        raise ValueError("qx must have age group ids 33 and 44")

    px = 1.0 - qx  # now we have survival all the way to 100-105 (44) age group

    # Because l{x+n} = lx * px, we can compute all lx's if we start with
    # l_0 = 1 and iteratively apply the px's of higher age groups.
    # So we compute l_105-110, since we have p_100-105 from extrapolated qx.
    # We start with a set of lx's that are all 1.0
    lx = xr.full_like(px, 1)
    # now expand lx to have age groups 105-110 (45)
    lx = expand_dimensions(lx, fill_value=1, age_group_id=[45])

    # Since l{x+n} = lx * px, we take the cumulative product of px down the age groups
    # and apply the product to ages[1:] (since ages[0] has lx = 1.0)
    ages = lx["age_group_id"]

    ppx = px.cumprod(dim="age_group_id")  # the cumulative product of px
    ppx.coords["age_group_id"] = ages[1:]  # need to correspond to ages[1:]
    lx.loc[dict(age_group_id=ages[1:])] *= ppx  # lx all the way to 100-105

    # now artificially sets lx to be 0 for the 110+ age group.
    lx = expand_dimensions(lx, fill_value=0, age_group_id=[148])

    assert (lx.sel(age_group_id=2) == 1).all()
    assert tuple(lx['age_group_id'].values[-4:]) == (33, 44, 45, 148),\
        "final lx should have age group ids 33, 44, 45, and 148."

    return lx
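
The survivorship recursion at the heart of this function, l_{x+n} = l_x * p_x, can be sketched without the project-specific `expand_dimensions` helper by appending the extra oldest age group with a plain `concat`. Age group ids and qx values below are illustrative:

import numpy as np
import xarray as xr

qx = xr.DataArray([0.01, 0.02, 0.05, 0.20], dims="age_group_id",
                  coords={"age_group_id": [2, 3, 4, 5]})
px = 1.0 - qx

# start with lx = 1 everywhere and append one extra (older) age group, also at 1
lx = xr.full_like(px, 1.0)
extra = xr.DataArray([1.0], dims="age_group_id", coords={"age_group_id": [6]})
lx = xr.concat([lx, extra], dim="age_group_id")

# cumulative survival, relabelled to the *next* age group, then applied
ppx = px.cumprod(dim="age_group_id")
ppx = ppx.assign_coords(age_group_id=lx["age_group_id"].values[1:])
lx.loc[dict(age_group_id=lx["age_group_id"].values[1:])] *= ppx
# lx.values -> [1.0, 0.99, 0.9702, 0.92169, 0.737352]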
Example #21
    def extract_Sulphate(self):
        gefs = xr.open_dataset(self.f, engine='pynio')
        tmp1 = gefs.PMTF_P48_L105_GLL0_A62006

        sulp = tmp1
        sulp_size = xr.full_like(sulp, 0.139)

        sulp = sulp.assign_coords(
            {"lv_HYBL0": ("lv_HYBL0", self.p[::-1].values)})
        sulp_size = sulp_size.assign_coords(
            {"lv_HYBL0": ("lv_HYBL0", self.p[::-1].values)})
        pnew = np.arange(1000, 90, -10)
        sulp.sel(lv_HYBL0=pnew,
                 method="nearest").sel(lon_0=np.arange(250, 300, 1)).sel(
                     lat_0=np.arange(20, 60, 1))[20].plot()
        plt.show()
        sulp_size.sel(lv_HYBL0=pnew,
                      method="nearest").sel(lon_0=np.arange(250, 300, 1)).sel(
                          lat_0=np.arange(20, 60, 1))[20].plot()
        plt.show()
        return (sulp, sulp_size)
Example #22
def first_run_after_date(
    da: xr.DataArray,
    window: int,
    date: Optional[DayOfYearStr] = "07-01",
    dim: str = "time",
    coord: Optional[Union[bool, str]] = "dayofyear",
) -> xr.DataArray:
    """Return the index of the first item of the first run after a given date.

    Parameters
    ----------
    da : xr.DataArray
      Input N-dimensional DataArray (boolean).
    window : int
      Minimum duration of consecutive run to accumulate values.
    date : DayOfYearStr
      The date after which to look for the run.
    dim : str
      Dimension along which to calculate consecutive run (default: 'time').
    coord : Optional[Union[bool, str]]
      If not False, the function returns values along `dim` instead of indexes.
      If `dim` has a datetime dtype, `coord` can also be a str of the name of the
      DateTimeAccessor object to use (ex: 'dayofyear').

    Returns
    -------
    xr.DataArray
      Index (or coordinate if `coord` is not False) of first item in the first valid run.
      Returns np.nan if there are no valid runs.
    """
    mid_idx = index_of_date(da[dim], date, max_idxs=1, default=0)
    if mid_idx.size == 0:  # The date is not within the group. Happens at boundaries.
        return xr.full_like(da.isel({dim: 0}), np.nan, float).drop_vars(dim)

    return first_run(
        da.where(da[dim] >= da[dim][mid_idx][0]),
        window=window,
        dim=dim,
        coord=coord,
    )
Example #23
    def fit(self, X, *args, **kwargs):
        """Fit the model

        Fit all the transforms one after the other and transform the
        data, then fit the transformed data using the final estimator.

        Parameters
        ----------
        X : xarray.DataArray or xarray.Dataset
            Training data. Must fulfill input requirements of first step of
            the pipeline. If an xarray.Dataset is passed, it will be converted
            to an array using `to_array()`.
        y : xarray.DataArray, optional
            Training targets. Must fulfill label requirements for all steps
            of the pipeline.
        feature_dim : str, optional
            Name of feature dimension.
        **fit_params : dict of string -> object
            Parameters passed to the ``fit`` method of this model. If the
            model is a sklearn Pipeline, parameters can be passed to each
            step, where each parameter name is prefixed such that parameter
            ``p`` for step ``s`` has key ``s__p``.
        """
        kws = {'along_dim': self._dim, 'feature_dim': DEFAULT_FEATURE_DIM}
        kws.update(kwargs)

        assert len(args) <= 1
        args = list(args)
        args.append(self._model)

        X = self._to_feature_x(X, feature_dim=kws['feature_dim'])

        if X.chunks:
            reduce_dims = [self._dim, kws['feature_dim']]
            mask = _make_mask(X, reduce_dims)
            template = xr.full_like(mask, None, dtype=object)
            self._models = xr.map_blocks(_fit_wrapper, X, args=args, kwargs=kws, template=template)
        else:
            self._models = _fit_wrapper(X, *args, **kws)
Example #24
def column_ozone(filepath, lat, lon, altitude, time):
    ozone = xr.open_dataset(filepath)
    ma = 48
    mo3 = 28.97
    MMR = ozone.O3
    VMR = MMR/(ma/mo3)*1e6
    DELP = ozone.DELP
    g0 = 9.80665 # m/s2
    T0 = 273.15 # K
    k = 1.3807e-23 # J/K/molecule
    p0 = 1.01325e5 #Pa
    Na = 6.022e23
    R = 287.3 #J/Kg/K
    vmr_indx = np.where(3.28084*ozone.H[0,:,200,200].values/1000 >= altitude)
    constant = 10*(R*T0)/(g0*p0)
    Column = np.zeros([8,len(vmr_indx[0]),361,576])
    for i in vmr_indx[0]:
        Column[:,i,:,:] = 0.5 * (VMR[:,i,:,:] + VMR[:,i+1,:,:]) * DELP[:,i,:,:]
    O3 = 1e-2*constant*np.sum(Column, axis=1)
    O3_x = xr.full_like(VMR[:,0,:,:], O3)
    C_O3 = O3_x.interp(lat = lat, lon = lon, time = time )
    return C_O3.values
Example #25
def create_template(like_da: xr.DataArray,
                    var_names: Iterable[str],
                    fill_val: float = np.nan) -> xr.Dataset:
    """Create an empty dataset with the given variables in it

    Parameters
    ----------
    like_da : xr.DataArray
        Template variable.
    var_names : list-like
        List of variable names
    fill_val : float, optional
        Value used to fill the new variables (default: np.nan)

    Returns
    -------
    ds : xr.Dataset
        Template dataset
    """

    ds = xr.Dataset()
    for v in var_names:
        ds[v] = xr.full_like(like_da, fill_val, dtype=np.float32)
    return ds
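
Example use of `create_template` (variable names are illustrative); the result shares the template's shape and coordinates, filled with NaN:

import numpy as np
import xarray as xr

like = xr.DataArray(np.zeros((2, 3)), dims=("lat", "lon"),
                    coords={"lat": [10.0, 20.0], "lon": [0.0, 1.0, 2.0]})
ds = create_template(like, ["t2m", "precip"])
print(list(ds.data_vars))              # ['t2m', 'precip']
print(bool(ds["t2m"].isnull().all()))  # True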
Example #26
def calc_radar_mask(data_flag, clutter_threshold=4):
    assert data_flag.dims == ("time", "height")

    # 0: clear, 1: maybe, 2: certain, -1: unknown
    radar_mask = xarray.full_like(data_flag.time, Cloud_flag.clear, dtype=int)
    radar_mask[(data_flag == Radar_flag.good).any("height")] = Cloud_flag.probably

    cloud_mask2d = (data_flag == Radar_flag.good).transpose("time", "height").values
    cloud_mask8bit = cloud_mask2d.astype(np.uint8)

    number, markers = cv2.connectedComponents(cloud_mask8bit)

    assert np.all(
        (markers == 0) == (cloud_mask2d == 0)
    ), "Cloud free should be marked with 0. This seems not to be the case."

    IDs, count = np.unique(markers, return_counts=True)
    IDs = IDs[1:]  # the first ID is the background, not needed here
    count = count[1:]  # the first ID is the background, not needed here
    potential_clouds = np.where(markers != 0)  # indices of potential cloudy pixels

    # remove clouds that are smaller than n pixel
    invalid_cloud_IDs = set(
        IDs[count < clutter_threshold]
    )  # set of echos that are too small and that are likely to be clutter
    for x, y in zip(*potential_clouds):
        if markers[x, y] in invalid_cloud_IDs:
            markers[x, y] = 0  # mark as cloud free

    certainly_cloudy = (markers != 0).any(axis=1)
    assert (radar_mask[certainly_cloudy] == Cloud_flag.probably).all()
    radar_mask[certainly_cloudy] = Cloud_flag.certain

    no_echo_signal = data_flag != Radar_flag.good
    no_clear_signal = data_flag != Radar_flag.clear
    radar_mask[(no_echo_signal & no_clear_signal).all("height")] = Cloud_flag.unknown

    return radar_mask
Example #27
def _preprocess_weights(a, dim, new_dim, weights):
    """Preprocesses weights array to prepare for numpy computation.

    Parameters
    ----------
    a : xarray.Dataset or xarray.DataArray
        One of the arrays over which the function will be applied.
    dim : str, list
        The original dimension(s) to apply the function along.
    new_dim : str
        The newly named dimension after running ``_preprocess_dims``
    weights : xarray.Dataset or xarray.DataArray
        Weights to apply to function, matching the dimension size of
        ``new_dim``.
    """
    if weights is None:
        return xr.full_like(a, None)  # Return nan weighting array.
    else:
        # Throw error if there are negative weights.
        if weights.min() < 0:
            raise ValueError(
                'Weights has a minimum below 0. Please submit a weights array '
                'of positive numbers.')
        # Scale weights to vary from 0 to 1.
        weights = weights / weights.max()
        # Check that the weights array has the same size
        # dimension(s) as those being applied over.
        drop_dims = {k: 0 for k in a.dims if k not in new_dim}
        if dict(weights.sizes) != dict(a.isel(drop_dims).sizes):
            raise ValueError(
                f'weights dimension(s) {dim} of size {dict(weights.sizes)} '
                f"does not match DataArray's size "
                f'{dict(a.isel(drop_dims).sizes)}')
        if dict(weights.sizes) != dict(a.sizes):
            # Broadcast weights to full size of main object.
            _, weights = xr.broadcast(a, weights)
        return weights
Example #28
def to_cross(
    data: xr.DataArray,
    tile: Hashable = "tile",
    x: Hashable = "grid_xt",
    y: Hashable = "grid_yt",
) -> xr.DataArray:
    """Combine tiles into a single 2D field resembling a "cross"

    Useful for quickly plotting maps or applying other 2D image processing
    techniques.
    
    See Also:

        Weyn J, Durran D, and Caruana, 2019
        https://agupubs.onlinelibrary.wiley.com/doi/full/10.1029/2019MS001705

    """
    tiles = []
    dims = [y, x]

    rotation_plan = {
        (spec.x, spec.y): (spec.origin, tile_num) for tile_num, spec in TOPOLOGY.items()
    }

    data = data.drop_vars(dims + [tile], errors="ignore").transpose(..., tile, y, x)
    null = xr.full_like(data.isel({tile: 0}), np.nan)
    for j in range(3):
        row = []
        for i in range(4):
            if (i, j) in rotation_plan:
                origin, tile_num = rotation_plan[(i, j)]
                arr = rotate(data.isel({tile: tile_num}), origin, SW, dims=dims)
            else:
                arr = null
            row.append(arr)
        tiles.append(row)
    return xr.combine_nested(tiles, concat_dim=dims)
Example #29
def effr_ice(ice, temp):
    min_qi = 1.e-8
    gfdl_tice = 273.16
    gfdl_beta = 1.22
    gfdl_reimin = 10.0
    gfdl_reimax = 150.0
    result = xr.full_like(ice, 0.0)
    ice_positive = xr.where(ice > min_qi, ice, min_qi)
    result = xr.where(
        temp[1:] - gfdl_tice >= -30.0, gfdl_beta / 9.387 * np.exp(
            (1 - 0.969) * np.log(1.0e3 * ice_positive)) * 1.0e3, result)
    result = xr.where(
        temp[1:] - gfdl_tice < -30.0, gfdl_beta / 9.208 * np.exp(
            (1 - 0.945) * np.log(1.0e3 * ice_positive)) * 1.0e3, result)
    result = xr.where(
        temp[1:] - gfdl_tice < -40.0, gfdl_beta / 9.337 * np.exp(
            (1 - 0.920) * np.log(1.0e3 * ice_positive)) * 1.0e3, result)
    result = xr.where(
        temp[1:] - gfdl_tice < -50.0, gfdl_beta / 9.917 * np.exp(
            (1 - 0.891) * np.log(1.0e3 * ice_positive)) * 1.0e3, result)
    result = xr.where(result < gfdl_reimin, gfdl_reimin, result)
    result = xr.where(result > gfdl_reimax, gfdl_reimax, result)
    result = xr.where(ice > min_qi, result, 0)
    return result * 2
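
The banding-and-clamping pattern used here can be seen in isolation: start from a filled template, overwrite band by band with `xr.where` (later, colder bands override earlier ones), then clamp. Thresholds and values in this sketch are arbitrary, not the GFDL constants above:

import numpy as np
import xarray as xr

t = xr.DataArray([-55.0, -45.0, -35.0, -10.0], dims="z")
r = xr.full_like(t, 0.0)
r = xr.where(t >= -30.0, 1.0, r)
r = xr.where(t < -30.0, 2.0, r)
r = xr.where(t < -40.0, 3.0, r)   # overrides part of the previous band
r = xr.where(t < -50.0, 4.0, r)
r = xr.where(r > 3.5, 3.5, r)     # clamp to a maximum
# r.values -> [3.5, 3., 2., 1.]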
Example #30
def test_weights(ones, ndims):
    dims = ones.dims
    if ones.ndim < ndims:
        pytest.skip(
            "Don't need to test when number of dimension combinations "
            "exceeds the number of array dimensions"
        )

    bins = np.array([0, 0.9, 1.1, 2])
    bins_c = 0.5 * (bins[1:] + bins[:-1])

    weight_value = 0.5

    def _check_result(h, d):
        other_dims = [dim for dim in ones.dims if dim not in d]
        if len(other_dims) > 0:
            assert set(other_dims) <= set(h.dims)
        # check that all values are in the central bin
        h_sum = h.sum(other_dims)
        h_sum_expected = xr.DataArray(
            [0, weight_value * ones.size, 0],
            dims=["ones_bin"],
            coords={"ones_bin": ("ones_bin", bins_c)},
            name="histogram_ones",
        )
        xr.testing.assert_identical(h_sum, h_sum_expected)

    # get every possible combination of sub-dimensions
    for n_combinations in range(ones.ndim):
        for weight_dims in combinations(dims, n_combinations):
            i_selector = {dim: 0 for dim in weight_dims}
            weights = xr.full_like(ones.isel(**i_selector), weight_value)
            for nc in range(ndims):
                for d in combinations(dims, nc + 1):
                    h = histogram(ones, weights=weights, bins=[bins], dim=d)
                    _check_result(h, d)
Example #31
def get_pop_region_mask_za(
    mask_type='3d',
    grid_name='POP_gx1v7',
):
    """return a region mask for zonal averaging"""
    mask3d = pop_tools.region_mask_3d(grid_name,
                                      mask_name='Pacific-Indian-Atlantic')
    nregion = len(mask3d.region)

    if mask_type.lower() == '3d':
        return mask3d

    elif mask_type.lower() == '2d':
        mask2d = xr.full_like(mask3d.isel(region=0),
                              fill_value=0,
                              dtype=np.int32)
        for i in range(
                1, nregion
        ):  # skip first index because "za" puts the global field in there
            mask2d = xr.where(mask3d.isel(region=i) == 1, i, mask2d)
        mask2d.name = 'REGION_MASK'
        return mask2d
    raise ValueError(
        f'unknown mask type: {mask_type}\nexpecting either "2d" or "3d"')
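
A synthetic illustration of the 3d-to-2d collapse above: a one-hot mask with a `region` dimension becomes a single integer-labelled field, skipping region 0 (the global field):

import numpy as np
import xarray as xr

mask3d = xr.DataArray(
    np.array([[[1, 1], [1, 1]],    # region 0: global (ignored)
              [[1, 0], [0, 0]],    # region 1
              [[0, 1], [1, 0]]]),  # region 2
    dims=("region", "y", "x"),
)
mask2d = xr.full_like(mask3d.isel(region=0), fill_value=0, dtype=np.int32)
for i in range(1, mask3d.sizes["region"]):
    mask2d = xr.where(mask3d.isel(region=i) == 1, i, mask2d)
# mask2d.values -> [[1, 2], [2, 0]]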
Example #32
    def get_index(
        self,
        da: Union[xr.DataArray, xr.Dataset],
        interp: Optional[Union[bool, str]] = None,
    ):
        """Return the group index of each element along the main dimension.

        Parameters
        ----------
        da : Union[xr.DataArray, xr.Dataset]
          The input array/dataset for which the group index is returned.
          It must have Grouper.dim as a coordinate.
        interp : Union[bool, str]
          Argument `interp` defaults to `self.interp`. If True, the returned index can be
          used for interpolation. For month grouping, integer values represent the middle of the month, all other
          days are linearly interpolated in between.

        Returns
        -------
        xr.DataArray
          The index of each element along `Grouper.dim`.
          If `Grouper.dim` is `time` and `Grouper.prop` is None, a uniform array of True is returned.
          If `Grouper.prop` is a time accessor (month, dayofyear, etc.), a numerical array is returned,
            with a special case of `month` and `interp=True`.
          If `Grouper.dim` is not `time`, the dim is simply returned.
        """
        if self.prop is None:
            if self.dim == "time":
                return xr.full_like(da[self.dim], True, dtype=bool)
            return da[self.dim]

        ind = da.indexes[self.dim]
        i = getattr(ind, self.prop)

        if not np.issubdtype(i.dtype, np.integer):
            raise ValueError(
                f"Index {self.name} is not of type int (rather {i.dtype}), but {self.__class__.__name__} requires integer indexes."
            )

        interp = (
            (interp or self.interp)
            if not isinstance(interp, str)
            else interp != "nearest"
        )
        if interp:
            if self.dim == "time":
                if self.prop == "month":
                    i = ind.month - 0.5 + ind.day / ind.days_in_month
                elif self.prop == "dayofyear":
                    i = ind.dayofyear
                else:
                    raise NotImplementedError
            else:
                raise NotImplementedError

        xi = xr.DataArray(
            i,
            dims=self.dim,
            coords={self.dim: da.coords[self.dim]},
            name=self.dim + " group index",
        )

        # Expand dimensions of index to match the dimensions of da
        # We want vectorized indexing with no broadcasting
        # xi = xi.broadcast_like(da)
        xi.name = self.prop
        return xi
Example #33
def get_slope(dem, *resolution):
    x, y = np.gradient(dem, *resolution)

    slope = np.arctan(np.sqrt(x * x + y * y)) * 180 / np.pi
    xr_slope = xr.full_like(dem, slope)
    return xr_slope
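
One caveat here: xarray documents `fill_value` for `full_like` as a scalar (or a dict for Datasets), so passing the whole `slope` array may raise on recent xarray versions. A hedged alternative that still inherits the DEM's coordinates and attributes is to fill with a scalar and then overwrite the data:

import numpy as np
import xarray as xr

def slope_like(dem, *resolution):
    """Slope in degrees on the DEM's grid (sketch; assumes a 2D float DataArray)."""
    x, y = np.gradient(dem.values, *resolution)
    slope = np.degrees(np.arctan(np.hypot(x, y)))
    out = xr.full_like(dem, np.nan, dtype=float)  # scalar fill keeps dims/coords/attrs
    out.data = slope
    return out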
Example #34
    def convert_to_xarray(self, data: Dict) -> Union[xr.Dataset, Tuple[xr.Dataset, xr.Dataset]]:
        """
        Parameters
        ----------
        data
            Data from the ``load_data`` function

        Returns
        -------
            data formatted to an xarray Dataset
        """

        # split up the fields into one of different sizes and optional returns
        fields = dict()

        # not currently returned
        fields['geometry'] = ['Tan_Alt', 'Tan_Lat', 'Tan_Lon']
        fields['flags'] = ['InfVec', 'Dropped']
        fields['profile_flags'] = ['ProfileInfVec']

        # always returned - 1 per profile
        fields['general'] = ['Event_Num', 'Lat', 'Lon', 'Beta', 'Duration', 'Type_Sat', 'Type_Tan', 'Trop_Height']

        # optional return parameters
        fields['background'] = ['NMC_Pres', 'NMC_Temp', 'NMC_Dens', 'NMC_Dens_Err', 'Density', 'Density_Err']
        fields['ozone'] = ['O3', 'O3_Err']
        fields['no2'] = ['NO2', 'NO2_Err']
        fields['h2o'] = ['H2O', 'H2O_Err']
        fields['aerosol'] = ['Ext386', 'Ext452', 'Ext525', 'Ext1020', 'Ext386_Err', 'Ext452_Err', 'Ext525_Err',
                             'Ext1020_Err']
        fields['particle_size'] = ['SurfDen', 'Radius', 'SurfDen_Err', 'Radius_Err']

        xr_data = []
        index_flags = self.convert_index_bit_flags(data)
        species_flags = self.convert_species_bit_flags(data)
        time = pd.to_timedelta(data['mjd'], 'D') + pd.Timestamp('1858-11-17')

        data['Trop_Height'] = data['Trop_Height'].flatten()
        for key in fields['general']:
            xr_data.append(xr.DataArray(data[key], coords=[time], dims=['time'], name=key))

        if 'aerosol' in self.species or self.filter_ozone:  # we need aerosol to filter ozone
            altitude = data['Alt_Grid'][0:80]
            wavel = np.array([386.0, 452.0, 525.0, 1020.0])
            ext = np.array([data['Ext386'], data['Ext452'], data['Ext525'], data['Ext1020']])
            xr_data.append(xr.DataArray(ext, coords=[wavel, time, altitude],
                                        dims=['wavelength', 'time', 'Alt_Grid'], name='Ext'))
            ext = np.array([data['Ext386_Err'], data['Ext452_Err'], data['Ext525_Err'], data['Ext1020_Err']])
            xr_data.append(xr.DataArray(ext, coords=[wavel, time, altitude],
                                        dims=['wavelength', 'time', 'Alt_Grid'], name='Ext_Err'))
            for key in fields['particle_size']:
                xr_data.append(xr.DataArray(data[key], coords=[time, altitude],
                                            dims=['time', 'Alt_Grid'], name=key))
        if 'no2' in self.species:
            altitude = data['Alt_Grid'][0:100]
            for key in fields['no2']:
                xr_data.append(xr.DataArray(data[key], coords=[time, altitude],
                                            dims=['time', 'Alt_Grid'], name=key))
        if 'h2o' in self.species:
            altitude = data['Alt_Grid'][0:100]
            for key in fields['h2o']:
                xr_data.append(xr.DataArray(data[key], coords=[time, altitude],
                                            dims=['time', 'Alt_Grid'], name=key))
        if any(i in ['ozone', 'o3'] for i in self.species):
            altitude = data['Alt_Grid'][0:140]
            for key in fields['ozone']:
                xr_data.append(xr.DataArray(data[key], coords=[time, altitude],
                                            dims=['time', 'Alt_Grid'], name=key))

        if 'background' in self.species:
            altitude = data['Alt_Grid'][0:140]
            for key in fields['background']:
                xr_data.append(xr.DataArray(data[key], coords=[time, altitude],
                                            dims=['time', 'Alt_Grid'], name=key))

        xr_data = xr.merge(xr_data)

        if self.enumerate_flags:
            xr_data = xr.merge([xr_data, index_flags, species_flags])

        for var in xr_data.variables.keys():
            if xr_data[var].dtype == 'float32' or 'Err' in var:
                xr_data[var] = xr_data[var].where(xr_data[var] != data['FillVal'])

        # determine cloud filter for aerosol data
        cloud_filter = xr.full_like(species_flags.Cloud_Bit_1, fill_value=True, dtype=bool)
        min_alt = (xr_data.Alt_Grid * (species_flags.Cloud_Bit_1 & species_flags.Cloud_Bit_2)).max(dim='Alt_Grid')
        cloud_filter = cloud_filter.where(cloud_filter.Alt_Grid > min_alt)
        xr_data['cloud_filter'] = np.isnan(cloud_filter)

        # determine valid ozone altitudes
        if any(i in ['ozone', 'o3'] for i in self.species):
            # add an ozone filter field for convenience
            ozone_good = xr.full_like(species_flags.Cloud_Bit_1, fill_value=True, dtype=bool)
            # Exclusion of all data points with an uncertainty estimate of 300% or greater
            ozone_good = ozone_good.where(xr_data.O3_Err < 30000)
            # Exclusion of all profiles with an uncertainty greater than 10% between 30 and 50 km
            no_good = (xr_data.O3_Err > 1000) & (xr_data.Alt_Grid > 30) & (xr_data.Alt_Grid < 50)
            ozone_good = ozone_good.where(~no_good)
            # Exclusion of all data points at altitude and below the occurrence of an aerosol extinction value of
            # greater than 0.006 km^-1
            # NOTE: the wavelength to use as the filter is not specified in the documentation, so I have chosen the
            # wavelength with the smallest extinction and therefore the strictest filtering
            min_alt = (xr_data.Alt_Grid * (xr_data.Ext.sel(wavelength=1020) > 0.006)).max(dim='Alt_Grid')
            ozone_good = ozone_good.where(xr_data.Alt_Grid > min_alt)
            # Exclusion of all data points at altitude and below the occurrence of both the 525nm aerosol extinction
            # value exceeding 0.001 km^-1 and the 525/1020 extinction ratio falling below 1.4
            min_alt = (xr_data.Alt_Grid * ((xr_data.Ext.sel(wavelength=525) > 0.001) &
                                           ((xr_data.Ext.sel(wavelength=525) / xr_data.Ext.sel(
                                               wavelength=1020)) < 1.4))).max(dim='Alt_Grid')
            ozone_good = ozone_good.where(xr_data.Alt_Grid > min_alt)
            # Exclusion of all data points below 35 km with a 200% or larger uncertainty estimate
            no_good = (xr_data.O3_Err > 20000) & (xr_data.Alt_Grid < 35)
            ozone_good = ~np.isnan(ozone_good.where(~no_good))
            xr_data['ozone_filter'] = ozone_good

        if self.filter_aerosol:
            xr_data['Ext'] = xr_data.Ext.where(~xr_data.cloud_filter)

        if self.filter_ozone:
            xr_data['O3'] = xr_data.O3.where(ozone_good)

        # drop aerosol if not requested
        if self.filter_ozone and not ('aerosol' in self.species):
            xr_data = xr_data.drop(['Ext', 'Ext_Err', 'wavelength'])

        if self.normalize_percent_error:
            for var in xr_data.variables.keys():
                if 'Err' in var:  # put error units back into percent
                    xr_data[var] = (xr_data[var] / 100).astype('float32')

        xr_data = xr_data.transpose('time', 'Alt_Grid', 'wavelength')
        xr_data = self.apply_cf_conventions(xr_data)

        if self.return_separate_flags:
            return xr_data, xr.merge([index_flags, species_flags])
        else:
            return xr_data