def def_sponge_dampingtimescale_north(Y,sponge_width,idampval):
    '''Define a sponge damping grid at the north of the domain based on the horizontal grid shape.
    Y is the latitude (y) coordinate array of the horizontal grid
    sponge_width is the width in degrees of latitude to damp over [must be a list, progressively decreasing in width]
    idampval is the inverse damping rate (in s-1) for each width [must be a list]; rates add up where the nested regions overlap '''
    idamp = xr.zeros_like(Y)
    for i in range(len(sponge_width)):
        sponge_region = Y>Y.max(xr.ALL_DIMS)-sponge_width[i]
        idamp=idamp+xr.zeros_like(Y).where(~sponge_region,idampval[i])
    return idamp
def def_sponge_damping_linear_north(Y, sponge_width, idampval_max):
    '''Define a sponge damping grid at the north of the domain based on the horizontal grid shape.
    Y is the latitude (y) coordinate array of the horizontal grid
    sponge_width is the width in degrees of latitude to damp over and idampval_max is the maximum inverse damping rate (in s-1)
    The inverse damping rate varies linearly, decaying from its maximum at the northern boundary
    to 0 at the inner edge of the sponge region. '''
    idamp = xr.zeros_like(Y)
    sponge_region = Y > Y.max(xr.ALL_DIMS) - sponge_width
    idamp = idamp + xr.zeros_like(Y).where(~sponge_region, idampval_max)
    idamp = idamp * (Y - Y.max(xr.ALL_DIMS) + sponge_width) / sponge_width

    return idamp
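A minimal usage sketch for the two sponge helpers above (illustrative 1-D latitude values; assumes an xarray version in which xr.ALL_DIMS is still available, otherwise Y.max() gives the same result):

import numpy as np
import xarray as xr

# toy latitude coordinate spanning -70..80 degrees
Y = xr.DataArray(np.linspace(-70.0, 80.0, 151), dims='ny', name='lat')

# stepped sponge: nested regions, so the inverse damping rates add up near the boundary
idamp_step = def_sponge_dampingtimescale_north(Y, sponge_width=[10.0, 5.0],
                                               idampval=[1.0 / 86400, 1.0 / 43200])

# linear sponge: maximum at the northern boundary, zero at the inner edge
idamp_lin = def_sponge_damping_linear_north(Y, sponge_width=10.0, idampval_max=1.0 / 86400)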
Example 3
    def reduce_chunked(self, xs, output):
        """Computes the skew across a chunk
        
        Parameters
        ----------
        xs : iterable
            Iterable of sources
        
        Returns
        -------
        UnitsDataArray
            Skew of the source data over dims
        """
        N = xr.zeros_like(output)
        M1 = xr.zeros_like(output)
        M2 = xr.zeros_like(output)
        M3 = xr.zeros_like(output)
        check_empty = True

        for x in xs:
            Nx = np.isfinite(x).sum(dim=self._dims)
            M1x = x.mean(dim=self._dims)
            Ex = x - M1x
            Ex2 = Ex**2
            Ex3 = Ex2 * Ex
            M2x = (Ex2).sum(dim=self._dims)
            M3x = (Ex3).sum(dim=self._dims)

            # premask to omit NaNs
            b = Nx.data > 0
            Nx = Nx.data[b]
            M1x = M1x.data[b]
            M2x = M2x.data[b]
            M3x = M3x.data[b]

            Nb = N.data[b]
            M1b = M1.data[b]
            M2b = M2.data[b]

            # merge
            d = M1x - M1b
            n = Nb + Nx
            NNx = Nb * Nx

            M3.data[b] += (M3x + d**3 * NNx * (Nb - Nx) / n**2 + 3 * d *
                           (Nb * M2x - Nx * M2b) / n)
            M2.data[b] += M2x + d**2 * NNx / n
            M1.data[b] += d * Nx / n
            N.data[b] = n

        # calculate skew
        skew = np.sqrt(N) * M3 / np.sqrt(M2**3)
        return skew
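A self-contained NumPy sketch (hypothetical helper, not part of the class above) that checks the same pairwise merge formulas for N, M1, M2 and M3 against a direct computation of the biased skewness sqrt(N) * M3 / M2**1.5:

import numpy as np

def chunked_skew(chunks):
    # accumulate moments chunk by chunk with the same merge formulas as reduce_chunked
    N = M1 = M2 = M3 = 0.0
    for x in chunks:
        Nx = x.size
        M1x = x.mean()
        Ex = x - M1x
        M2x = (Ex**2).sum()
        M3x = (Ex**3).sum()
        d = M1x - M1
        n = N + Nx
        M3 += M3x + d**3 * N * Nx * (N - Nx) / n**2 + 3 * d * (N * M2x - Nx * M2) / n
        M2 += M2x + d**2 * N * Nx / n
        M1 += d * Nx / n
        N = n
    return np.sqrt(N) * M3 / np.sqrt(M2**3)

rng = np.random.default_rng(0)
x = rng.normal(size=1000)
direct = np.sqrt(x.size) * ((x - x.mean())**3).sum() / np.sqrt(((x - x.mean())**2).sum()**3)
assert np.isclose(chunked_skew(np.array_split(x, 7)), direct)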
Example 4
    def _invert_from_model_any(inc, sigma0_co_db, sigma0_cr_db, dsig_cr, ancillary_wind):
        # wrapper to allow computation on any type (xarray, numpy)

        try:
            # if input is xarray, will return xarray
            da_ws_co = xr.zeros_like(sigma0_co_db, dtype=np.complex128)
            da_ws_co.name = 'windspeed_gmf'
            da_ws_co.attrs.clear()
            da_ws_cr = xr.zeros_like(sigma0_co_db, dtype=np.float64)
            da_ws_cr.name = 'windspeed_gmf'
            da_ws_cr.attrs.clear()

            try:
                # if dask array, use map_blocks
                # raise ImportError
                import dask.array as da
                if any(
                        [
                            isinstance(v.data, da.Array)
                            for v in [inc, sigma0_co_db, sigma0_cr_db, dsig_cr, ancillary_wind]
                        ]
                ):
                    da_ws_co.data, da_ws_cr.data = da.apply_gufunc(
                        _invert_from_model_numpy,
                        '(n),(n),(n),(n),(n)->(n),(n)',
                        inc.data, sigma0_co_db.data, sigma0_cr_db.data, dsig_cr.data, ancillary_wind.data
                    )
                    logger.debug('invert with map_blocks')
                else:
                    raise TypeError

            except (ImportError, TypeError):
                # use numpy array, but store in xarray
                da_ws_co.data, da_ws_cr.data = _invert_from_model_numpy(
                    np.asarray(inc),
                    np.asarray(sigma0_co_db),
                    np.asarray(sigma0_cr_db),
                    np.asarray(dsig_cr),
                    np.asarray(ancillary_wind),
                )
                logger.debug('invert with xarray.values. no chunks')
        except TypeError:
            # full numpy
            logger.debug('invert with numpy')
            da_ws_co, da_ws_cr = _invert_from_model_numpy(
                inc,
                sigma0_co_db,
                sigma0_cr_db,
                dsig_cr,
                ancillary_wind
            )

        return da_ws_co, da_ws_cr
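A small, self-contained sketch of the dispatch idea used above, with a toy two-output function standing in for _invert_from_model_numpy: dask.array.apply_gufunc applies a NumPy function blockwise and returns one dask array per output:

import dask.array as da

def _toy_two_outputs(a, b):
    # placeholder for _invert_from_model_numpy: two outputs with the shape of the inputs
    return a + b, a - b

x = da.ones((4, 6), chunks=(2, 3))
y = da.full((4, 6), 2.0, chunks=(2, 3))
s, d = da.apply_gufunc(_toy_two_outputs, '(),()->(),()', x, y)
print(s.compute()[0, 0], d.compute()[0, 0])  # 3.0 -1.0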
def main(era_filesearch, cesm_base_filesearch, bias_output):

    print("opening data")
    era_data         = xr.open_mfdataset(era_filesearch,         concat_dim='time')
    base_cesm_data   = xr.open_mfdataset(cesm_base_filesearch,   concat_dim='time')

    print("loading data")
    era_data.load()
    base_cesm_data.load()

    print("compute means")
    emean = era_data.std(dim="time")
    cmean = base_cesm_data.std(dim="time")

    print("creating data")
    interpolated_era = xr.zeros_like(cmean)
    print("loading data")
    interpolated_era.load()

    z_interp_all_vars(emean, interpolated_era, era_data["z"].mean(dim="time"), base_cesm_data["z"].mean(dim="time"), vars_to_correct)
    interpolated_era.to_netcdf("era_interpolated_std.nc")

    print("Computing Bias")
    bias = interpolated_era - cmean

    print("writing")
    bias.to_netcdf(bias_output)
Example 6
    def construct_knn_graph(
        cls,
        data,
        dist_mat,
        k: int,
        cell_properties: Union[bool, Sequence[str]] = False,
        cell_channel_properties: Union[bool, Sequence[str]] = False
    ) -> 'SpatialCellGraph':
        """Constructs a new k-nearest cell neighbor graph

        :param data: single-cell data (rows: cell IDs, columns: feature names)
        :type data: SingleCellData or DataFrame-like
        :param dist_mat: symmetric distance matrix, shape: ``(cells, cells)``
        :type dist_mat: DataArray-like
        :param k: number of nearest neighbors for the graph construction
        :param cell_properties: list of cell properties (e.g. regionprops) to include as node attributes; set to
            ``True`` to include all
        :param cell_channel_properties: list of cell channel properties (e.g. intensity values) to include as node
            attributes; set to ``True`` to include all
        :return: a directed k-nearest cell neighbor graph
        """
        data, dist_mat = cls._prepare_data(data, dist_mat, cell_properties,
                                           cell_channel_properties)
        adj_mat = xr.zeros_like(dist_mat, dtype='bool')
        knn_indices = np.argpartition(dist_mat.values, k + 1,
                                      axis=1)[:, :(k + 1)]
        for current_index, current_knn_indices in enumerate(knn_indices):
            adj_mat[current_index, current_knn_indices] = True
        np.fill_diagonal(adj_mat.values, False)
        return SpatialCellGraph(data, adj_mat, _skip_data_preparation=True)
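A reduced sketch of the adjacency construction on toy data (hypothetical coordinates, no SingleCellData or SpatialCellGraph involved), showing how argpartition plus the diagonal reset leaves k neighbours per cell:

import numpy as np
import xarray as xr

rng = np.random.default_rng(0)
pts = rng.random((6, 2))
dist = np.linalg.norm(pts[:, None] - pts[None, :], axis=-1)
dist_mat = xr.DataArray(dist, dims=('cell', 'neighbor'))

k = 2
adj_mat = xr.zeros_like(dist_mat, dtype='bool')
knn_indices = np.argpartition(dist_mat.values, k + 1, axis=1)[:, :(k + 1)]
for current_index, current_knn_indices in enumerate(knn_indices):
    adj_mat[current_index, current_knn_indices] = True
np.fill_diagonal(adj_mat.values, False)
print(adj_mat.values.sum(axis=1))  # k neighbours per cell: [2 2 2 2 2 2]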
Example 7
def _regrid_given_delp(
    ds,
    delp_fine,
    delp_coarse,
    weights,
    x_dim: str = FV_CORE_X_CENTER,
    y_dim: str = FV_CORE_Y_CENTER,
    z_dim: str = RESTART_Z_CENTER,
):
    """Given a fine and coarse delp, do vertical regridding to coarse pressure levels
    and mask weights below fine surface pressure.
    """
    delp_coarse_on_fine = block_upsample_like(
        delp_coarse, delp_fine, x_dim=x_dim, y_dim=y_dim
    )
    phalf_coarse_on_fine = pressure_at_interface(
        delp_coarse_on_fine, dim_center=z_dim, dim_outer=RESTART_Z_OUTER
    )
    phalf_fine = pressure_at_interface(
        delp_fine, dim_center=z_dim, dim_outer=RESTART_Z_OUTER
    )

    ds_regrid = xr.zeros_like(ds)
    for var in ds:
        ds_regrid[var] = regrid_vertical(
            phalf_fine, ds[var], phalf_coarse_on_fine, z_dim_center=z_dim
        )

    masked_weights = _mask_weights(
        weights, phalf_coarse_on_fine, phalf_fine, dim_center=z_dim
    )

    return ds_regrid, masked_weights
Example 8
def compute_climatology(ds,
                        monthValues,
                        calendar=None,
                        maskVaries=True):  # {{{
    """
    Compute a monthly, seasonal or annual climatology data set from a data
    set.  The mean is weighted by the number of days in each month of
    the data set, ignoring values masked out with NaNs.  If the month
    coordinate is not present, a data array ``month`` will be added based
    on ``Time`` and the provided calendar.

    Parameters
    ----------
    ds : ``xarray.Dataset`` or ``xarray.DataArray`` object
        A data set with a ``Time`` coordinate expressed as days since
        0001-01-01 or ``month`` coordinate

    monthValues : int or array-like of ints
        A single month or an array of months to be averaged together

    calendar : ``{'gregorian', 'gregorian_noleap'}``, optional
        The name of one of the calendars supported by MPAS cores, used to
        determine ``month`` from ``Time`` coordinate, so must be supplied if
        ``ds`` does not already have a ``month`` coordinate or data array

    maskVaries: bool, optional
        If the mask (where variables in ``ds`` are ``NaN``) varies with time.
        If not, the weighted average does not need to make extra effort to account
        for the mask.  Most MPAS fields will have masks that don't vary in
        time, whereas observations may sometimes be present only at some
        times and not at others, requiring ``maskVaries = True``.

    Returns
    -------
    climatology : object of same type as ``ds``
        A data set without the ``'Time'`` coordinate containing the mean
        of ds over all months in monthValues, weighted by the number of days
        in each month.

    Authors
    -------
    Xylar Asay-Davis

    Last Modified
    -------------
    04/08/2017
    """

    ds = add_years_months_days_in_month(ds, calendar)

    mask = xr.zeros_like(ds.month, bool)

    for month in monthValues:
        mask = xr.ufuncs.logical_or(mask, ds.month == month)

    climatologyMonths = ds.where(mask, drop=True)

    climatology = _compute_masked_mean(climatologyMonths, maskVaries)

    return climatology  # }}}
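A toy sketch of the month-mask step above (synthetic data rather than MPAS output, and np.logical_or in place of the xr.ufuncs helper, which newer xarray releases no longer ship):

import numpy as np
import xarray as xr

month = xr.DataArray(np.tile(np.arange(1, 13), 2), dims='Time', name='month')
sst = xr.DataArray(np.random.rand(24), dims='Time', coords={'month': month})

mask = xr.zeros_like(sst.month, bool)
for m in (12, 1, 2):                          # a DJF "season"
    mask = np.logical_or(mask, sst.month == m)
djf = sst.where(mask, drop=True)
print(djf.sizes['Time'])                      # 6 of the 24 time slices remain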
Example 9
def plot(G):

    # Don't plot first or last bin (expanded to capture full range)
    G = G.isel(sigma0=slice(1, -1))
    levs = G["sigma0"].values

    # Take annual mean and load
    G = G.mean("time").load()
    # Get terms in dataset
    terms = list(G.data_vars)

    fig, ax = plt.subplots()
    # Plot each term
    for term in terms:
        if term == "heat":
            color = "tab:red"
        elif term == "salt":
            color = "tab:blue"
        else:
            color = "k"
        ax.plot(levs, G[term], label=term, color=color)

    # If terms were not grouped then sum them up to get total
    if len(terms) > 1:
        total = xr.zeros_like(G[terms[0]])
        for term in terms:
            total += G[term]
        ax.plot(levs, total, label="total", color="k")

    ax.legend()
    ax.set_xlabel("SIGMA0")
    ax.set_ylabel("TRANSFORMATION ($m^3s^{-1}$)")
    ax.autoscale(enable=True, axis="x", tight=True)

    return fig
Example 10
def day_number_to_date_mars_model(ls_in,
                                  calendar_type='none',
                                  units_in='days since 0000-00-0 00:00:00'):

    year_values = xar.zeros_like(ls_in)

    my_temp = 1
    ls_previous = 0.
    dodgy_ls_list = []

    for i in range(len(ls_in.squeeze().values) - 1):

        if ls_in[i] - ls_previous > 0. and ls_in[i + 1] - ls_in[i] > 0.:
            year_values[i] = my_temp
        elif ls_in[i] - ls_previous < 0. and ls_in[i + 1] - ls_in[i] > 0.:
            year_values[i] = my_temp
        elif ls_in[i] - ls_previous > 0. and ls_in[i + 1] - ls_in[i] < 0.:
            year_values[i] = my_temp
        elif ls_in[i] - ls_previous < 0. and ls_in[i + 1] - ls_in[i] < 0.:
            dodgy_ls_list.append(i)
            my_temp = my_temp + 1
            year_values[i] = my_temp
        ls_previous = ls_in[i]

    year_values[-1] = my_temp

    ls_in[dodgy_ls_list] = 0.

    dayofyear_values = np.floor(ls_in)
    month_values = np.mod(np.ceil((ls_in / 30.) - 0.5) + 3., 12)

    cdftime = cdftime_mars(dayofyear_values, month_values, year_values)

    return cdftime, ls_in
Example 11
def test_merge_into_oceandataset():

    # da without name
    da = od_in.dataset['XC'] * od_in.dataset['YC']
    with pytest.raises(ValueError) as e:
        od_out = od_in.merge_into_oceandataset(da)
    assert str(
        e.value
    ) == "xarray.DataArray doesn't have a name. Set it using da.rename()"

    # da different name
    da = da.rename('test')
    od_out = od_in.merge_into_oceandataset(da)
    assert od_out.dataset['test'].equals(da)

    # ds
    ds = xr.merge([da.rename('test1'), da.rename('test2')])
    od_out = od_in.merge_into_oceandataset(ds)
    assert set(['test1', 'test2']).issubset(od_out.dataset.variables)

    # da
    da = xr.zeros_like(od_in.dataset['XC'])
    with pytest.warns(UserWarning):
        od_out = od_in.merge_into_oceandataset(da)
    with pytest.warns(UserWarning):
        od_out = od_in.merge_into_oceandataset(da, overwrite=True)
Example 12
def _column_dq1(ds: xr.Dataset) -> xr.DataArray:
    if "net_heating_due_to_machine_learning" in ds:
        warnings.warn(
            "'net_heating_due_to_machine_learning' is a deprecated variable name. "
            "It will not be supported in future versions of fv3net. Use "
            "'column_heating_due_to_machine_learning' instead.",
            DeprecationWarning,
        )
        # fix isochoric vs isobaric transition issue
        column_dq1 = 716.95 / 1004 * ds.net_heating_due_to_machine_learning
    elif "net_heating" in ds:
        warnings.warn(
            "'net_heating' is a deprecated variable name. "
            "It will not be supported in future versions of fv3net. Use "
            "'column_heating_due_to_machine_learning' instead.",
            DeprecationWarning,
        )
        # fix isochoric vs isobaric transition issue
        column_dq1 = 716.95 / 1004 * ds.net_heating
    elif "column_heating_due_to_machine_learning" in ds:
        column_dq1 = ds.column_heating_due_to_machine_learning
    elif "storage_of_internal_energy_path_due_to_machine_learning" in ds:
        column_dq1 = ds.storage_of_internal_energy_path_due_to_machine_learning
    else:
        # assume given dataset is for a baseline or verification run
        column_dq1 = xr.zeros_like(ds.PRATEsfc)
    column_dq1.attrs = {
        "long_name": "<dQ1> column integrated heating from ML",
        "units": "W/m^2",
    }
    return column_dq1.rename("column_integrated_dQ1")
Example 13
def test__bin_stats(ds):
	from sciapy.level2.binning import _bin_stats
	_ds = ds.copy()
	_ds["latitude"] = xr.zeros_like(_ds.latitude)
	# binning result
	avg_aw = _bin_stats(
		_ds,
		binvar="latitude", tvar="time",
		area_weighted=True,
	)
	avg_nw = _bin_stats(
		_ds,
		binvar="latitude", tvar="time",
		area_weighted=False,
	)
	xr.testing.assert_allclose(avg_nw, avg_aw)

	# non-weighted mean using standard functions
	dims = ("latitude", "time")
	stacked = "__stacked__"
	_ds = _ds.stack(**{stacked: dims})
	ds_avg = _ds.mean(dim=stacked)
	ds_cnt = _ds.count(dim=stacked)
	ds_std = _ds.std(dim=stacked, ddof=1)
	ds_std = ds_std.rename({v: v + "_std" for v in ds_std.data_vars})
	ds_cnt = ds_cnt.rename({v: v + "_cnt" for v in ds_cnt.data_vars})
	avg_ds = xr.merge([ds_avg, ds_std, ds_cnt])
	# Re-create the sum of squared weights
	_ws = xr.ones_like(_ds.latitude, dtype=float)
	_ws /= _ws.sum(dim=stacked)
	avg_ds["wsqsum"] = (_ws**2).sum(dim=stacked)
	xr.testing.assert_allclose(avg_nw, avg_ds)
Example 14
def _column_nq1(ds: xr.Dataset) -> xr.DataArray:
    if "column_heating_nudge" in ds:
        # name for column integrated temperature nudging in nudge-to-obs
        column_nq1 = ds.column_heating_nudge
    elif "int_t_dt_nudge" in ds:
        # name for column-integrated temperature nudging in X-SHiELD runs
        column_nq1 = ds.int_t_dt_nudge
    elif "net_heating_due_to_nudging" in ds:
        # old name for column integrated temperature nudging in nudge-to-fine
        warnings.warn(
            "'net_heating_due_to_nudging' is a deprecated variable name. "
            "It will not be supported in future versions of fv3net. Use "
            "'column_heating_due_to_nudging' instead.",
            DeprecationWarning,
        )
        # fix isochoric vs isobaric transition issue
        column_nq1 = 716.95 / 1004 * ds.net_heating_due_to_nudging
    elif "column_heating_due_to_nudging" in ds:
        column_nq1 = ds.column_heating_due_to_nudging
    else:
        # assume given dataset is for a run without temperature nudging
        column_nq1 = xr.zeros_like(ds.PRATEsfc)
    column_nq1.attrs = {
        "long_name": "<nQ1> column integrated heating from nudging",
        "units": "W/m^2",
    }
    return column_nq1.rename("column_integrated_nQ1")
Example 15
    def wrapper(*args, **kwargs):
        fn.utils.assert_isdarray(args[0])
        fn.utils.assert_isdarray(args[1])

        Ton, Tref = args[:2]
        Tout = xr.zeros_like(Ton)
        refids = np.unique(Tref.scanid)
        onids  = np.unique(Ton.scanid)

        for onid in tqdm(onids):
            # _f denotes former REF (before ON)
            # _l denotes latter REF (after ON)
            index = np.searchsorted(refids, onid)

            if index == 0:
                index_f = index_l = 0
            elif index == len(refids):
                index_f = index_l = len(refids)-1
            else:
                index_f, index_l = index-1, index

            index_on  = (Ton.scanid == onid)
            index_ref = ((Tref.scanid == refids[index_f])
                         | (Tref.scanid == refids[index_l]))

            Ton_  = Ton[index_on]
            Tref_ = Tref[index_ref]
            Tout_ = func(Ton_, Tref_, *args[2:], **kwargs)

            assert Tout_.shape == Ton_.shape
            Tout[index_on] = Tout_

        return Tout
Example 16
def apply_binary_mask(times, dep_lat, dep_lon, mask, reverse=True):
    array_list = []
    origins = xr.zeros_like(mask)

    for i in numba.prange(times.shape[0]):
        time = times[i]
        dep_lat_, dep_lon_ = dep_lat.sel(time=time).copy(), dep_lon.sel(
            time=time).copy()
        dep_lat_nan = np.isnan(dep_lat_.values.flatten())
        dep_lon_nan = np.isnan(dep_lon_.values.flatten())
        assert (dep_lat_nan == dep_lon_nan).all(), "This should not happen!"

        dep_lat_no_nan = dep_lat_.values.flatten()[~dep_lat_nan]
        dep_lon_no_nan = dep_lon_.values.flatten()[~dep_lon_nan]

        points = [x for x in zip(dep_lat_no_nan, dep_lon_no_nan)]
        landsea = list()
        for point in points:
            landsea.append(
                mask.sel(latitude=point[0],
                         longitude=point[1],
                         method='nearest').values)
            origins.sel(latitude=point[0],
                        longitude=point[1],
                        method='nearest').values += 1
        vals = dep_lat_.values
        if reverse:
            vals[~np.isnan(vals)] = [0 if x == 1 else 1 for x in landsea
                                     ]  # switching sea breeze to 1
        else:
            vals[~np.isnan(vals)] = [x for x in landsea]
        array_list.append(vals)
        print("Done time {}".format(time))
    return array_list, origins
def compute_by_block(dsx):
    """
    
    """

    # determine index key for each chunk
    slices = []
    for chunks in dsx.chunks:
        L = [
            0,
        ] + list(np.cumsum(chunks))
        slices.append([slice(a, b) for a, b in (zip(L[:-1], L[1:]))])
    indexes = list(product(*slices))

    # allocate memory to receive result
    if isinstance(dsx, xr.DataArray):
        result = xr.zeros_like(dsx).load()
    else:
        result = np.zeros(dsx.shape)

    #evaluate each chunk one at a time
    for index in tqdm_notebook(indexes, leave=False):
        block = dsx.__getitem__(index).compute()
        result.__setitem__(index, block)

    return result
Example 18
def create_area_grid(da, res=0.1):
    da_area = xr.zeros_like(da)
    da_area.attrs = {'long_name': 'area', 'units': 'ha'}
    da_area.name = 'area'
    for lat in da_area.lat.values:
        da_area.loc[{'lat': lat}] = calc_area(lat, res)
    return da_area
Example 19
def ismatch(dataarray: xr.DataArray,
            pattern: Union[Pattern, str],
            flags: re.RegexFlag = 0) -> xr.DataArray:
    """Test whether each string in a DataArray matches a regex pattern.

    Args:
        dataarray: String DataArray to be compared.
        pattern: String or compiled regex pattern.
        flags: Regex flags to control the matching behavior.

    Returns:
        Boolean DataArray each value of which is ``True``
        where it matches the pattern and ``False`` otherwise.

    Raises:
        TypeError: Raised if ``dataarray.dtype`` is not string-like.

    """
    if not np.issubdtype(dataarray.dtype, np.str_):
        raise TypeError("Can only be used for string DataArray.")

    pattern = re.compile(pattern, flags)
    search = np.vectorize(lambda string: pattern.search(string))

    result = xr.zeros_like(dataarray, bool)
    result.values = search(dataarray.values).astype(bool)

    return result
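A brief usage sketch for ismatch on toy data:

import numpy as np
import xarray as xr

names = xr.DataArray(np.array(['spec_A1', 'dark', 'spec_B2']), dims='scan')
print(ismatch(names, r'^spec_[A-Z]\d$').values)  # [ True False  True]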
Example 20
def process_per_timestep(dset, flexdust_ds,x0,x1,y0,y1, height=None):
    dset = dset.sel(lon=slice(x0,x1), lat=slice(y0,y1))
    
    #interpolate flexdust to match flexpart coordinates
    flexdust_ds = flexdust_ds.interp({'lon':dset.lon,'lat':dset.lat})    
    
    if height is None:
        height = dset.height.values
    scale_factor = (1/height)*1000
    print('creating output array')
    out_data = xr.zeros_like(dset['spec001_mr'])
    for i in range(len(out_data.time)):
        
        temp_data = dset['spec001_mr'].isel(time=i)
        time_steps = temp_data.time + temp_data.btime 
        emission_field = flexdust_ds['Emission'].sel(time=time_steps)
        out_data[i] = temp_data.values*emission_field.values*scale_factor
    print('finished emission field * sensitivity')
    surface_sensitivity = dset['spec001_mr']
    iedate_stamp = dset.time[-1]
    ibdate_stamp = dset.time[0]
    dset.attrs['iedate'] = str(iedate_stamp.dt.strftime('%Y%m%d').values)
    dset.attrs['ietime'] = str(iedate_stamp.dt.strftime('%H%M%S').values)

    dset.attrs['ibdate'] = str(ibdate_stamp.dt.strftime('%Y%m%d').values)
    dset.attrs['ibtime'] = str(ibdate_stamp.dt.strftime('%H%M%S').values)
    return dset, out_data, surface_sensitivity
Example 21
    def distance(
        self,
        direction: str,
        x1: LabeledArray,
        x2: LabeledArray,
        t: LabeledArray,
    ) -> LabeledArray:
        """Implementation of calculation of physical distances between points
        in this coordinate system. This accounts for potential toroidal skew of
        lines.

        """
        c = np.ceil(x1).astype(int)
        f = np.floor(x1).astype(int)
        x_s = (self.x_start[c] - self.x_start[f]) * (x1 - f) + self.x_start[f]
        x_e = (self.x_end[c] - self.x_end[f]) * (x1 - f) + self.x_end[f]
        z_s = (self.z_start[c] - self.z_start[f]) * (x1 - f) + self.z_start[f]
        z_e = (self.z_end[c] - self.z_end[f]) * (x1 - f) + self.z_end[f]
        y_s = (self.y_start[c] - self.y_start[f]) * (x1 - f) + self.y_start[f]
        y_e = (self.y_end[c] - self.y_end[f]) * (x1 - f) + self.y_end[f]
        x = x_s + (x_e - x_s) * x2
        y = y_s + (y_e - y_s) * x2
        z = z_s + (z_e - z_s) * x2
        spacings = np.sqrt(
            x.diff(direction)**2 + z.diff(direction)**2 + y.diff(direction)**2)
        result = zeros_like(x)
        result[{direction: slice(1, None)}] = spacings.cumsum(direction)
        return result
def load_turbines(decommissioned=True, replace_nan_values="mean"):
    """Load list of all turbines from CSV file. Includes location, capacity,
    etc. Missing values are replaced with NaN values.

    The file uswtdb_v1_2_20181001.xml contains more information about the fields.

    Parameters
    ----------
    decommissioned : bool
        if True merge datasets from official CSV with Excel sheet received via e-mail
    replace_nan_values : str
        use data imputation to set missing values for turbine diameters and hub heights, set to ""
        to disable

    Returns
    -------
    xr.DataSet

    """
    turbines_dataframe = pd.read_csv(INPUT_DIR / "wind_turbines_usa" /
                                     "uswtdb_v3_0_1_20200514.csv")

    # TODO is this really how it is supposed to be done?
    turbines_dataframe.index = turbines_dataframe.index.rename("turbines")
    turbines = xr.Dataset.from_dataframe(turbines_dataframe)

    # Lets not use the turbine on Guam (avoids a huge bounding box for the USA)
    neglected_capacity_kw = turbines.sel(
        turbines=turbines.xlong >= 0).t_cap.sum()
    assert (neglected_capacity_kw == 275
            ), f"unexpected total capacity filtered: {neglected_capacity_kw}"
    turbines = turbines.sel(turbines=turbines.xlong < 0)
    turbines = turbines.set_index(turbines="case_id")

    turbines["is_decomissioned"] = xr.zeros_like(turbines.p_year,
                                                 dtype=np.bool)

    if not decommissioned:
        return turbines

    turbines_decomissioned = pd.read_excel(
        INPUT_DIR / "wind_turbines_usa" / "decom_clean_032520.xlsx",
        engine="openpyxl",
    )
    turbines_decomissioned = xr.Dataset(turbines_decomissioned).rename(
        dim_0="turbines")
    turbines_decomissioned = turbines_decomissioned.set_index(
        turbines="case_id")

    turbines = xr.merge((turbines, turbines_decomissioned))

    turbines["is_decomissioned"] = turbines.decommiss == "yes"
    turbines = turbines.drop_vars("decommiss")

    if replace_nan_values:
        turbines = estimate_missing(turbines, method=replace_nan_values)

    turbines = turbines.chunk(CHUNK_SIZE_TURBINES)

    return turbines
Example 23
def estimate_baseline(T_cal, order=1, weight=None):
    """Estimate polynomial baseline of each sample."""
    freq = T_cal.ch - T_cal.ch.mean()
    n_freq, n_poly = len(freq), order + 1

    # make design matrix
    X = np.zeros([n_freq, n_poly])

    for i in range(n_poly):
        poly = freq**i
        X[:, i] = poly / np.linalg.norm(poly)

    y = T_cal.values.T

    # estimate coeffs by solving linear regression problem
    if weight is None:
        weight = 1.0

    model = LinearRegression(fit_intercept=False)
    model.fit(X, y, sample_weight=weight)

    # estimate baseline
    T_base = xr.zeros_like(T_cal) + model.coef_ @ X.T

    for i in range(n_poly):
        T_base.coords[f"basis_{i}"] = "ch", X[:, i]
        T_base.coords[f"coeff_{i}"] = "t", model.coef_[:, i]

    return T_base
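A hypothetical usage sketch for estimate_baseline (toy spectra with dims t and ch; assumes the module's own imports, including sklearn's LinearRegression, are in scope):

import numpy as np
import xarray as xr

ch = np.arange(64)
t = np.arange(10)
slope = np.linspace(0.5, 1.5, t.size)[:, None]
T_cal = xr.DataArray(slope * (ch - ch.mean()) + 100.0,
                     dims=('t', 'ch'), coords={'t': t, 'ch': ch})

T_base = estimate_baseline(T_cal, order=1)
print(float(np.abs(T_cal - T_base).max()))  # ~0: a first-order baseline fits linear spectra exactly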
Example 24
def get_D_KL_from_xarray(da_P_X_Y, da_P_X, da_P_Y):
    """
    Mutual information of X and Y: the expected value of log(p(x, y) / (p(x) p(y)))
    over the joint distribution.  Equivalent to:
    I_matrix = xr.apply_ufunc(func_D_KL, P_X_Y, P_X, P_Y); return I_matrix.sum()
    """
    da_log2 = xr.zeros_like(da_P_X_Y)
    import itertools
    str_dim_x = da_P_X.dims[0]
    str_dim_y = da_P_Y.dims[0]
    for realiz_id_x, realiz_id_y in itertools.product(
            da_P_X_Y[str_dim_x].values, da_P_X_Y[str_dim_y].values):
        p_xy = da_P_X_Y.loc[{str_dim_x: realiz_id_x, str_dim_y: realiz_id_y}]
        p_x = da_P_X.loc[{str_dim_x: realiz_id_x}]
        p_y = da_P_Y.loc[{str_dim_y: realiz_id_y}]
        log_p_xy_over_p_x_p_y = ufunc_log_pxy_over_px_py(p_xy, p_x, p_y)
        da_log2.loc[{
            str_dim_x: realiz_id_x,
            str_dim_y: realiz_id_y
        }] = log_p_xy_over_p_x_p_y
        # da_log2.loc[{str_dim_x:realiz_id_x, str_dim_y:realiz_id_y}] =
    # print("da_log2: ", da_log2)
    # print("da_P_X_Y: ", da_P_X_Y)
    mutual_information = xr.dot(da_P_X_Y, da_log2)
    print("mutual_information (", str_dim_x, ", ", str_dim_y, "): ",
          mutual_information.values)
    return mutual_information
Example 25
def calc_correlation_field(xda,
                           mask,
                           dimlist=['Z', 'YC'],
                           n_shift=15,
                           mask_in_betweens=False):
    """calculate the correlation field for each shifted distance

    Parameters
    ----------
    xda : xarray.DataArray
        The field to compute correlations on, over the 'sample' dimension
    mask : xarray.DataArray
        True/False inside/outside of domain
    dimlist : list of str
        denoting dimensions to compute shifted correlations
    n_shift : int
        number of shifts to do
    mask_in_betweens : bool, optional
        if True and, for a particular dimension, there is a gap between two
        points of the domain, then ignore all points at shifts larger than
        where the gap occurs; this does not affect the results much
    """

    xds = xr.Dataset()
    shifty = np.arange(-n_shift, n_shift + 1)
    shifty = xr.DataArray(shifty, coords={'shifty': shifty}, dims=('shifty', ))
    xds['shifty'] = shifty

    for dim in dimlist:
        corrfld = f'corr_{dim.lower()}'
        template = xda.isel(sample=0).drop('sample')
        xds[corrfld] = xr.zeros_like(shifty * template)

        x_deviation = (xda - xda.mean('sample')).where(mask)
        x_ssr = np.sqrt((x_deviation**2).sum('sample'))

        for s in shifty.values:
            y_deviation = x_deviation.shift({dim: s})
            numerator = (x_deviation * y_deviation).sum('sample')

            y_ssr = np.sqrt((y_deviation**2).sum('sample'))
            denominator = x_ssr * y_ssr

            xds[corrfld].loc[{'shifty': s}] = numerator / denominator

    if mask_in_betweens:
        for dim in dimlist:
            corrfld = f'corr_{dim.lower()}'
            for s in shifty.values:
                if s < 0:
                    bigger_than = shifty < s
                else:
                    bigger_than = shifty > s

                imnan = np.isnan(xds[corrfld].sel(shifty=s))
                xds[corrfld] = xr.where(bigger_than * imnan, np.nan,
                                        xds[corrfld])
    return xds
Example 26
 def test_zero_risk_error(self):
   participants, incidence_scenarios = (
       sim_test_util.participants_and_forecast())
   c = sim_test_util.c_to_test_events()
   c['incidence_scaler'] = xr.zeros_like(c.incidence_scaler)
   with self.assertRaisesRegex(ValueError,
                               'impossible to account for incidence!'):
     sim.control_arm_events(c, participants, incidence_scenarios)
Example 27
 def filter_months(data_array, month_list):
     # To define in GeoDataArray !!!!and GeoDS!!!!
     if month_list is not None:
         condition = xr.zeros_like(data_array.t)
         for i in range(len(data_array.t)):
             condition[i] = data_array.t[i].values[()].month in util.months_to_number(month_list)
         data_array = data_array.where(condition, drop=True)
     return data_array
Example 28
 def crop_months(self, new_month_list):
     condition = xr.zeros_like(self.data.t)
     for i in range(len(self.data.t)):
         condition[i] = self.data.t[i].values[()].month in util.months_to_number(new_month_list)
     self.data = self.data.where(condition, drop=True)
     self.months = new_month_list
     print("____ Data cropped to the new month list.")
     return self
def strategy(data):
    close = data['futures'].sel(field="close")
    commodity = data['commodity']
    if commodity.isel(time=-1) > commodity.isel(time=-2) and close.isel(
            time=-1) > close.isel(time=-20):
        return xr.ones_like(close.isel(time=-1))
    else:
        return xr.zeros_like(close.isel(time=-1))
Example 30
def gdf_to_model_dataset(model_ds,
                         gdf,
                         modelgrid,
                         name,
                         gridtype='structured'):
    """ create 3 data-arrays from a geodataframe with oppervlaktewater:
    - area: with the area of the geodataframe in the cell
    - cond: with the conductance based on the area and bweerstand column in gdf
    - peil: with the surface water lvl based on the peil column in the gdf


    Parameters
    ----------
    model_ds : xr.DataSet
        xarray with model data
    gdf : geopandas.GeoDataFrame
        polygon shapes with surface water.
    modelgrid : flopy grid
        model grid.
    name : str
        name of the polygon shapes, used to name the data arrays stored in
        the output dataset
    gridtype : str, optional
        type of the model grid, by default 'structured'

    Returns
    -------
    model_ds_out : xarray.Dataset
        dataset with the '{name}_area', '{name}_cond' and '{name}_peil'
        data arrays on the model grid

    """
    area = xr.zeros_like(model_ds['top'])
    cond = xr.zeros_like(model_ds['top'])
    peil = xr.zeros_like(model_ds['top'])
    for i, row in gdf.iterrows():
        area_pol = mgrid.polygon_to_area(modelgrid, row['geometry'],
                                         xr.ones_like(model_ds['top']),
                                         gridtype)
        cond = xr.where(area_pol > area, area_pol / row['bweerstand'], cond)
        peil = xr.where(area_pol > area, row['peil'], peil)
        area = xr.where(area_pol > area, area_pol, area)

    model_ds_out = util.get_model_ds_empty(model_ds)
    model_ds_out[f'{name}_area'] = area
    model_ds_out[f'{name}_cond'] = cond
    model_ds_out[f'{name}_peil'] = peil

    return model_ds_out
Example 31
def test_binarize():
    binarize_spec = Preprocessing(name="binarize", kwargs={"threshold": 14})
    data = xr.DataArray(np.arange(30).reshape(2, 3, 5), dims=("x", "y", "c"))
    expected = xr.zeros_like(data)
    expected[{"x": slice(1, None)}] = 1
    preprocessing = make_preprocessing([binarize_spec])
    result = preprocessing(data)
    xr.testing.assert_allclose(expected, result)
Example 32
    def test_inversion(self):

        # Download the RGI file for the run
        # Make a new dataframe of those
        rgidf = gpd.read_file(get_demo_file('SouthGlacier.shp'))

        # Go - initialize working directories
        gdirs = workflow.init_glacier_regions(rgidf)

        # Preprocessing tasks
        task_list = [
            tasks.glacier_masks,
            tasks.compute_centerlines,
            tasks.initialize_flowlines,
            tasks.catchment_area,
            tasks.catchment_intersections,
            tasks.catchment_width_geom,
            tasks.catchment_width_correction,
            tasks.process_cru_data,
            tasks.local_t_star,
            tasks.mu_star_calibration,
        ]
        for task in task_list:
            execute_entity_task(task, gdirs)

        # Inversion tasks
        execute_entity_task(tasks.prepare_for_inversion, gdirs)
        # We use the default parameters for this run
        execute_entity_task(tasks.mass_conservation_inversion, gdirs)
        execute_entity_task(tasks.distribute_thickness_per_altitude, gdirs,
                            varname_suffix='_alt')
        execute_entity_task(tasks.distribute_thickness_interp, gdirs,
                            varname_suffix='_int')

        # Reference data
        gdir = gdirs[0]
        df = self.get_ref_data(gdir)

        with xr.open_dataset(gdir.get_filepath('gridded_data')) as ds:

            v = ds.distributed_thickness_alt
            df['oggm_alt'] = v.isel(x=('z', df['i']), y=('z', df['j']))
            v = ds.distributed_thickness_int
            df['oggm_int'] = v.isel(x=('z', df['i']), y=('z', df['j']))

            ds['ref'] = xr.zeros_like(ds.distributed_thickness_int) * np.NaN
            ds['ref'].data[df['j'], df['i']] = df['thick']

        rmsd_int = ((df.oggm_int - df.thick) ** 2).mean() ** .5
        rmsd_alt = ((df.oggm_alt - df.thick) ** 2).mean() ** .5
        assert rmsd_int < 80
        assert rmsd_alt < 80

        dfm = df.mean()
        np.testing.assert_allclose(dfm.thick, dfm.oggm_int, 50)
        np.testing.assert_allclose(dfm.thick, dfm.oggm_alt, 50)

        if do_plot:
            import matplotlib.pyplot as plt
            df.plot(kind='scatter', x='oggm_int', y='thick')
            plt.axis('equal')
            df.plot(kind='scatter', x='oggm_alt', y='thick')
            plt.axis('equal')
            f, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(12, 3))
            ds.ref.plot(ax=ax1)
            ds.distributed_thickness_int.plot(ax=ax2)
            ds.distributed_thickness_alt.plot(ax=ax3)
            plt.tight_layout()
            plt.show()
Example 33
    def test_optimize_inversion(self):

        # Download the RGI file for the run
        # Make a new dataframe of those
        rgidf = gpd.read_file(get_demo_file('SouthGlacier.shp'))

        # Go - initialize working directories
        gdirs = workflow.init_glacier_regions(rgidf)

        # Preprocessing tasks
        task_list = [
            tasks.glacier_masks,
            tasks.compute_centerlines,
            tasks.initialize_flowlines,
            tasks.catchment_area,
            tasks.catchment_intersections,
            tasks.catchment_width_geom,
            tasks.catchment_width_correction,
            tasks.process_cru_data,
            tasks.local_t_star,
            tasks.mu_star_calibration,
        ]
        for task in task_list:
            execute_entity_task(task, gdirs)

        # Reference data
        gdir = gdirs[0]
        df = self.get_ref_data(gdir)

        # Inversion tasks
        execute_entity_task(tasks.prepare_for_inversion, gdirs)

        glen_a = cfg.PARAMS['inversion_glen_a']
        fs = cfg.PARAMS['inversion_fs']

        def to_optimize(x):
            tasks.mass_conservation_inversion(gdir,
                                              glen_a=glen_a * x[0],
                                              fs=fs * x[1])
            tasks.distribute_thickness_per_altitude(gdir)
            with xr.open_dataset(gdir.get_filepath('gridded_data')) as ds:
                thick = ds.distributed_thickness.isel(x=('z', df['i']),
                                                      y=('z', df['j']))
                out = (np.abs(thick - df.thick)).mean()
            return out

        opti = optimization.minimize(to_optimize, [1., 1.],
                                     bounds=((0.01, 10), (0.01, 10)),
                                     tol=0.1)
        # Check results and save.
        execute_entity_task(tasks.mass_conservation_inversion, gdirs,
                            glen_a=glen_a*opti['x'][0],
                            fs=0)
        execute_entity_task(tasks.distribute_thickness_per_altitude, gdirs)

        with xr.open_dataset(gdir.get_filepath('gridded_data')) as ds:
            df['oggm'] = ds.distributed_thickness.isel(x=('z', df['i']),
                                                       y=('z', df['j']))
            ds['ref'] = xr.zeros_like(ds.distributed_thickness) * np.NaN
            ds['ref'].data[df['j'], df['i']] = df['thick']

        rmsd = ((df.oggm - df.thick) ** 2).mean() ** .5
        assert rmsd < 60

        dfm = df.mean()
        np.testing.assert_allclose(dfm.thick, dfm.oggm, 10)
        if do_plot:
            import matplotlib.pyplot as plt
            df.plot(kind='scatter', x='oggm', y='thick')
            plt.axis('equal')
            f, (ax1, ax2) = plt.subplots(1, 2, figsize=(8, 3))
            ds.ref.plot(ax=ax1)
            ds.distributed_thickness.plot(ax=ax2)
            plt.tight_layout()
            plt.show()