Example #1
def test_combine_by_coords_distant_cftime_dates():
    # Regression test for https://github.com/pydata/xarray/issues/3535
    import cftime

    time_1 = [cftime.DatetimeGregorian(4500, 12, 31)]
    time_2 = [cftime.DatetimeGregorian(4600, 12, 31)]
    time_3 = [cftime.DatetimeGregorian(5100, 12, 31)]

    da_1 = DataArray([0], dims=["time"], coords=[time_1],
                     name="a").to_dataset()
    da_2 = DataArray([1], dims=["time"], coords=[time_2],
                     name="a").to_dataset()
    da_3 = DataArray([2], dims=["time"], coords=[time_3],
                     name="a").to_dataset()

    result = combine_by_coords([da_1, da_2, da_3])

    expected_time = np.concatenate([time_1, time_2, time_3])
    expected = DataArray([0, 1, 2],
                         dims=["time"],
                         coords=[expected_time],
                         name="a").to_dataset()
    assert_identical(result, expected)
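The regression above hinges on distant DatetimeGregorian values still being ordered and subtracted correctly. A minimal standalone sketch (not part of the original test) of that property:

import cftime

a = cftime.DatetimeGregorian(4500, 12, 31)
b = cftime.DatetimeGregorian(5100, 12, 31)
assert a < b
print(b - a)  # datetime.timedelta covering the ~600-year gap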
Example #2
import datetime as pydt
from typing import Sequence

import cftime
import numpy as np
import pandas as pd
import xarray as xr


def ensure_cftime_array(time: Sequence):
    """Convert an input 1D array to an array of cftime objects. Python's datetime are converted to cftime.DatetimeGregorian.

    Raises ValueError when unable to cast the input.
    """
    if isinstance(time, xr.DataArray):
        time = time.indexes["time"]
    elif isinstance(time, np.ndarray):
        time = pd.DatetimeIndex(time)
    if isinstance(time[0], cftime.datetime):
        return time
    if isinstance(time[0], pydt.datetime):
        return np.array(
            [cftime.DatetimeGregorian(*ele.timetuple()[:6]) for ele in time])
    raise ValueError("Unable to cast array to cftime dtype")
Example #3
def test_combine_by_coords_raises_for_differing_calendars():
    # previously failed with uninformative StopIteration instead of TypeError
    # https://github.com/pydata/xarray/issues/4495

    import cftime

    time_1 = [cftime.DatetimeGregorian(2000, 1, 1)]
    time_2 = [cftime.DatetimeProlepticGregorian(2001, 1, 1)]

    da_1 = DataArray([0], dims=["time"], coords=[time_1],
                     name="a").to_dataset()
    da_2 = DataArray([1], dims=["time"], coords=[time_2],
                     name="a").to_dataset()

    with raises_regex(TypeError, r"cannot compare .* \(different calendars\)"):
        combine_by_coords([da_1, da_2])
Example #4
def return_ibtracs_storm(fname, storm_idx, variables):
    ibnc = xr.open_dataset(fname, mask_and_scale=False)
    nc = ibnc.sel(storm=storm_idx)

    # remove fill values and append data to dictionary
    d = dict()
    for v in variables:
        vv = nc[v]
        if v == 'time':
            fv = cftime.DatetimeGregorian(-25518, 1, 28, 0, 0, 0, 0)
        else:
            fv = vv._FillValue
        data = vv.values[vv != fv]
        if v == 'landfall':  # there is always one less landfall value, replace with last value
            data = np.append(data, data[-1])
        d[v] = data
    return d
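A hedged sketch (made-up values) of the fill-value filtering used above; the IBTrACS time fill value decodes to year -25518, so comparing against that sentinel drops the padded entries:

import cftime
import numpy as np

fv = cftime.DatetimeGregorian(-25518, 1, 28, 0, 0, 0, 0)
times = np.array([fv, cftime.DatetimeGregorian(2005, 8, 23), fv], dtype=object)
print(times[times != fv])  # keeps only the real timestamp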
Example #5
def test_combine_by_coords_raises_for_differing_calendars():
    # previously failed with uninformative StopIteration instead of TypeError
    # https://github.com/pydata/xarray/issues/4495

    import cftime

    time_1 = [cftime.DatetimeGregorian(2000, 1, 1)]
    time_2 = [cftime.DatetimeProlepticGregorian(2001, 1, 1)]

    da_1 = DataArray([0], dims=["time"], coords=[time_1],
                     name="a").to_dataset()
    da_2 = DataArray([1], dims=["time"], coords=[time_2],
                     name="a").to_dataset()

    if LooseVersion(cftime.__version__) >= LooseVersion("1.5"):
        error_msg = "Cannot combine along dimension 'time' with mixed types."
    else:
        error_msg = r"cannot compare .* \(different calendars\)"

    with pytest.raises(TypeError, match=error_msg):
        combine_by_coords([da_1, da_2])
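The version branch above exists because comparison across calendars changed in cftime 1.5. A small sketch (independent of xarray) of the underlying behaviour:

import cftime

g = cftime.DatetimeGregorian(2000, 1, 1)
p = cftime.DatetimeProlepticGregorian(2001, 1, 1)
try:
    print(g < p)  # newer cftime versions may allow the comparison
except TypeError as err:
    print("comparison failed:", err)  # older cftime: cannot compare (different calendars)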
Example #6
def stack_by_init_date(
    ds,
    init_dates,
    n_lead_steps,
    time_dim="time",
    init_dim="init_date",
    lead_dim="lead_time",
    time_rounding="D",
):
    """Stack timeseries array in inital date / lead time format.

    Parameters
    ----------
    ds : xarray DataArray or Dataset
        Input array containing a time dimension
    init_dates : list
        List of initial dates of the same object type as the times in
        the time dimension of ds
    n_lead_steps: int
        Maximum number of lead time steps
    time_dim: str, default 'time'
        Name of the time dimension in ds
    init_dim: str, default 'init_date'
        Name of the initial date dimension to create in the output
    lead_dim: str, default 'lead_time'
        Name of the lead time dimension to create in the output
    time_rounding : {'A', 'M', 'D'}, default 'D'
        Match time axis and init dates by floor rounding to nearest day, month, or year

    Returns
    -------
    stacked : xarray DataArray or Dataset
        Array with data stacked by specified initial dates and lead steps

    Notes
    -----
    Only initial dates that fall within the time range of the input
    timeseries are retained. Thus, initial dates prior to the time range of
    the input timeseries that include data at longer lead times are not
    included in the output dataset. To include these data, prepend the input
    timeseries with nans so that the initial dates in question are present
    in the time dimension of the input timeseries.
    """
    # Only keep init dates that fall within available times
    times = ds[time_dim]
    init_dates = init_dates[
        np.logical_and(init_dates >= times.min(), init_dates <= times.max())
    ]

    # Initialise indexes of specified initial dates and time info for each initial date
    time2d = np.empty((len(init_dates), n_lead_steps), "object")
    time2d[:] = cftime.DatetimeGregorian(
        3000, 1, 1
    )  # Year 3000 where data do not exist
    init_date_indexes = []
    for ndate, init_date in enumerate(init_dates):
        start_index = _get_match_index(times, init_date.item(), time_rounding)
        end_index = start_index + n_lead_steps
        time_slice = ds[time_dim][start_index:end_index]
        time2d[ndate, : len(time_slice)] = time_slice
        init_date_indexes.append(start_index)

    # Use `rolling` to stack timeseries like forecasts
    # Note, rolling references each window to the right-hand edge of the window. Hence we reverse the timeseries
    # so that each window starts at the specified initial date and includes n_lead_steps to the right of
    # that element
    ds = ds.copy().sel({time_dim: slice(None, None, -1)})
    init_date_indexes = [ds.sizes[time_dim] - 1 - i for i in init_date_indexes]

    ds = ds.rolling({time_dim: n_lead_steps}, min_periods=1).construct(
        lead_dim, keep_attrs=True
    )
    ds = ds.isel({time_dim: init_date_indexes})

    # Account for reversal of timeseries
    ds = ds.sel({lead_dim: slice(None, None, -1)})

    ds = ds.rename({time_dim: init_dim})
    ds = ds.assign_coords({lead_dim: ds[lead_dim].values})
    ds = ds.assign_coords({time_dim: ([init_dim, lead_dim], time2d)})
    ds = ds.assign_coords({init_dim: init_dates.values})

    return ds
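A small synthetic-data sketch of the rolling/construct step used above; each window is referenced to its right-hand edge and becomes a lead_time dimension:

import numpy as np
import xarray as xr

da = xr.DataArray(np.arange(10), dims=["time"])
windows = da.rolling(time=3, min_periods=1).construct("lead_time")
print(windows.isel(time=5).values)  # the window ending at index 5: [3. 4. 5.]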
Example #7
def main(f, years):
    sDir = os.path.dirname(f)
    ncfile = xr.open_dataset(f, mask_and_scale=False)

    yrs = np.arange(years[0], years[1] + 1, 1)

    sf = pd.read_csv(os.path.join(sDir, 'summary_1970-2019.csv'))
    hindex = list(sf['findex'])

    storms_all = dict()
    storms_major = dict()

    for yr in yrs:
        storms_all[yr] = 0
        storms_major[yr] = 0

    # fig_all, ax_all = plt.subplots(subplot_kw=dict(projection=ccrs.PlateCarree()))
    # fig_major, ax_major = plt.subplots(subplot_kw=dict(projection=ccrs.PlateCarree()))

    fig_all, ax_all = plt.subplots(subplot_kw=dict(projection=ccrs.Robinson()))
    fig_major, ax_major = plt.subplots(subplot_kw=dict(projection=ccrs.Robinson()))
    ax_lims = [-120, 0, 0, 55]

    for i, hi in enumerate(hindex):
        # set up map axes
        if i == 0:
            add_map_features(ax_all, ax_lims)
            add_map_features(ax_major, ax_lims)

        ncf = ncfile.sel(storm=hi)
        lat = ncf.lat.values
        lat[lat == -9999] = np.nan
        lon = ncf.lon.values
        lon[lon == -9999] = np.nan

        category = np.nanmax(ncf.usa_sshs.values)

        # distance from land is < 60 nmile (111 km)
        lf = ncf.landfall.values.astype('float')
        lf[lf == -9999] = np.nan  # convert fill values to nan
        minlf = np.nanmin(lf)
        lf_ind = np.where(lf < 111)[0]
        lf_lon = lon[lf_ind]

        # choose when landfall is < 60 nmile and the storm is west of 40 degrees W
        #if np.logical_and(minlf < 111, any(lf_lon < -60)):
        if np.logical_and(minlf < 111, any(lf_lon < -40)):
            nsamerica_lf = 'yes'
        else:
            nsamerica_lf = 'no'

        lw = 1
        bc = 'darkgray'
        alpha = .6
        mk = 'None'

        # count the storms that make landfall west of 40 degrees W each year
        if np.logical_and(category >= 0, nsamerica_lf == 'yes'):
            t0 = min(t for t in ncf.time.values if t > cftime.DatetimeGregorian(1800, 1, 1, 0, 0, 0, 0))
            storms_all[t0.year] = storms_all[t0.year] + 1
            ax_all.plot(lon, lat, c='r', marker=mk, linewidth=lw, transform=ccrs.PlateCarree())

            if category >= 3:
                storms_major[t0.year] = storms_major[t0.year] + 1
                ax_major.plot(lon, lat, c='r', marker=mk, linewidth=lw, transform=ccrs.PlateCarree())
            else:
                ax_major.plot(lon, lat, c=bc, marker=mk, linewidth=lw, alpha=alpha, transform=ccrs.PlateCarree())
        else:
            ax_all.plot(lon, lat, c=bc, marker=mk, linewidth=lw, alpha=alpha, transform=ccrs.PlateCarree())
            ax_major.plot(lon, lat, c=bc, marker=mk, linewidth=lw, alpha=alpha, transform=ccrs.PlateCarree())

    # export_df(storms_all, os.path.join(sDir, 'NA_landfalling_storms_all_1970-2019-test.csv'))
    # export_df(storms_major, os.path.join(sDir, 'NA_landfalling_storms_major_1970-2019-test.csv'))

    fig_all.savefig(os.path.join(sDir, 'NA_storms_all_1970-2019-test40deg.png'), dpi=300)
    plt.close(fig_all)

    fig_major.savefig(os.path.join(sDir, 'NA_storms_major_1970-2019-test40deg.png'), dpi=300)
    plt.close(fig_major)
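A hedged sketch (made-up timestamps) of the first-valid-time trick used above; decoded IBTrACS fill values fall far in the past, so the earliest timestamp after 1800 is the real start of the storm:

import cftime

times = [cftime.DatetimeGregorian(-25518, 1, 28), cftime.DatetimeGregorian(2005, 8, 23)]
t0 = min(t for t in times if t > cftime.DatetimeGregorian(1800, 1, 1))
print(t0.year)  # 2005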
Example #8
def generate_range(
    start: cftime.datetime,
    end: cftime.datetime,
    offset: cftime_offsets.BaseCFTimeOffset,
) -> Iterable[cftime.datetime]:
    """
    Generate a range of datetime objects between start and end, using offset to
    determine the steps.

    The range will extend both ends of the span to the next valid timestep, see
    examples.

    Parameters
    ----------
    start: :class:`cftime.datetime`
        Starting datetime from which to generate the range (noting roll backward
        mentioned above and illustrated in the examples).

    end: :class:`cftime.datetime`
        Last datetime from which to generate the range (noting roll forward mentioned
        above and illustrated in the examples).

    offset:
        Offset object for determining the timesteps.

    Yields
    ------
    :class:`cftime.datetime`
        Next datetime in the range

    Raises
    ------
    ValueError
        Offset does not result in increasing :class:`cftime.datetime`'s

    Examples
    --------
    The range is extended at either end to the nearest timestep. In the example below,
    the first timestep is rolled back to 1st Jan 2001 whilst the last is extended to 1st
    Jan 2006.

    >>> import datetime as dt
    >>> from pprint import pprint
    >>> from scmdata.offsets import to_offset, generate_range
    >>> g = generate_range(
    ...     dt.datetime(2001, 4, 1),
    ...     dt.datetime(2005, 6, 3),
    ...     to_offset("AS"),
    ... )

    >>> pprint([d for d in g])
    [cftime.datetime(2001, 1, 1, 0, 0),
     cftime.datetime(2002, 1, 1, 0, 0),
     cftime.datetime(2003, 1, 1, 0, 0),
     cftime.datetime(2004, 1, 1, 0, 0),
     cftime.datetime(2005, 1, 1, 0, 0),
     cftime.datetime(2006, 1, 1, 0, 0)]

    In this example the first timestep is rolled back to 31st Dec 2000 whilst the last
    is extended to 31st Dec 2005.

    >>> g = generate_range(
    ...     dt.datetime(2001, 4, 1),
    ...     dt.datetime(2005, 6, 3),
    ...     to_offset("A"),
    ... )
    >>> pprint([d for d in g])
    [cftime.datetime(2000, 12, 31, 0, 0),
     cftime.datetime(2001, 12, 31, 0, 0),
     cftime.datetime(2002, 12, 31, 0, 0),
     cftime.datetime(2003, 12, 31, 0, 0),
     cftime.datetime(2004, 12, 31, 0, 0),
     cftime.datetime(2005, 12, 31, 0, 0)]

    In this example the first timestep is already on the offset so it stays there, and the
    last timestep is extended to 1st Jul 2005.

    >>> g = generate_range(
    ...     dt.datetime(2001, 4, 1),
    ...     dt.datetime(2005, 6, 3),
    ...     to_offset("QS"),
    ... )
    >>> pprint([d for d in g])
    [cftime.datetime(2001, 4, 1, 0, 0),
     cftime.datetime(2001, 7, 1, 0, 0),
     cftime.datetime(2001, 10, 1, 0, 0),
     cftime.datetime(2002, 1, 1, 0, 0),
     cftime.datetime(2002, 4, 1, 0, 0),
     cftime.datetime(2002, 7, 1, 0, 0),
     cftime.datetime(2002, 10, 1, 0, 0),
     cftime.datetime(2003, 1, 1, 0, 0),
     cftime.datetime(2003, 4, 1, 0, 0),
     cftime.datetime(2003, 7, 1, 0, 0),
     cftime.datetime(2003, 10, 1, 0, 0),
     cftime.datetime(2004, 1, 1, 0, 0),
     cftime.datetime(2004, 4, 1, 0, 0),
     cftime.datetime(2004, 7, 1, 0, 0),
     cftime.datetime(2004, 10, 1, 0, 0),
     cftime.datetime(2005, 1, 1, 0, 0),
     cftime.datetime(2005, 4, 1, 0, 0),
     cftime.datetime(2005, 7, 1, 0, 0)]
    """
    # Uses the Gregorian calendar - allows for adding/subtracting datetime.timedelta in range calc
    start_cf = cftime.DatetimeGregorian(*start.timetuple()[:6])
    end_cf = cftime.DatetimeGregorian(*end.timetuple()[:6])

    res = cftime_offsets.cftime_range(offset.rollback(start_cf),
                                      offset.rollforward(end_cf),
                                      freq=offset)

    return [cftime.datetime(*dt.timetuple()[:6]) for dt in res]
Example #9
def main():
    import argparse

    locations = yaml.safe_load(open('locations.yml'))
    variables_def = yaml.safe_load(open('indicators.yml'))
    assets = yaml.safe_load(open('assets.yml'))
    cmip6_yml = yaml.safe_load(open('cmip6.yml'))

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--max-workers",
        type=int,
        default=4,
        help=
        "Number of parallel threads for data download. Hint: use `--max-workers 1` for serial downlaod."
    )
    # g = parser.add_argument_group('variables or asset')
    g = parser.add_mutually_exclusive_group(required=True)
    # g.add_argument('--era5', nargs='*', help='list of ERA5-monthly variables to download (original name, no correction)')
    # g.add_argument('--cmip6', nargs='*', help='list of CMIP6-monthly variables to download')
    g.add_argument('--indicators',
                   nargs='*',
                   default=[],
                   choices=[vdef['name'] for vdef in variables_def],
                   help='list of custom indicators to download')
    g.add_argument(
        '--asset',
        choices=list(assets.keys()),
        help=
        'pre-defined list of variables, defined in assets.yml (experimental)')

    parser.add_argument(
        '--dataset',
        choices=['era5', 'cmip6'],
        help='dataset to use in combination with `--indicators` and `--asset`')
    parser.add_argument('-o',
                        '--output',
                        default='indicators',
                        help='output directory, default: %(default)s')
    parser.add_argument('--overwrite',
                        action='store_true',
                        help=argparse.SUPPRESS)

    g = parser.add_argument_group('location')
    g.add_argument('--location',
                   choices=[loc['name'] for loc in locations],
                   help='location name defined in locations.yml')
    g.add_argument('--lon', type=float)
    g.add_argument('--lat', type=float)

    g = parser.add_argument_group('area size controls')
    g.add_argument(
        '--area',
        nargs=4,
        type=float,
        help='area as four numbers: top, left, bottom, right (CDS convention)')
    g.add_argument(
        '--width-km',
        type=float,
        default=1000,
        help=
        "Width (km) around the selected location, when not provided by `area`. %(default)s km by default."
    )
    g.add_argument(
        '--view',
        nargs=4,
        type=float,
        help=
        'area for plot as four numbers: top, left, bottom, right (CDS convention)'
    )

    g = parser.add_argument_group('ERA5 control')
    # g.add_argument('--year', nargs='+', default=list(range(1979, 2019+1)), help='ERA5 years to download, default: %(default)s')
    g.add_argument('--year',
                   nargs='+',
                   default=list(range(1979, 2019 + 1)),
                   help=argparse.SUPPRESS)

    g = parser.add_argument_group('CMIP6 control')
    g.add_argument('--model',
                   nargs='*',
                   default=None,
                   choices=get_all_models())
    g.add_argument('--experiment',
                   nargs='*',
                   choices=cmip6_yml["experiments"],
                   default=['ssp5_8_5'])
    # g.add_argument('--period', default=None, help=argparse.SUPPRESS) # all CMIP6 models and future experiements share the same parameter...
    # g.add_argument('--historical', action='store_true', help='this flag provokes downloading historical data as well and extend back the CMIP6 timeseries to 1979')
    g.add_argument('--historical',
                   action='store_true',
                   default=True,
                   help=argparse.SUPPRESS)
    g.add_argument('--no-historical',
                   action='store_false',
                   dest='historical',
                   help=argparse.SUPPRESS)
    # g.add_argument('--bias-correction', action='store_true', help='align CMIP6 variables with matching ERA5')
    g.add_argument('--bias-correction',
                   action='store_true',
                   default=True,
                   help=argparse.SUPPRESS)
    g.add_argument('--no-bias-correction',
                   action='store_false',
                   dest='bias_correction',
                   help='suppress bias-correction for CMIP6 data')
    g.add_argument(
        '--reference-period',
        default=[1979, 2019],
        nargs=2,
        type=int,
        help='reference period for bias correction (default: %(default)s)')
    g.add_argument('--yearly-bias',
                   action='store_true',
                   help='yearly instead of monthly bias correction')
    g.add_argument(
        '--ensemble',
        action='store_true',
        help=
        'If `--model` is not specified, default to all available models. Also write a csv file with all models as columns, as well as median, lower and upper (5th and 95th percentiles) fields.'
    )

    g = parser.add_argument_group('visualization')
    g.add_argument('--view-region', action='store_true')
    g.add_argument('--view-timeseries', action='store_true')
    g.add_argument('--png-region', action='store_true')
    g.add_argument('--png-timeseries', action='store_true')
    g.add_argument('--dpi',
                   default=100,
                   type=int,
                   help='dots per inch (default: %(default)s)')
    g.add_argument('--yearly-mean', action='store_true')

    o = parser.parse_args()

    if not (o.location or (o.lon and o.lat)):
        parser.error(
            'please provide a location, for instance `--location Welkenraedt`, or use custom lon and lat, e.g. `--lon 5.94 --lat 50.67`'
        )

    elif o.location:
        loc = {loc['name']: loc for loc in locations}[o.location]
        o.lon, o.lat = loc['lon'], loc['lat']
        if 'area' in loc and not o.area:
            o.area = loc['area']

    if not o.area:
        o.area = make_area(o.lon, o.lat, o.width_km)

    print('lon', o.lon)
    print('lat', o.lat)

    if not o.asset and not o.indicators:
        parser.error(
            'please provide indicators, for example: `--indicators 2m_temperature` or asset, e.g. `--asset energy`'
        )

    # assets only contain indicators
    if o.asset:
        for vname in assets[o.asset]:
            if vname not in [v['name'] for v in variables_def]:
                parser.error(
                    f'unknown indicator in assets.yml: {vname}. See indicators.yml for indicator definition'
                )
            o.indicators.append(vname)

    # folder structure for CSV results
    loc_folder = o.location.lower() if o.location else f'{o.lat}N-{o.lon}E'
    asset_folder = o.asset if o.asset else 'all'

    if o.model is None:
        if o.ensemble:
            o.model = get_all_models()
        else:
            o.model = 'mpi_esm1_2_lr'

    # loop over indicators
    vdef_by_name = {v['name']: v for v in variables_def}
    for name in o.indicators:

        variables = []  # each variable for the simulation set

        vdef = vdef_by_name[name]
        indicator_def = dict(name=name,
                             units=vdef.get('units'),
                             description=vdef.get('description'),
                             scale=vdef.get('scale', 1),
                             offset=vdef.get('offset', 0))

        vdef2 = vdef.get('era5', {})
        era5_kwargs = dict(area=o.area, year=o.year)
        era5 = parse_indicator(ERA5,
                               defs=vdef2,
                               cls_kwargs=era5_kwargs,
                               **indicator_def)

        era5.simulation_set = 'ERA5'
        era5.set_folder = 'era5'
        era5.alias = name

        if not o.dataset or o.dataset == 'era5' or o.bias_correction:
            variables.append(era5)

        vdef2 = vdef.get('cmip6', {})
        transform = Transform(vdef2.get('scale', 1), vdef2.get('offset', 0))

        if not o.dataset or o.dataset == 'cmip6':
            for model in o.model:
                labels = {
                    x: "{}-{}.{}".format(*x.split("_"))
                    for x in cmip6_yml["experiments"]
                }
                # if o.historical:
                #     historical_kwargs = dict(model=model, experiment='historical')
                #     historical = parse_indicator(CMIP6, defs=vdef2, cls_kwargs=historical_kwargs, **indicator_def)
                # else:
                #     historical = None
                for experiment in o.experiment:
                    cmip6_kwargs = dict(model=model,
                                        experiment=experiment,
                                        historical=o.historical,
                                        area=o.area)
                    cmip6 = parse_indicator(CMIP6,
                                            defs=vdef2,
                                            cls_kwargs=cmip6_kwargs,
                                            **indicator_def)
                    cmip6.reference = era5
                    cmip6.simulation_set = f'CMIP6 - {labels.get(experiment, experiment)} - {model}'
                    cmip6.set_folder = f'cmip6-{model}-{experiment}'
                    cmip6.alias = name
                    # print("indicator variable", experiment, [d.name for d in cmip6.datasets])
                    variables.append(cmip6)

        if not variables:
            logging.warning(f'no variable for {name}')
            continue

        if o.max_workers < 2:
            variables2 = download_all_variables_serial(variables)
        else:
            variables2 = download_all_variables(variables)

        # Diagnose which variables have been excluded
        names = list(set([v.name for v in variables]))
        names2 = list(set([v.name for v in variables2]))

        models = list(
            set([
                v.datasets[0].model for v in variables
                if isinstance(v.datasets[0], CMIP6)
            ]))
        models2 = list(
            set([
                v.datasets[0].model for v in variables2
                if isinstance(v.datasets[0], CMIP6)
            ]))

        print(f"Downloaded {len(variables2)} out of {len(variables)}")
        print(f"... {len(names2)} out of {len(names)} variable types")
        print(f"... {len(models2)} out of {len(models)} models")
        print("CMIP6 models excluded:",
              " ".join([m for m in models if m not in models2]))
        print("CMIP6 models included:", " ".join(models2))

        variables = variables2

        # download and convert to csv
        for v in variables:
            folder = os.path.join(o.output, loc_folder, asset_folder,
                                  v.set_folder)
            v.csv_file = os.path.join(folder, (v.alias or v.variable) + '.csv')

            if os.path.exists(v.csv_file):
                print("Already exitst:", v.csv_file)
                continue

            series = v.load_timeseries(o.lon, o.lat, overwrite=o.overwrite)

            bias_correction_method = vdef.get('bias-correction')

            if o.bias_correction and isinstance(
                    v.datasets[0],
                    CMIP6) and bias_correction_method is not None:
                era5 = v.reference.load_timeseries(o.lon, o.lat)
                #v.set_folder += '-unbiased'
                if o.yearly_bias:
                    series = correct_yearly_bias(series, era5,
                                                 o.reference_period,
                                                 bias_correction_method)
                else:
                    series = correct_monthly_bias(series, era5,
                                                  o.reference_period,
                                                  bias_correction_method)

            os.makedirs(folder, exist_ok=True)
            print("Save to", v.csv_file)
            save_csv(series, v.csv_file)

        if o.ensemble:
            ensemble_files = {}
            import cftime, datetime
            for experiment in o.experiment:
                ensemble_variables = [
                    v for v in variables if isinstance(v.datasets[0], CMIP6)
                    and v.datasets[0].experiment == experiment
                ]
                dates = np.array([
                    cftime.DatetimeGregorian(y, m, 15)
                    for y in range(1979, 2100 + 1) for m in range(1, 12 + 1)
                ])
                index = pd.Index(cftime.date2num(dates, time_units),
                                 name=time_units)

                df = {}
                for v in ensemble_variables:
                    series = load_csv(v.csv_file)
                    series.index = index[:len(series)]
                    df[v.datasets[0].model] = series
                df = pd.DataFrame(df)
                median = df.median(axis=1)
                lower = df.quantile(.05, axis=1)
                upper = df.quantile(.95, axis=1)
                df["median"] = median
                df["lower"] = lower
                df["upper"] = upper
                first = ensemble_variables[0]
                folder = os.path.join(
                    o.output, loc_folder, asset_folder,
                    first.set_folder.replace(first.datasets[0].model,
                                             "ensemble"))
                csv_file = os.path.join(folder, first.alias
                                        or first.name) + '.csv'
                ensemble_files[experiment] = csv_file
                os.makedirs(folder, exist_ok=True)
                print("Save to", csv_file)
                save_csv(df, csv_file)

        if o.view_region or o.view_timeseries or o.png_region or o.png_timeseries:
            import matplotlib.pyplot as plt
            cb = None
            try:
                import cartopy
                import cartopy.crs as ccrs
                kwargs = dict(projection=ccrs.PlateCarree())
            except ImportError:
                logging.warning('install cartopy to benefit from coastlines')
                cartopy = None
                kwargs = {}

            if o.view is None:
                o.view = o.area

            def plot_timeseries(v):
                figname = v.csv_file.replace('.csv', '.png')
                if os.path.exists(figname):
                    return

                fig2 = plt.figure(num=2)
                plt.clf()
                ax2 = fig2.add_subplot(1, 1, 1)

                ts = load_csv(v.csv_file)
                # convert units for easier reading of graphs
                ts.index = convert_time_units_series(ts.index, years=True)
                # ts.plot(ax=ax2, label=v.simulation_set)
                l, = ax2.plot(ts.index, ts.values, label=v.simulation_set)
                ax2.legend()
                ax2.set_xlabel(ts.index.name)
                ax2.set_ylabel(v.units)
                ax2.set_title(name)

                # add yearly mean as well
                if o.yearly_mean:
                    yearly_mean = ts.rolling(12).mean()
                    l2, = ax2.plot(ts.index[::12],
                                   yearly_mean[::12],
                                   alpha=1,
                                   linewidth=2,
                                   color=l.get_color())

                if o.png_timeseries:
                    fig2.savefig(figname, dpi=o.dpi)

            def plot_region(v):
                v0 = v.datasets[0]

                figname = v.csv_file.replace('.csv', '-region.png')
                if os.path.exists(figname):
                    return

                fig1 = plt.figure(num=1)
                plt.clf()
                ax1 = fig1.add_subplot(1, 1, 1, **kwargs)

                if isinstance(v.datasets[0], ERA5):
                    y1, y2 = o.reference_period
                    roll = False
                    title = f'ERA5: {y1}-{y2}'
                else:
                    y1, y2 = 2071, 2100
                    roll = True if o.view[1] < 0 else False
                    title = f'{labels.get(v0.experiment, v0.experiment)} ({v0.model}): {y1}-{y2}'

                refslice = slice(str(y1), str(y2))
                map = v.load_cube(time=refslice, area=o.view,
                                  roll=roll).mean(dim='time')

                h = ax1.imshow(map.values[::-1],
                               extent=cube_area(map, extent=True))
                cb = plt.colorbar(h, ax=ax1, label=f'{name} ({v.units})')
                # h = map.plot(ax=ax1, cbar_kwargs={'label':f'{v.units}'}, robust=True)
                ax1.set_title(title)
                ax1.plot(o.lon, o.lat, 'ko')

                if cartopy:
                    ax1.coastlines(resolution='10m')

                if o.png_region:
                    fig1.savefig(figname, dpi=o.dpi)

            for v in variables:

                if o.view_timeseries or o.png_timeseries:
                    plot_timeseries(v)

                if o.view_region or o.png_region:
                    try:
                        plot_region(v)
                    except Exception:
                        logging.warning(f'failed to make map for {v.name}')

            # all simulation sets on one figure
            def plot_all_simulations():
                figname = os.path.join(o.output, loc_folder, asset_folder,
                                       'all_' + name + '.png')
                if os.path.exists(figname):
                    return

                fig3 = plt.figure(num=3)
                plt.clf()
                ax3 = fig3.add_subplot(1, 1, 1)
                for v in variables:
                    ts = load_csv(v.csv_file)
                    ts.index = convert_time_units_series(ts.index, years=True)
                    if isinstance(v.datasets[0], ERA5):
                        color = 'k'
                        zorder = 5
                    else:
                        color = None
                        zorder = None

                    # add yearly mean instead of monthly mean
                    if o.yearly_mean:
                        yearly_mean = ts.rolling(12).mean()
                        x = ts.index[::12]
                        y = yearly_mean[::12]
                    else:
                        x = ts.index
                        y = ts.values

                    l, = ax3.plot(x,
                                  y,
                                  alpha=0.5 if o.ensemble else 1,
                                  label=v.simulation_set,
                                  linewidth=1 if o.ensemble else 2,
                                  color=color,
                                  zorder=zorder)

                # Add ensemble mean
                if o.ensemble:
                    for experiment in ensemble_files:
                        df = load_csv(ensemble_files[experiment])
                        df.index = convert_time_units_series(df.index,
                                                             years=True)

                        if o.yearly_mean:
                            yearly_mean = df.rolling(12).mean()
                            x = df.index[::12]
                            y = yearly_mean.iloc[::12]
                        else:
                            x = df.index
                            y = df

                        l, = ax3.plot(x,
                                      y["median"],
                                      alpha=1,
                                      label=f"{experiment} (median)",
                                      linewidth=2,
                                      zorder=4)
                        ax3.plot(x,
                                 y["lower"],
                                 linewidth=1,
                                 zorder=4,
                                 linestyle="--",
                                 color=l.get_color())
                        ax3.plot(x,
                                 y["upper"],
                                 linewidth=1,
                                 zorder=4,
                                 linestyle="--",
                                 color=l.get_color())
                        ax3.fill_between(x,
                                         y["lower"],
                                         y["upper"],
                                         alpha=0.2,
                                         zorder=-1,
                                         color=l.get_color())

                ax3.legend(fontsize='xx-small')
                ax3.set_ylabel(v.units)
                ax3.set_xlabel(ts.index.name)
                ax3.set_title(name)
                # ax3.set_xlim(xmin=start_year, xmax=2100)

                mi, ma = ax3.get_xlim()
                if mi < 0:
                    ax3.set_xlim(
                        xmin=0)  # start at start_year (i.e. ERA5 start)

                if o.png_timeseries:
                    fig3.savefig(figname, dpi=max(o.dpi, 300))

            if o.view_timeseries or o.png_timeseries:
                plot_all_simulations()

    if o.view_timeseries or o.view_region:
        plt.show()
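A hedged sketch (random data, hypothetical model names) of the ensemble summary assembled near the end of the example: per-model columns are reduced to a median plus 5th/95th percentile bands before saving to CSV:

import numpy as np
import pandas as pd

df = pd.DataFrame(np.random.rand(5, 3), columns=["model_a", "model_b", "model_c"])
median = df.median(axis=1)
lower = df.quantile(.05, axis=1)
upper = df.quantile(.95, axis=1)
df["median"] = median
df["lower"] = lower
df["upper"] = upper
print(df.round(2))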
Example #10
import datetime as dt
import unittest

import cftime
import numpy as np
import pytest

from forest import util


@pytest.mark.parametrize("given,expect", [
    pytest.param('2019-10-10 01:02:34',
                 dt.datetime(2019, 10, 10, 1, 2, 34),
                 id="str with space"),
    pytest.param('2019-10-10T01:02:34',
                 dt.datetime(2019, 10, 10, 1, 2, 34),
                 id="iso8601"),
    pytest.param(np.datetime64('2019-10-10T11:22:33'),
                 dt.datetime(2019, 10, 10, 11, 22, 33),
                 id="datetime64"),
    pytest.param(cftime.DatetimeGregorian(2019, 10, 10, 11, 22, 33),
                 dt.datetime(2019, 10, 10, 11, 22, 33),
                 id="cftime.DatetimeGregorian"),
])
def test__to_datetime(given, expect):
    assert util.to_datetime(given) == expect


class Test_to_datetime(unittest.TestCase):
    def test_datetime(self):
        now = dt.datetime.now()
        result = util.to_datetime(now)
        self.assertEqual(result, now)

    def test_unsupported(self):
        with self.assertRaisesRegex(Exception, 'Unknown value'):
Example #11
def main():

    import argparse

    locations = yaml.safe_load(open('locations.yml'))

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--max-workers",
        type=int,
        default=4,
        help=
        "Number of parallel threads for data download. Hint: use `--max-workers 1` for serial downlaod."
    )
    # g = parser.add_argument_group('variables or asset')
    # g = parser.add_mutually_exclusive_group(required=True)
    # g.add_argument('--era5', nargs='*', help='list of ERA5-monthly variables to download (original name, no correction)')
    # g.add_argument('--cmip6', nargs='*', help='list of CMIP6-monthly variables to download')
    parser.add_argument('--indicator', required=True, choices=VARIABLES)

    # parser.add_argument('--dataset', choices=['era5', 'cmip6'], help='dataset in combination with for `--indicators` and `--asset`')
    parser.add_argument('-o',
                        '--output',
                        default='indicators',
                        help='output directory, default: %(default)s')
    parser.add_argument('--overwrite',
                        action='store_true',
                        help=argparse.SUPPRESS)

    g = parser.add_argument_group('location')
    g.add_argument('--location',
                   choices=[loc['name'] for loc in locations],
                   help='location name defined in locations.yml')
    g.add_argument('--lon', type=float)
    g.add_argument('--lat', type=float)

    g = parser.add_argument_group('CMIP6 control')
    g.add_argument('--model', nargs='+', default=None, choices=MODELS)
    g.add_argument(
        '--ensemble_member',
        default=None,
        help="typically `r1i1p1f1` but some models require different members")
    g.add_argument('--experiment',
                   nargs='*',
                   choices=['ssp1_2_6', 'ssp2_4_5', 'ssp3_7_0', 'ssp5_8_5'],
                   default=['ssp5_8_5'])
    # g.add_argument('--ensemble', action='store_true', help='If `--model` is not specified, default to all available models for the standard set of parameters. ')

    o = parser.parse_args()

    if not o.model:
        o.model = MODELS

    if not (o.location or (o.lon and o.lat)):
        parser.error(
            'please provide a location, for instance `--location Welkenraedt`, or use custom lon and lat, e.g. `--lon 5.94 --lat 50.67`'
        )

    elif o.location:
        loc = {loc['name']: loc for loc in locations}[o.location]
        o.lon, o.lat = loc['lon'], loc['lat']

    print('lon', o.lon)
    print('lat', o.lat)

    for experiment in o.experiment:
        variables = [
            ExtremeValueIndices(o.indicator,
                                model,
                                experiment,
                                historical=experiment != "historical",
                                ensemble=o.ensemble_member)
            for model in o.model
        ]

        # https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor-example
        downloaded_variables = []

        with concurrent.futures.ThreadPoolExecutor(
                max_workers=o.max_workers) as executor:
            # Start the load operations and mark each future with its URL
            future_to_url = {executor.submit(v.download): v for v in variables}
            for future in concurrent.futures.as_completed(future_to_url):
                v = future_to_url[future]
                try:
                    data = future.result()
                except Exception as exc:
                    print(f'failed to download {v} : {exc}')
                else:
                    downloaded_variables.append(v)

        loc_folder = o.location.lower() if o.location else f'{o.lat}N-{o.lon}E'
        folder = os.path.join(o.output, loc_folder, "extremes")
        os.makedirs(folder, exist_ok=True)

        dataset = {}

        # homogenize units
        dates = np.array([
            cftime.DatetimeGregorian(y, 12, 31) for y in range(1979, 2100 + 1)
        ])
        index = pd.Index(cftime.date2num(dates, time_units), name=time_units)

        for v in downloaded_variables:
            series = v.load_timeseries(lon=o.lon,
                                       lat=o.lat,
                                       overwrite=o.overwrite)
            series.index = index[:len(
                series)]  # otherwise we have things like 180, 182 etc

            dataset[v.model] = series

            csv_file = os.path.join(folder, f'{o.indicator}-{v.model}.csv')
            print("Save to file", csv_file)
            series.to_csv(csv_file)

        df = pd.DataFrame(dataset)

        csv_file = os.path.join(folder, f'{o.indicator}-all.csv')
        print("Save to file", csv_file)
        df.to_csv(csv_file)
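A hedged sketch of the date2num index built above; "days since 1900-01-01" stands in for the module-level time_units, which this example does not show:

import cftime
import pandas as pd

units = "days since 1900-01-01"
dates = [cftime.DatetimeGregorian(y, 12, 31) for y in (2000, 2001, 2002)]
index = pd.Index(cftime.date2num(dates, units), name=units)
print(index)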
Example #12
            "2019-10-10 01:02:34",
            dt.datetime(2019, 10, 10, 1, 2, 34),
            id="str with space",
        ),
        pytest.param(
            "2019-10-10T01:02:34",
            dt.datetime(2019, 10, 10, 1, 2, 34),
            id="iso8601",
        ),
        pytest.param(
            np.datetime64("2019-10-10T11:22:33"),
            dt.datetime(2019, 10, 10, 11, 22, 33),
            id="datetime64",
        ),
        pytest.param(
            cftime.DatetimeGregorian(2019, 10, 10, 11, 22, 33),
            dt.datetime(2019, 10, 10, 11, 22, 33),
            id="cftime.DatetimeGregorian",
        ),
    ],
)
def test__to_datetime(given, expect):
    assert util.to_datetime(given) == expect


class Test_to_datetime(unittest.TestCase):
    def test_datetime(self):
        now = dt.datetime.now()
        result = util.to_datetime(now)
        self.assertEqual(result, now)
Example #13
def test_ord_to_datetime():
    # Independence day
    date = cftime.DatetimeGregorian(1776, 7, 4, 12, 0, 0, 0)
    ord_time = date2num(date, TIMEUNITS)
    # Independence day (note that this fails if date has microseconds != 0)
    assert ord_to_datetime(ord_time, TIMEUNITS) == date
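A hedged round-trip sketch of what the test above checks, using cftime.num2date directly as a stand-in for ord_to_datetime and an assumed units string in place of TIMEUNITS:

import cftime

units = "days since 1700-01-01 00:00:00"
date = cftime.DatetimeGregorian(1776, 7, 4, 12, 0, 0, 0)
num = cftime.date2num(date, units)
print(cftime.num2date(num, units) == date)  # True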
Example #14
    # get data array for variable:
    _da = db_aggregated.filter(variable=var,
                               climatemodel=climatemodels_fl).timeseries(
                               ).transpose().unstack().to_xarray().squeeze()
    # convert to dataset:
    _ds = _da.to_dataset(name=var)
    # remove coordinate for variable (contained in name):
    del _ds.coords[variable]
    # merge with existing dataset:
    ds = xr.merge([_ds, ds])
ds['year'] = xr.DataArray([t.year for t in ds['time'].values], dims='time')
ds['month'] = xr.DataArray([t.month for t in ds['time'].values], dims='time')
ds['day'] = xr.DataArray([t.day for t in ds['time'].values], dims='time')
# Convert to cftime
dates = [
    cftime.DatetimeGregorian(y, m, d)
    for y, m, d in zip(ds['year'], ds['month'], ds['day'])
]
ds['time'] = dates
ds = ds.sel(time=slice('1850', '2100'))
ds['time'] = pd.to_datetime([
    pd.datetime(y, m, d) for y, m, d in zip(ds['year'], ds['month'], ds['day'])
])
# Timestep for integral:
ds['delta_t'] = xr.DataArray(np.ones(len(ds['time'])),
                             dims='time',
                             coords={'time': ds['time']})
ds_save = ds.copy()

# %%
ds
Example #15
    with forest.drivers.eida50.Database(path):
        pass
    with forest.drivers.eida50.Database(path):
        pass


@pytest.mark.parametrize(
    "value",
    [None, "2020-01-01 00:00:00",
     np.datetime64("2020-01-01 00:00:00", "s")],
)
def test_database_insert_times_invalid_types(value):
    """Anything that doesn't support object.strftime(fmt)"""
    database = forest.drivers.eida50.Database()
    with pytest.raises(Exception):
        database.insert_times([value], "file.nc")


@pytest.mark.parametrize(
    "value,expect",
    [
        pytest.param(dt.datetime(2020, 1, 1), dt.datetime(2020, 1, 1)),
        pytest.param(cftime.DatetimeGregorian(2020, 1, 1),
                     dt.datetime(2020, 1, 1)),
    ],
)
def test_database_insert_times_supported_types(value, expect):
    database = forest.drivers.eida50.Database()
    database.insert_times([value], "file.nc")
    assert database.fetch_times() == [expect]
Example #16
    values = np.arange(3 * 3 * 3 * 3).reshape(3, 3, 3, 3)
    with netCDF4.Dataset(path, "w") as dataset:
        variable_4d(dataset, variable, times, pressures, longitudes, latitudes,
                    values)
    lon, lat = 0.1, 0.1
    loader = series.SeriesLoader([path])
    result = loader._load_netcdf4(path, variable, lon, lat, pressure=500)
    expect = {"x": times, "y": values[:, 1, 0, 0]}
    npt.assert_array_equal(expect["x"], result["x"])
    npt.assert_array_equal(expect["y"], result["y"])


@pytest.mark.parametrize(
    "value,expect",
    [(dt.datetime(2020, 1, 1), "2020-01-01 00:00:00"),
     (cftime.DatetimeGregorian(2020, 1, 1), "2020-01-01 00:00:00")])
def test_series_locator_key(value, expect):
    assert series.SeriesLocator.key(value) == expect


class TestSeries(unittest.TestCase):
    def setUp(self):
        self.path = "test-series.nc"

    def tearDown(self):
        if os.path.exists(self.path):
            os.remove(self.path)

    def test_series_given_missing_variable_returns_empty(self):
        pressure = 500
        lon = 1
    right = pd.date_range("2020-01-01", periods=3)
    assert time_array_equal(left, right) == False


def test_valueerror_lengths_must_match():
    a = ["2020-01-01T00:00:00Z"]
    b = [
        "2020-02-01T00:00:00Z", "2020-02-02T00:00:00Z", "2020-02-03T00:00:00Z"
    ]
    with pytest.raises(ValueError):
        pd.to_datetime(a) == pd.to_datetime(b)


@pytest.mark.parametrize("left,right,expect", [
    pytest.param([
        cftime.DatetimeGregorian(2020, 1, 1),
        cftime.DatetimeGregorian(2020, 1, 2),
        cftime.DatetimeGregorian(2020, 1, 3)
    ],
                 pd.date_range("2020-01-01", periods=3),
                 True,
                 id="gregorian/pandas same values"),
    pytest.param([
        cftime.DatetimeGregorian(2020, 2, 1),
        cftime.DatetimeGregorian(2020, 2, 2),
        cftime.DatetimeGregorian(2020, 2, 3)
    ],
                 pd.date_range("2020-01-01", periods=3),
                 False,
                 id="gregorian/pandas same length different values"),
])
Example #18
def main(f, years):
    sDir = os.path.dirname(f)
    ncfile = xr.open_dataset(f, mask_and_scale=False)

    yrs = np.arange(years[0], years[1] + 1, 1)

    sf = pd.read_csv(os.path.join(sDir, 'summary_1970-2019.csv'))
    hindex = list(sf['findex'])

    # distance from land is < 60 nmile (111 km)
    storm_summary = dict(name=[],
                         year=[],
                         t0=[],
                         tf=[],
                         max_usa_sshs=[],
                         landfall_lat=[],
                         landfall_lon=[],
                         dist_from_shore_km=[],
                         landfall_cat=[],
                         landfall_wspd_kts=[],
                         landfall_pres=[],
                         findex=[])
    for i, hi in enumerate(hindex):
        ncf = ncfile.sel(storm=hi)
        t0 = min(t for t in ncf['time'].values
                 if t > cftime.DatetimeGregorian(1800, 1, 1, 0, 0, 0, 0))
        if t0.year in yrs:
            lf = ncf.landfall.values.astype('float')
            lf[lf == -9999] = np.nan  # convert fill values to nan

            # find all landfall indices
            lf_ind = np.where(lf < 111)[0]

            # find the storm category
            cats = return_clean_array(ncf, 'usa_sshs')
            max_cat = np.nanmax(cats)

            # if the storm makes landfall and is a TS or higher
            if np.logical_and(len(lf_ind) > 0, max_cat >= 0):

                # break up index into each consecutive section
                new_ind = []
                ni = []
                for tri, index in enumerate(lf_ind):
                    if 0 < tri < len(lf_ind) - 1:
                        if index - lf_ind[tri - 1] > 1:
                            new_ind.append(ni)
                            ni = []
                            ni.append(index)
                        else:
                            ni.append(index)
                    elif tri == len(lf_ind) - 1:
                        if index - lf_ind[tri - 1] > 1:
                            new_ind.append(ni)
                            new_ind.append([index])
                        else:
                            ni.append(index)
                            new_ind.append(ni)
                    else:
                        ni.append(index)

                # find the index of the beginning of each individual landfall (not just where landfall=0)
                landfall_idx = []
                for ii, jj in enumerate(new_ind):
                    landfall_idx.append(jj[0])

                lats = return_clean_array(ncf, 'lat')
                lons = return_clean_array(ncf, 'lon')
                wspd = return_clean_array(ncf, 'usa_wind')
                pres = return_clean_array(ncf, 'usa_pres')

                # find the storm category, max windspeed, and pressure at landfall
                for idx in landfall_idx:
                    lf_lon = lons[idx]
                    if lf_lon < -60:
                        storm_summary['name'].append(
                            ncf['name'].values.tostring().decode('utf-8'))
                        tf = max(ncf['time'].values)
                        storm_summary['t0'].append(t0)
                        storm_summary['tf'].append(tf)
                        storm_summary['year'].append(t0.year)
                        storm_summary['findex'].append(hi)
                        storm_summary['dist_from_shore_km'].append(lf[idx])
                        storm_summary['max_usa_sshs'].append(max_cat)
                        storm_summary['landfall_lat'].append(lats[idx])
                        storm_summary['landfall_lon'].append(lf_lon)
                        storm_summary['landfall_cat'].append(cats[idx])
                        storm_summary['landfall_wspd_kts'].append(wspd[idx])
                        storm_summary['landfall_pres'].append(pres[idx])

    df = pd.DataFrame(storm_summary)
    df.to_csv(os.path.join(sDir, 'NA_landfall_summary_1970-2019.csv'),
              index=False)