Example 1
def run(forcing: pd.DataFrame,
        params: dict,
        elev: float,
        lat: float,
        disagg=True):
    """
    Run all of the mtclim forcing generation

    Parameters
    ----------
    forcing:
        The daily forcings given from input
    solar_geom:
        Solar geometry of the site

    Returns
    -------
    forcing:
        Dataframe of daily or subdaily forcings
    """

    # solar_geom returns a tuple due to restrictions of numba
    # for clarity we convert it to a dictionary here
    sg = solar_geom(elev, lat)
    sg = {
        'tiny_rad_fract': sg[0],
        'daylength': sg[1],
        'potrad': sg[2],
        'tt_max0': sg[3]
    }
    params['n_days'] = len(forcing)
    calc_t_air(forcing, elev, params)
    calc_prec(forcing, params)
    calc_snowpack(forcing, params)
    calc_srad_hum(forcing, sg, elev, params)

    if disagg:
        forcing = disaggregate(forcing, params, sg)
    else:
        # convert srad to daily average flux from daytime flux
        forcing['swrad'] *= forcing['dayl'] / cnst.SEC_PER_DAY

    return forcing
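
For orientation, a hypothetical single-site invocation of `run` might look like the following. The column names match MetSim's daily-forcing conventions, but the import path, parameter keys, and values are illustrative assumptions, not taken from the example above:

import numpy as np
import pandas as pd
# from metsim.methods.mtclim import run   # assumed import path

dates = pd.date_range('2000-01-01', '2000-12-31', freq='D')
forcing = pd.DataFrame({
    't_min': np.random.uniform(-10, 5, len(dates)),   # daily minimum temperature
    't_max': np.random.uniform(5, 20, len(dates)),    # daily maximum temperature
    'prec': np.random.uniform(0, 10, len(dates)),     # daily precipitation
}, index=dates)

params = {'time_step': 60}   # subdaily output step in minutes (assumed key)
# forcing = run(forcing, params, elev=1000.0, lat=45.0, disagg=True)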
Example 2
def wrap_run_cell(func: callable, params: dict, ds: xr.Dataset,
                  state: xr.Dataset, disagg: bool,
                  out_times: pd.DatetimeIndex):
    """
    Iterate over a chunk of the domain. This is wrapped
    so that the daily and disaggregated dataframes can
    be returned together.

    Parameters
    ----------
    func: callable
        The function to call to do the work
    params: dict
        Parameters from a MetSim object
    ds: xr.Dataset
        Input forcings and domain
    state: xr.Dataset
        State variables at the point of interest
    disagg: bool
        Whether or not we should run a disagg routine
    out_times: pd.DatetimeIndex
        Times to return (should be trimmed 1 day at
        each end from the given index)

    Returns
    -------
    df_complete: pd.DataFrame
        A dataframe with the disaggregated data in it
    df_base: pd.DataFrame
        A dataframe with the state data in it
    """
    lat = ds['lat'].values.flatten()[0]
    lon = ds['lon'].values.flatten()[0]
    elev = ds['elev'].values.flatten()[0]
    params['elev'] = elev
    params['lat'] = lat
    params['lon'] = lon
    df = ds.to_dataframe()

    # solar_geom returns a tuple due to restrictions of numba
    # for clarity we convert it to a dictionary here
    sg = solar_geom(elev, lat, params['lapse_rate'])
    sg = {
        'tiny_rad_fract': sg[0],
        'daylength': sg[1],
        'potrad': sg[2],
        'tt_max0': sg[3]
    }
    yday = df.index.dayofyear - 1
    df['daylength'] = sg['daylength'][yday]
    df['potrad'] = sg['potrad'][yday]
    df['tt_max'] = sg['tt_max0'][yday]

    # Generate the daily values - these are saved
    # so that we can use a subset of them to write
    # out the state file later
    df_base = func(df, params)

    if disagg:
        # Get some values for padding the time list,
        # so that when interpolating in the disaggregation
        # functions we can match endpoints with adjoining
        # chunks - if no data is available, just repeat some
        # default values (this case is used at the very
        # beginning and end of the record)
        t_begin = [state['t_min'].values[-1], state['t_max'].values[-1]]
        try:
            nextday = out_times[-1] + pd.Timedelta('1 days')
            t_end = [
                ds['t_min'].sel(time=nextday), ds['t_max'].sel(time=nextday)
            ]
        except (KeyError, ValueError):
            # None so that we don't extend the record
            t_end = None

        # Disaggregate to subdaily values
        df_complete = disaggregate(df, params, sg, t_begin, t_end)

        # Calculate the times that we want to get out by chopping
        # off the endpoints that were added on previously
        start = out_times.values[0]
        stop = (out_times.values[-1] + pd.Timedelta('1 days') -
                pd.Timedelta("{} minutes".format(params['time_step'])))

        if params['period_ending']:
            start += pd.Timedelta('{} minutes'.format(params['time_step']))
            stop += pd.Timedelta('{} minutes'.format(params['time_step']))

        new_times = date_range(start,
                               stop,
                               freq='{}T'.format(params['time_step']),
                               calendar=params['calendar'])
    else:
        # convert srad to daily average flux from daytime flux
        df_base['shortwave'] *= df_base['daylength'] / cnst.SEC_PER_DAY
        # If we're outputting daily values, we don't need to
        # change the output dates - see inside of `if` condition
        # above for more explanation
        new_times = out_times
        df_complete = df_base

    # Cut the returned data down to the correct time index
    df_complete = df_complete.loc[new_times[0]:new_times[-1]]
    return df_complete, df_base
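
The day-of-year indexing above (mapping the 366-entry solar geometry arrays onto the forcing index) can be exercised in isolation. This is a minimal sketch with a made-up daylength climatology standing in for the `solar_geom` output:

import numpy as np
import pandas as pd

# Stand-in for sg['daylength']: one value per day of year (366 entries)
daylength = np.linspace(8 * 3600, 16 * 3600, 366)

dates = pd.date_range('2000-01-01', '2000-12-31', freq='D')
df = pd.DataFrame(index=dates)
yday = df.index.dayofyear - 1       # 0-based day-of-year index
df['daylength'] = daylength[yday]   # broadcast the climatology onto the dates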
Example 3
def wrap_run(func: callable, loc: dict, params: dict,
             ds: xr.Dataset, state: xr.Dataset, disagg: bool,
             out_times: pd.DatetimeIndex, year: str):
    """
    Iterate over a chunk of the domain. This is wrapped
    so we can return a tuple of locs and df.

    Parameters
    ----------
    func: callable
        The function to call to do the work
    loc: dict
        Some subset of the domain to do work on
    params: dict
        Parameters from a MetSim object
    ds: xr.Dataset
        Input forcings and domain
    state: xr.Dataset
        State variables at the point of interest
    disagg: bool
        Whether or not we should run a disagg routine
    out_times: pd.DatetimeIndex
        Times to return (should be trimmed 1 day at
        each end from the given index)
    year: str
        The year being run. This is used to add on
        extra times to make output smooth at endpoints
        if the run is chunked in time.

    Returns
    -------
    results: tuple
        A tuple arranged as
        (location, subdaily_output, daily_output)
    """
    logger.info("Processing {}".format(loc))
    lat = ds['lat'].values
    elev = ds['elev'].values
    swe = ds['swe'].values
    df = ds.drop(['lat', 'lon', 'elev', 'swe']).to_dataframe()
    # solar_geom returns a tuple due to restrictions of numba
    # for clarity we convert it to a dictionary here
    sg = solar_geom(elev, lat)
    sg = {'tiny_rad_fract': sg[0], 'daylength': sg[1],
          'potrad': sg[2], 'tt_max0': sg[3]}

    # Generate the daily values - these are saved
    # so that we can use a subset of them to write
    # out the state file later
    df_base = func(df, params, sg, elev=elev, swe=swe)

    if disagg:
        # Get some values for padding the time list,
        # so that when interpolating in the disaggregation
        # functions we can match endpoints with adjoining
        # chunks - if no data is available, just repeat some
        # default values (this case is used at the very
        # beginning and end of the record)
        try:
            prevday = out_times[0] - pd.Timedelta('1 days')
            t_begin = [ds['t_min'].sel(time=prevday),
                       ds['t_max'].sel(time=prevday)]
        except (KeyError, ValueError):
            t_begin = [state['t_min'].values[-1],
                       state['t_max'].values[-1]]
        try:
            nextday = pd.Timestamp(int(year) + 1, 1, 1)
            t_end = [ds['t_min'].sel(time=nextday),
                     ds['t_max'].sel(time=nextday)]
        except (KeyError, ValueError):
            # None so that we don't extend the record
            t_end = None

        # Disaggregate to subdaily values
        df_complete = disaggregate(df, params, sg, t_begin, t_end)
        # Calculate the times that we want to get out by chopping
        # off the endpoints that were added on previously
        start = out_times[0]
        stop = (out_times[-1] + pd.Timedelta('1 days') -
                pd.Timedelta('{} minutes'.format(params['time_step'])))
        new_times = date_range(
            start, stop, freq='{}T'.format(params['time_step']),
            calendar=params['calendar'])
    else:
        # convert srad to daily average flux from daytime flux
        df_base['swrad'] *= df_base['dayl'] / cnst.SEC_PER_DAY
        # If we're outputting daily values, we don't need to
        # change the output dates - see inside of `if` condition
        # above for more explanation
        new_times = out_times
        df_complete = df_base

    # Cut the returned data down to the correct time index
    df_complete = df_complete.loc[new_times[0]:new_times[-1]]
    df_base = df_base.loc[new_times[0]:new_times[-1]]
    return (loc, df_complete, df_base)
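
The endpoint padding above relies on `.sel(time=...)` raising a KeyError when the adjoining day is missing from the record. A self-contained sketch of that behavior, with a toy dataset in place of the real forcings:

import numpy as np
import pandas as pd
import xarray as xr

times = pd.date_range('2000-01-01', '2000-12-31', freq='D')
ds = xr.Dataset({'t_min': ('time', np.zeros(len(times)))},
                coords={'time': times})

try:
    # One day before the record starts - not present, so .sel raises
    t_begin = [ds['t_min'].sel(time=pd.Timestamp('1999-12-31'))]
except (KeyError, ValueError):
    t_begin = None   # the real code falls back to state values here
print(t_begin)       # None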
Example 4
    def run(self):
        """
        Kicks off the disaggregation and queues up data for IO
        """
        time_dim = pd.to_datetime(self.met_data.time.values)
        if self.params['annual']:
            groups = time_dim.groupby(time_dim.year)
        else:
            groups = {'total': time_dim}
        for label, times in groups.items():
            logger.info("Beginning {}".format(label))
            # Add in some end point data for continuity
            times_ext = times.union([times[0] - pd.Timedelta("1 days"),
                                     times[-1] + pd.Timedelta("1 days")]
                                    ).intersection(time_dim)
            data = self.met_data.sel(time=times_ext)
            self.setup_output(self.met_data.sel(time=times))
            for i, j in self.locations:
                locs = dict(lat=i, lon=j)
                logger.info("Processing {}".format(locs))
                ds = data.isel(**locs)
                lat = ds['lat'].values
                elev = ds['elev'].values
                swe = ds['swe'].values
                df = ds.drop(['lat', 'lon', 'elev', 'swe']).to_dataframe()
                # solar_geom returns a tuple due to restrictions of numba
                # for clarity we convert it to a dictionary here
                sg = solar_geom(elev, lat)
                sg = {'tiny_rad_fract': sg[0], 'daylength': sg[1],
                      'potrad': sg[2], 'tt_max0': sg[3]}

                # Generate the daily values - these are saved
                # so that we can use a subset of them to write
                # out the state file later
                df = self.method.run(df, self.params, sg,
                                     elev=elev, swe=swe)

                # Get some values for padding the time list,
                # so that when interpolating in the disaggregation
                # functions we can match endpoints with adjoining
                # chunks - if no data is available, just repeat some
                # default values (this case is used at the very
                # beginning and end of the record)
                if self.disagg:
                    try:
                        prevday = data.time[0] - pd.Timedelta('1 days')
                        t_begin = [self.met_data['t_min'].sel(
                                       time=prevday).isel(lat=i, lon=j),
                                   self.met_data['t_max'].sel(
                                       time=prevday).isel(lat=i, lon=j)]
                    except (KeyError, ValueError):
                        t_begin = [self.state['t_min'].values[-1, i, j],
                                   self.state['t_max'].values[-1, i, j]]
                    try:
                        nextday = pd.Timestamp(int(label) + 1, 1, 1)
                        t_end = [self.met_data['t_min'].sel(
                                     time=nextday).isel(lat=i, lon=j),
                                 self.met_data['t_max'].sel(
                                     time=nextday).isel(lat=i, lon=j)]
                    except (KeyError, ValueError):
                        # None so that we don't extend the record
                        t_end = None

                    self._unpack_state(df, locs)
                    df = disaggregate(df, self.params, sg, t_begin, t_end)
                    start = times[0]
                    stop = (times[-1] + pd.Timedelta('1 days') -
                            pd.Timedelta('{} minutes'.format(
                                self.params['time_step'])))
                    new_times = date_range(
                        start, stop,
                        freq='{}T'.format(self.params['time_step']),
                        calendar=self.params['calendar'])
                else:
                    # convert srad to daily average flux from daytime flux
                    self._unpack_state(df, locs)
                    df['swrad'] *= df['dayl'] / cnst.SEC_PER_DAY
                    # If we're outputting daily values, we don't need to
                    # change the output dates - see inside of `if` condition
                    # above for more explanation
                    new_times = times

                # Cut the returned data down to the correct time index
                self._unpack_results((locs, df.loc[new_times[0]:new_times[-1]]))

            self.write(label)
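
The annual grouping and one-day extension at the top of this method can be demonstrated on their own. This is a minimal sketch of the pandas behavior being relied on, not MetSim API:

import pandas as pd

time_dim = pd.date_range('2000-01-01', '2001-12-31', freq='D')
groups = time_dim.groupby(time_dim.year)   # dict mapping year -> DatetimeIndex
for label, times in groups.items():
    # Pad one day on each side for continuity, clipped to the record
    times_ext = times.union([times[0] - pd.Timedelta('1 days'),
                             times[-1] + pd.Timedelta('1 days')]
                            ).intersection(time_dim)
    print(label, times_ext[0], times_ext[-1])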