import logging

import pandas as pd
import xarray as xr

# These imports assume MetSim's package layout; adjust paths to your tree.
import metsim.constants as cnst
from metsim.datetime import date_range
from metsim.disaggregate import disaggregate
from metsim.physics import solar_geom

logger = logging.getLogger(__name__)


def run(forcing: pd.DataFrame, params: dict, elev: float, lat: float,
        disagg=True):
    """
    Run all of the mtclim forcing generation

    Parameters
    ----------
    forcing: pd.DataFrame
        The daily forcings given from input
    params: dict
        Parameters from a MetSim object
    elev: float
        Elevation of the site
    lat: float
        Latitude of the site
    disagg: bool
        Whether to disaggregate to subdaily timesteps

    Returns
    -------
    forcing: pd.DataFrame
        Dataframe of daily or subdaily forcings
    """
    # solar_geom returns a tuple due to restrictions of numba;
    # for clarity we convert it to a dictionary here
    sg = solar_geom(elev, lat)
    sg = {'tiny_rad_fract': sg[0],
          'daylength': sg[1],
          'potrad': sg[2],
          'tt_max0': sg[3]}
    params['n_days'] = len(forcing)

    # The calc_* helpers are defined alongside this function in
    # MetSim's mtclim module; they fill in the daily values in place
    calc_t_air(forcing, elev, params)
    calc_prec(forcing, params)
    calc_snowpack(forcing, params)
    calc_srad_hum(forcing, sg, elev, params)

    if disagg:
        forcing = disaggregate(forcing, params, sg)
    else:
        # convert srad to daily average flux from daytime flux
        forcing['swrad'] *= forcing['dayl'] / cnst.SEC_PER_DAY
    return forcing
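# A minimal, self-contained sketch (not part of MetSim) of the daytime-
# to-daily shortwave conversion in the `else` branch above: the flux
# averaged over daylight hours is scaled by the daylit fraction of the
# day to give a daily mean flux. The numbers here are hypothetical.
def _example_srad_daily_average():
    SEC_PER_DAY = 86400      # same value as cnst.SEC_PER_DAY
    swrad_daytime = 450.0    # mean flux over daylight hours (W m-2)
    daylength = 36000.0      # daylight duration (s), i.e. 10 hours
    swrad_daily = swrad_daytime * daylength / SEC_PER_DAY
    return swrad_daily       # 187.5 W m-2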
def wrap_run_cell(func: callable, params: dict,
                  ds: xr.Dataset, state: xr.Dataset, disagg: bool,
                  out_times: pd.DatetimeIndex):
    """
    Iterate over a chunk of the domain. This is wrapped
    so we can return a tuple of locs and df.

    Parameters
    ----------
    func: callable
        The function to call to do the work
    params: dict
        Parameters from a MetSim object
    ds: xr.Dataset
        Input forcings and domain
    state: xr.Dataset
        State variables at the point of interest
    disagg: bool
        Whether or not we should run a disagg routine
    out_times: pd.DatetimeIndex
        Times to return (should be trimmed 1 day at
        each end from the given index)

    Returns
    -------
    df_complete: pd.DataFrame
        A dataframe with the disaggregated data in it
    df_base: pd.DataFrame
        A dataframe with the state data in it
    """
    lat = ds['lat'].values.flatten()[0]
    lon = ds['lon'].values.flatten()[0]
    elev = ds['elev'].values.flatten()[0]
    params['elev'] = elev
    params['lat'] = lat
    params['lon'] = lon
    df = ds.to_dataframe()

    # solar_geom returns a tuple due to restrictions of numba;
    # for clarity we convert it to a dictionary here
    sg = solar_geom(elev, lat, params['lapse_rate'])
    sg = {'tiny_rad_fract': sg[0],
          'daylength': sg[1],
          'potrad': sg[2],
          'tt_max0': sg[3]}
    yday = df.index.dayofyear - 1
    df['daylength'] = sg['daylength'][yday]
    df['potrad'] = sg['potrad'][yday]
    df['tt_max'] = sg['tt_max0'][yday]

    # Generate the daily values - these are saved
    # so that we can use a subset of them to write
    # out the state file later
    df_base = func(df, params)

    if disagg:
        # Get some values for padding the time list,
        # so that when interpolating in the disaggregation
        # functions we can match endpoints with adjoining
        # chunks - if no data is available, just repeat some
        # default values (this case is used at the very
        # beginning and end of the record)
        t_begin = [state['t_min'].values[-1],
                   state['t_max'].values[-1]]
        try:
            nextday = out_times[-1] + pd.Timedelta('1 days')
            t_end = [ds['t_min'].sel(time=nextday),
                     ds['t_max'].sel(time=nextday)]
        except (KeyError, ValueError):
            # None so that we don't extend the record
            t_end = None

        # Disaggregate to subdaily values
        df_complete = disaggregate(df, params, sg, t_begin, t_end)

        # Calculate the times that we want to get out by chopping
        # off the endpoints that were added on previously
        start = out_times.values[0]
        stop = (out_times.values[-1] + pd.Timedelta('1 days')
                - pd.Timedelta('{} minutes'.format(params['time_step'])))
        if params['period_ending']:
            start += pd.Timedelta('{} minutes'.format(params['time_step']))
            stop += pd.Timedelta('{} minutes'.format(params['time_step']))
        new_times = date_range(
            start, stop, freq='{}T'.format(params['time_step']),
            calendar=params['calendar'])
    else:
        # convert srad to daily average flux from daytime flux
        df_base['shortwave'] *= df_base['daylength'] / cnst.SEC_PER_DAY
        # If we're outputting daily values, we don't need to
        # change the output dates - see inside of `if` condition
        # above for more explanation
        new_times = out_times
        df_complete = df_base

    # Cut the returned data down to the correct time index
    df_complete = df_complete.loc[new_times[0]:new_times[-1]]
    return df_complete, df_base
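# A minimal sketch (plain pandas, no MetSim objects) of the output-time
# arithmetic in the disaggregation branch of `wrap_run_cell`. The
# `time_step` and `period_ending` values stand in for entries of
# `params` and are hypothetical.
def _example_output_times():
    time_step = 60        # minutes
    period_ending = True
    out_times = pd.date_range('2000-01-02', '2000-01-03', freq='D')
    start = out_times[0]
    # The last subdaily step of the final day ends one time step
    # before midnight of the following day
    stop = (out_times[-1] + pd.Timedelta('1 days')
            - pd.Timedelta('{} minutes'.format(time_step)))
    if period_ending:
        # Period-ending output labels each interval by its end time,
        # so the whole window shifts forward by one step
        start += pd.Timedelta('{} minutes'.format(time_step))
        stop += pd.Timedelta('{} minutes'.format(time_step))
    # -> hourly stamps from 2000-01-02 01:00 through 2000-01-04 00:00
    return pd.date_range(start, stop, freq='{}min'.format(time_step))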
def wrap_run(func: callable, loc: dict, params: dict,
             ds: xr.Dataset, state: xr.Dataset, disagg: bool,
             out_times: pd.DatetimeIndex, year: str):
    """
    Iterate over a chunk of the domain. This is wrapped
    so we can return a tuple of locs and df.

    Parameters
    ----------
    func: callable
        The function to call to do the work
    loc: dict
        Some subset of the domain to do work on
    params: dict
        Parameters from a MetSim object
    ds: xr.Dataset
        Input forcings and domain
    state: xr.Dataset
        State variables at the point of interest
    disagg: bool
        Whether or not we should run a disagg routine
    out_times: pd.DatetimeIndex
        Times to return (should be trimmed 1 day at
        each end from the given index)
    year: str
        The year being run. This is used to add on
        extra times to make output smooth at endpoints
        if the run is chunked in time.

    Returns
    -------
    results: tuple
        A tuple arranged as (location, subdaily output, daily output)
    """
    logger.info("Processing {}".format(loc))
    lat = ds['lat'].values
    elev = ds['elev'].values
    swe = ds['swe'].values
    df = ds.drop(['lat', 'lon', 'elev', 'swe']).to_dataframe()

    # solar_geom returns a tuple due to restrictions of numba;
    # for clarity we convert it to a dictionary here
    sg = solar_geom(elev, lat)
    sg = {'tiny_rad_fract': sg[0],
          'daylength': sg[1],
          'potrad': sg[2],
          'tt_max0': sg[3]}

    # Generate the daily values - these are saved
    # so that we can use a subset of them to write
    # out the state file later
    df_base = func(df, params, sg, elev=elev, swe=swe)

    if disagg:
        # Get some values for padding the time list,
        # so that when interpolating in the disaggregation
        # functions we can match endpoints with adjoining
        # chunks - if no data is available, just repeat some
        # default values (this case is used at the very
        # beginning and end of the record)
        try:
            prevday = out_times[0] - pd.Timedelta('1 days')
            t_begin = [ds['t_min'].sel(time=prevday),
                       ds['t_max'].sel(time=prevday)]
        except (KeyError, ValueError):
            t_begin = [state['t_min'].values[-1],
                       state['t_max'].values[-1]]
        try:
            # pd.datetime was removed from pandas; pd.Timestamp is the
            # equivalent construction
            nextday = pd.Timestamp(year=int(year) + 1, month=1, day=1)
            t_end = [ds['t_min'].sel(time=nextday),
                     ds['t_max'].sel(time=nextday)]
        except (KeyError, ValueError):
            # None so that we don't extend the record
            t_end = None

        # Disaggregate to subdaily values
        df_complete = disaggregate(df, params, sg, t_begin, t_end)

        # Calculate the times that we want to get out by chopping
        # off the endpoints that were added on previously
        start = out_times[0]
        stop = out_times[-1] + pd.Timedelta('23 hours')
        new_times = date_range(
            start, stop, freq='{}T'.format(params['time_step']),
            calendar=params['calendar'])
    else:
        # convert srad to daily average flux from daytime flux
        df_base['swrad'] *= df_base['dayl'] / cnst.SEC_PER_DAY
        # If we're outputting daily values, we don't need to
        # change the output dates - see inside of `if` condition
        # above for more explanation
        new_times = out_times
        df_complete = df_base

    # Cut the returned data down to the correct time index
    df_complete = df_complete.loc[new_times[0]:new_times[-1]]
    df_base = df_base.loc[new_times[0]:new_times[-1]]
    return (loc, df_complete, df_base)
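# A minimal sketch (toy data, not the real domain) of the endpoint-
# padding pattern used in `wrap_run`: try to read the day before the
# chunk from the forcings, and fall back to the last saved state values
# when that day is missing from the record.
def _example_endpoint_padding():
    times = pd.date_range('2000-01-02', '2000-01-05', freq='D')
    ds = xr.Dataset({'t_min': ('time', [-5.0, -4.0, -6.0, -3.0]),
                     't_max': ('time', [2.0, 3.0, 1.0, 4.0])},
                    coords={'time': times})
    state = {'t_min': [-7.0], 't_max': [0.0]}  # stands in for the state dataset
    out_times = pd.date_range('2000-01-03', '2000-01-04', freq='D')
    try:
        prevday = out_times[0] - pd.Timedelta('1 days')
        t_begin = [float(ds['t_min'].sel(time=prevday)),
                   float(ds['t_max'].sel(time=prevday))]
    except (KeyError, ValueError):
        # No forcing data before the chunk: reuse the saved state
        t_begin = [state['t_min'][-1], state['t_max'][-1]]
    return t_begin  # [-5.0, 2.0], read from the forcings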
def run(self):
    """
    Kicks off the disaggregation and queues up data for IO
    """
    time_dim = pd.to_datetime(self.met_data.time.values)
    if self.params['annual']:
        groups = time_dim.groupby(time_dim.year)
    else:
        groups = {'total': time_dim}
    for label, times in groups.items():
        logger.info("Beginning {}".format(label))
        # Add in some end point data for continuity
        times_ext = times.union([times[0] - pd.Timedelta("1 days"),
                                 times[-1] + pd.Timedelta("1 days")]
                                ).intersection(time_dim)
        data = self.met_data.sel(time=times_ext)
        self.setup_output(self.met_data.sel(time=times))
        for i, j in self.locations:
            locs = dict(lat=i, lon=j)
            logger.info("Processing {}".format(locs))
            ds = data.isel(**locs)
            lat = ds['lat'].values
            elev = ds['elev'].values
            swe = ds['swe'].values
            df = ds.drop(['lat', 'lon', 'elev', 'swe']).to_dataframe()

            # solar_geom returns a tuple due to restrictions of numba;
            # for clarity we convert it to a dictionary here
            sg = solar_geom(elev, lat)
            sg = {'tiny_rad_fract': sg[0],
                  'daylength': sg[1],
                  'potrad': sg[2],
                  'tt_max0': sg[3]}

            # Generate the daily values - these are saved
            # so that we can use a subset of them to write
            # out the state file later
            df = self.method.run(df, self.params, sg, elev=elev, swe=swe)

            # Get some values for padding the time list,
            # so that when interpolating in the disaggregation
            # functions we can match endpoints with adjoining
            # chunks - if no data is available, just repeat some
            # default values (this case is used at the very
            # beginning and end of the record)
            if self.disagg:
                try:
                    prevday = data.time[0] - pd.Timedelta('1 days')
                    t_begin = [self.met_data['t_min'].sel(
                                   time=prevday).isel(lat=i, lon=j),
                               self.met_data['t_max'].sel(
                                   time=prevday).isel(lat=i, lon=j)]
                except (KeyError, ValueError):
                    t_begin = [self.state['t_min'].values[-1, i, j],
                               self.state['t_max'].values[-1, i, j]]
                try:
                    # int(label) raises ValueError for the non-annual
                    # 'total' chunk, which falls through to t_end = None;
                    # pd.Timestamp replaces the removed pd.datetime
                    nextday = pd.Timestamp(year=int(label) + 1,
                                           month=1, day=1)
                    t_end = [self.met_data['t_min'].sel(
                                 time=nextday).isel(lat=i, lon=j),
                             self.met_data['t_max'].sel(
                                 time=nextday).isel(lat=i, lon=j)]
                except (KeyError, ValueError):
                    # None so that we don't extend the record
                    t_end = None

                self._unpack_state(df, locs)
                df = disaggregate(df, self.params, sg, t_begin, t_end)
                start = times[0]
                # time_step is in minutes, so give pd.Timedelta its units
                stop = (times[-1] + pd.Timedelta('1 days')
                        - pd.Timedelta('{} minutes'.format(
                            self.params['time_step'])))
                new_times = date_range(
                    start, stop,
                    freq='{}T'.format(self.params['time_step']),
                    calendar=self.params['calendar'])
            else:
                # convert srad to daily average flux from daytime flux
                self._unpack_state(df, locs)
                df['swrad'] *= df['dayl'] / cnst.SEC_PER_DAY
                # If we're outputting daily values, we don't need to
                # change the output dates - see inside of `if` condition
                # above for more explanation
                new_times = times

            # Cut the returned data down to the correct time index
            self._unpack_results((locs, df.loc[new_times[0]:new_times[-1]]))
        self.write(label)
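# A minimal sketch (plain pandas) of the chunk-padding trick at the top
# of `run`: each yearly chunk is extended by one day on either side for
# interpolation continuity, but only with dates that actually exist in
# the full record. The dates here are hypothetical.
def _example_pad_yearly_chunk():
    time_dim = pd.date_range('2000-12-29', '2001-01-03', freq='D')
    groups = time_dim.groupby(time_dim.year)
    times = pd.DatetimeIndex(groups[2001])
    times_ext = times.union([times[0] - pd.Timedelta('1 days'),
                             times[-1] + pd.Timedelta('1 days')]
                            ).intersection(time_dim)
    # -> 2000-12-31 through 2001-01-03: one extra day at the start,
    #    nothing extra at the end because the record stops there
    return times_ext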