def _check_temperature_cols(
    temp_min_col=None,
    temp_max_col=None,
    temp_mean_col=None,
    temp_min_required=False,
    temp_max_required=False,
):
    """Check temperature columns to make sure necessary ones are filled in.

    Parameters
    ----------
    temp_min_col, temp_max_col, temp_mean_col
        Column identifiers for the minimum, maximum, and mean daily
        temperature series.
    temp_min_required, temp_max_required
        When True the corresponding column must be supplied.

    Returns
    -------
    The result of ``_check_cols`` mapping each identifier to its internal
    name ("tmin", "tmax", "tmean").

    Raises
    ------
    ValueError
        If a required column is missing, or if neither a mean column nor
        both min and max columns are supplied.
    """
    if temp_min_col is None and temp_min_required is True:
        # No format fields here; the original's ".format(**locals)" passed
        # the builtin function object and would itself raise TypeError.
        raise ValueError(
            tsutils.error_wrapper("""
This evaporation method requires the minimum daily temperature column
to be specified with "temp_min_col"."""))
    if temp_max_col is None and temp_max_required is True:
        raise ValueError(
            tsutils.error_wrapper("""
This evaporation method requires the maximum daily temperature column
to be specified with "temp_max_col"."""))
    # Only demand both min and max when no mean temperature was supplied;
    # the message below describes exactly that situation, but the original
    # raised even when "temp_mean_col" was given.
    if temp_mean_col is None and (temp_min_col is None or temp_max_col is None):
        raise ValueError(
            tsutils.error_wrapper("""
If you do not pass a mean daily temperature column in "temp_mean_col"
you must give both minimum and maximum daily temperatures using
"temp_min_col" and "temp_max_col".

You gave {temp_min_col} for "temp_min_col" and {temp_max_col} for
"temp_max_col".
""".format(**locals())))
    return _check_cols((temp_min_col, "tmin"),
                       (temp_max_col, "tmax"),
                       (temp_mean_col, "tmean"))
def _validate_temperatures(tsd, temp_min_col, temp_max_col):
    """Validate temperature columns and synthesize "tmean" if missing.

    Parameters
    ----------
    tsd : pandas.DataFrame
        Must contain "tmin" and "tmax" columns; may contain "tmean".
    temp_min_col, temp_max_col
        The user-facing column identifiers, used only in error messages.

    Returns
    -------
    pandas.DataFrame
        *tsd* with a "tmean" column guaranteed to exist.

    Raises
    ------
    ValueError
        If any tmax < tmin, or if a supplied tmean falls outside the
        (tmin, tmax) interval on any date.
    """
    if "tmean" not in tsd.columns:
        if (tsd.tmax < tsd.tmin).any():
            raise ValueError(
                tsutils.error_wrapper("""
On the following dates:

{0},

minimum temperature values in column "{1}" are greater than or equal to
the maximum temperature values in column "{2}".""".format(
                    tsd[tsd.tmax < tsd.tmin].index, temp_min_col,
                    temp_max_col)))
        warnings.warn(
            tsutils.error_wrapper("""
Since `temp_mean_col` is None, the average daily temperature will be
estimated by the average of `temp_min_col` and `temp_max_col`"""))
        tsd["tmean"] = (tsd.tmin + tsd.tmax) / 2.0
    else:
        # Comparison operators bind more loosely than "|", so the mask
        # must be parenthesized; the original unparenthesized expression
        # evaluated "tsd.tmean | tsd.tmax" first and raised at runtime.
        bad = (tsd.tmin >= tsd.tmean) | (tsd.tmax <= tsd.tmean)
        if bad.any():
            # Report the offending dates (.index), matching the first
            # error message above.
            raise ValueError(
                tsutils.error_wrapper("""
On the following dates:

{0},

the daily average is either below or equal to the minimum temperature
in column {1} or higher or equal to the maximum temperature in column
{2}.""".format(tsd[bad].index, temp_min_col, temp_max_col)))
    return tsd
def write_dsn(self, wdmpath, dsn, data):
    """Write to self.wdmfp/dsn the time-series data.

    Reads the DSN's time code, time step, and fill value, replaces NaN
    values in *data* with the fill value, and writes the whole series
    starting at the first index entry.

    NOTE(review): mutates the caller's DataFrame in place via fillna.

    Raises ValueError if the first year of *data* precedes the DSN's
    base year.
    """
    dsn_desc = self.describe_dsn(wdmpath, dsn)
    tcode = dsn_desc["tcode"]
    tstep = dsn_desc["tstep"]
    tsfill = dsn_desc["tsfill"]

    data.fillna(tsfill, inplace=True)

    # Start time expressed as the integer date list the Fortran layer uses.
    start_date = data.index[0]
    dstart_date = start_date.timetuple()[:6]
    llsdat = self._tcode_date(tcode, dstart_date)
    if dsn_desc["base_year"] > llsdat[0]:
        raise ValueError(
            tsutils.error_wrapper("""
The base year for this DSN is {0}.  All data to insert must be after
the base year.  Instead the first year of the series is {1}.
""".format(dsn_desc["base_year"], llsdat[0])))

    nval = len(data)

    # Serialize access to the WDM file across processes.
    lock = SoftFileLock(wdmpath + ".lock", timeout=30)
    with lock:
        wdmfp = self._open(wdmpath, 58)
        retcode = self.wdtput(wdmfp, dsn, tstep, llsdat, nval, 1, 0,
                              tcode, data)
        self._close(wdmpath)
    self._retcode_check(retcode,
                        additional_info="wdtput file={0} DSN={1}".format(
                            wdmpath, dsn))
def cleancopywdm(inwdmpath, outwdmpath, overwrite=False):
    """Make a clean copy of a WDM file.

    Parameters
    ----------
    inwdmpath
        Path to input WDM file.
    outwdmpath
        Path to clean copy WDM file.
    overwrite
        Whether to overwrite an existing outwdmpath.
    """
    if inwdmpath == outwdmpath:
        raise ValueError(
            tsutils.error_wrapper("""
The "inwdmpath" cannot be the same as "outwdmpath".
"""))
    createnewwdm(outwdmpath, overwrite=overwrite)
    activedsn = []
    # DSNs run 1 through 32000 inclusive; stop at 32001 so DSN 32000 is
    # scanned too (consistent with listdsns — the original loop stopped
    # one short at 32000).
    for i in range(1, 32001):
        try:
            activedsn.append(_describedsn(inwdmpath, i)["dsn"])
        except wdmutil.WDMError:
            continue
    # Copy labels (which copies DSN metadata and data)
    for i in activedsn:
        try:
            _copy_dsn(inwdmpath, i, outwdmpath, i)
        except wdmutil.WDMError:
            # Best-effort: skip DSNs that fail to copy.
            pass
def dump(hbnfilename, time_stamp="begin"):
    """Prints out ALL data from a HSPF binary output file.

    Parameters
    ----------
    hbnfilename
        Path to the HSPF binary output file.
    time_stamp
        "begin" stamps each interval with its start time, "end" with its
        end time.
    """
    if time_stamp not in ("begin", "end"):
        raise ValueError(
            tsutils.error_wrapper("""
The "time_stamp" optional keyword must be either
"begin" or "end".  You gave {0}.
""".format(time_stamp)))

    # interval=None and the all-wildcard label pull every record.
    index, data = _get_data(hbnfilename, None, [",,,"], catalog_only=False)

    sorted_keys = sorted(data.keys())
    series_list = [pd.Series(data[key], index=index) for key in sorted_keys]
    joined = pd.concat(series_list, sort=False, axis=1).reindex(pd.Index(index))
    result = pd.DataFrame(joined)
    result.columns = [
        "{0}_{1}_{2}_{3}".format(key[1], key[2], key[4], key[5])
        for key in sorted_keys
    ]

    if time_stamp == "begin":
        # Shift the end-of-interval stamps back one period.
        result = tsutils.asbestfreq(result)
        result = result.tshift(-1)

    return result
def _columns(tsd, req_column_list=[], optional_column_list=[]): if None in req_column_list: raise ValueError( tsutils.error_wrapper( """ You need to supply the column (name or number, data column numbering starts at 1) for {0} time-series. Instead you gave {1}""".format( len(req_column_list), req_column_list ) ) ) collect = [] for loopvar in req_column_list + optional_column_list: try: nloopvar = int(loopvar) - 1 except TypeError: nloopvar = loopvar if nloopvar is None: collect.append(None) else: collect.append(tsd.ix[:, nloopvar]) return collect
def _open(self, wdname, wdmsfl, ronwfg=0): """Private method to open WDM file.""" wdname = wdname.strip() if wdname not in self.openfiles: if ronwfg in [0, 1]: if not os.path.exists(wdname): raise ValueError( tsutils.error_wrapper(""" Trying to open file "{0}" and it cannot be found. """.format(wdname))) retcode = self.wdbopn(wdmsfl, wdname, ronwfg) self._retcode_check( retcode, additional_info="wdbopn file={0} DSN={1}".format(wdname, "NA")) self.openfiles[wdname] = wdmsfl return wdmsfl
def extract(*wdmpath, **kwds):
    """Print out DSN data to the screen with ISO-8601 dates.

    This is the API version also used by 'extract_cli'
    """
    # Adapt to both forms of presenting wdm files and DSNs
    # Old form '... file.wdm 101 102 103 ...'
    # New form '... file.wdm,101 adifferentfile.wdm,101 ...
    start_date = kwds.pop("start_date", None)
    end_date = kwds.pop("end_date", None)
    if kwds:
        raise ValueError(
            tsutils.error_wrapper("""
The only allowed keywords are start_date and end_date.  You
have given {0}.
""".format(kwds)))

    # Normalize every entry to a [wdm-file, dsn] pair.
    labels = []
    for entry in wdmpath:
        if "," in str(entry):
            labels.append(entry.split(","))
        elif entry != wdmpath[0]:
            labels.append([wdmpath[0], entry])

    result = pd.DataFrame()
    dup_count = 0
    for pair in labels:
        fname, dsn = pair[0], pair[1]
        nts = WDM.read_dsn(fname,
                           int(dsn),
                           start_date=start_date,
                           end_date=end_date)
        if nts.columns[0] in result.columns:
            # Disambiguate duplicate column names with a running counter.
            dup_count += 1
            nts.columns = ["{0}_{1}".format(nts.columns[0], dup_count)]
        result = result.join(nts, how="outer")
    return result
def listdsns(wdmpath):
    """Print out a table describing all DSNs in the WDM.

    Returns an OrderedDict mapping DSN number to its description dict;
    DSNs that raise WDMError are silently skipped.
    """
    if not os.path.exists(wdmpath):
        raise ValueError(
            tsutils.error_wrapper("""
File {0} does not exist.
""".format(wdmpath)))

    collect = OrderedDict()
    # DSN numbers run 1 through 32000 inclusive.
    for dsn in range(1, 32001):
        try:
            collect[dsn] = _describedsn(wdmpath, dsn)
        except wdmutil.WDMError:
            pass
    return collect
def single_target_units(source_units, target_units, default=None, cnt=1):
    """Resolve the target units for a single disaggregated time-series.

    With no *default* the source units pass through unchanged; with no
    *target_units* every column gets *default*.  Otherwise the (single)
    target unit is broadcast to the length of *source_units*.
    """
    if default is None:
        return source_units

    if target_units is None:
        return [default] * len(source_units)

    if len(set(target_units)) != cnt:
        raise ValueError(
            tsutils.error_wrapper("""
Since creating a single disaggregated time-series there can only be
a single "target_units".  You gave "{target_units}".
""".format(**locals())))

    if len(source_units) == len(target_units):
        return target_units

    # Broadcast the single target unit across all source columns.
    return [target_units[0]] * len(source_units)
def extract_arr(filename, *labels):
    """DEPRECATED: Extract and return the raw numpy array.

    DEPRECATED: Will be removed in future version. Instead use the
    following.

    >>> from swmmtoolbox import swmmtoolbox
    >>> na = swmmtoolbox.extract("filename.out", "link,41a,Flow_rate")[0].to_array()

    The `extract_arr` function will return the numpy array for the last
    entry in "*labels".

    Parameters
    ----------
    (unknown)
    {labels}
    """
    warnings.warn(
        tsutils.error_wrapper("""
DEPRECATED: Will be removed in future version. Instead use the
following.

>>> from swmmtoolbox import swmmtoolbox
>>> na = swmmtoolbox.extract("filename.out", "link,41a,Flow_rate")[0].to_array()
"""))
    obj = SwmmExtract(filename)
    for label in labels:
        itemtype, name, variableindex = tsutils.make_list(label, n=3)
        typenumber = obj.type_check(itemtype)
        # "system" items carry no object name to validate.
        if itemtype != "system":
            name = obj.name_check(itemtype, name)[0]
        nperiods = obj.swmm_nperiods
        data = np.zeros(nperiods)
        for period in range(nperiods):
            _, value = obj.get_swmm_results(typenumber, name,
                                            int(variableindex), period)
            data[period] = value
    # Only the array for the last label survives the loop (documented
    # behavior above).
    return data
def radiation(
    method: Literal["pot_rad", "pot_rad_via_ssd", "pot_rad_via_bc", "mean_course"],
    source_units,
    input_ts="-",
    columns=None,
    start_date=None,
    end_date=None,
    dropna="no",
    clean=False,
    round_index=None,
    skiprows=None,
    index_type="datetime",
    names=None,
    target_units=None,
    print_input=False,
    pot_rad=None,
    angstr_a=0.25,
    angstr_b=0.5,
    bristcamp_a=0.75,
    bristcamp_c=2.4,
    hourly_rad=None,
    lat=None,
    lon=None,
    glob_swr_col=None,
):
    """Disaggregate daily radiation to hourly data.

    Wraps disaggregate_radiation(data_daily, sun_times=None, pot_rad=None,
    method='pot_rad', angstr_a=0.25, angstr_b=0.5, bristcamp_a=0.75,
    bristcamp_c=2.4, mean_course=None).

    Args:
        method: keyword specifying the disaggregation method to be used
        pot_rad: hourly dataframe including potential radiation
        angstr_a: parameter a of the Angstrom model (intercept)
        angstr_b: parameter b of the Angstrom model (slope)
        bristcamp_a: parameter a of the Bristow-Campbell model
        bristcamp_c: parameter c of the Bristow-Campbell model
        hourly_rad: CSV filename of HOURLY radiation, required when
            method == 'mean_course'
        glob_swr_col: daily global short wave radiation column, required
            when method in ('pot_rad', 'mean_course')
        lat, lon: site coordinates, required when
            method == 'pot_rad_via_ssd'
    """
    target_units = single_target_units(source_units, target_units, "W/m2")
    # One unit string per source column.  The original multiplied the
    # *string* target_units[0] by len(source_units), producing e.g.
    # "W/m2W/m2" instead of a list of unit strings.
    target_units = [target_units[0]] * len(source_units)

    pd.options.display.width = 60

    if method == "mean_course" and hourly_rad is None:
        raise ValueError(
            tsutils.error_wrapper("""
If method is "mean_course" need to supply CSV filename of hourly
radiation by the `hourly_rad` keyword."""))
    if method in ["pot_rad", "mean_course"] and glob_swr_col is None:
        raise ValueError(
            tsutils.error_wrapper("""
If method is "pot_rad" or "mean_course" need to supply the daily global
short wave radiation as column name or index with keyword
`glob_swr_col`"""))
    if method == "pot_rad_via_bc" and (bristcamp_a is None
                                       or bristcamp_c is None):
        raise ValueError(
            tsutils.error_wrapper("""
If method is "pot_rad_via_bc" need to supply the keywords `bristcamp_a`
and `bristcamp_c`."""))

    tsd = tsutils.common_kwds(
        tsutils.read_iso_ts(input_ts,
                            skiprows=skiprows,
                            names=names,
                            index_type=index_type),
        start_date=start_date,
        end_date=end_date,
        pick=columns,
        round_index=round_index,
        dropna=dropna,
        source_units=source_units,
        target_units=target_units,
        clean=clean,
    )

    if method in ["pot_rad", "mean_course"]:
        # Accept a 1-based column number (possibly as a numeric string)
        # or a column label.  The original "glob_swr_col - 1" raised
        # TypeError (not the caught ValueError) for any string input.
        try:
            glob_swr_col = int(glob_swr_col) - 1
        except (TypeError, ValueError):
            pass
        tsd["glob"] = tsd[glob_swr_col]

    sun_times = None
    if method == "pot_rad_via_ssd":
        # Timezone approximated from longitude (15 degrees per hour).
        sun_times = get_sun_times(tsd.index, float(lon), float(lat),
                                  round(lon / 15.0))

    return tsutils.return_input(
        print_input,
        tsd,
        pd.DataFrame(
            disaggregate_radiation(
                tsd,
                sun_times=sun_times,
                pot_rad=pot_rad,
                method=method,
                angstr_a=angstr_a,
                angstr_b=angstr_b,
                bristcamp_a=bristcamp_a,
                bristcamp_c=bristcamp_c,
                mean_course=hourly_rad,
            )),
    )
def create_new_dsn(
    self,
    wdmpath,
    dsn,
    tstype="",
    base_year=1900,
    tcode=4,
    tsstep=1,
    statid=" ",
    scenario="",
    location="",
    description="",
    constituent="",
    tsfill=-999.0,
):
    """Create self.wdmfp/dsn.

    Builds a new DSN label and writes its integer, real, and character
    search attributes.  Raises DSNExistsError if *dsn* already exists in
    the file, and ValueError when a string attribute exceeds its fixed
    field width.
    """
    # Serialize access to the WDM file across processes.
    lock = SoftFileLock(wdmpath + ".lock", timeout=30)
    with lock:
        wdmfp = self._open(wdmpath, 57)
        messfp = self.wmsgop()

        # wdckdt returns 1 when the DSN already exists.
        if self.wdckdt(wdmfp, dsn) == 1:
            self._close(wdmpath)
            raise DSNExistsError(dsn)

        # Parameters for wdlbax taken from ATCTSfile/clsTSerWDM.cls
        self.wdlbax(
            wdmfp,
            dsn,
            1,    # DSTYPE - always 1 for time series
            10,   # NDN - number of down pointers
            10,   # NUP - number of up pointers
            30,   # NSA - number of search attributes
            100,  # NSASP - amount of search attribute space
            300,  # NDP - number of data pointers
        )  # PSA - pointer to search attribute space

        # Integer search attributes.
        for saind, salen, saval in [
            (34, 1, 6),               # tgroup
            (83, 1, 1),               # compfg
            (84, 1, 1),               # tsform
            (85, 1, 1),               # vbtime
            (17, 1, int(tcode)),      # tcode
            (33, 1, int(tsstep)),     # tsstep
            (27, 1, int(base_year)),  # tsbyr
        ]:
            retcode = self.wdbsai(wdmfp, dsn, messfp, saind, salen, saval)
            self._retcode_check(
                retcode,
                additional_info="wdbsai file={0} DSN={1}".format(
                    wdmpath, dsn),
            )

        # Real (float) search attributes.
        for saind, salen, saval in [(32, 1, tsfill)]:  # tsfill
            retcode = self.wdbsar(wdmfp, dsn, messfp, saind, salen, saval)
            self._retcode_check(
                retcode,
                additional_info="wdbsar file={0} DSN={1}".format(
                    wdmpath, dsn),
            )

        # Character search attributes; each is space-padded to its fixed
        # field width (salen) before writing.
        for saind, salen, saval, error_name in [
            (2, 16, statid, "Station ID"),
            (1, 4, tstype.upper(), "Time series type - tstype"),
            (45, 48, description.upper(), "Description"),
            (288, 8, scenario.upper(), "Scenario"),
            (289, 8, constituent.upper(), "Constituent"),
            (290, 8, location.upper(), "Location"),
        ]:
            saval = saval.strip()
            if len(saval) > salen:
                raise ValueError(
                    tsutils.error_wrapper("""
String "{0}" is too long for {1}.  Must
have a length equal or less than {2}.
""".format(saval, error_name, salen)))
            saval = "{0: <{1}}".format(saval, salen)
            retcode = self.wdbsac(wdmfp, dsn, messfp, saind, salen, saval)
            self._retcode_check(
                retcode,
                additional_info="wdbsac file={0} DSN={1}".format(
                    wdmpath, dsn),
            )
        self._close(wdmpath)
def _get_data(binfilename, interval="daily", labels=[",,,"], catalog_only=True):
    """Underlying function to read from the binary file.  Used by
    'extract', 'catalog', and 'dump'.

    Parameters
    ----------
    binfilename : str
        Path to the HSPF binary (.hbn) output file.
    interval : str or None
        A key of ``interval2codemap`` (e.g. "daily"); None selects all
        intervals (used by ``dump``).
    labels : list of str
        Each label is "OPERATION,ID,SECTION,VARIABLE"; empty fields act
        as wildcards.
        NOTE(review): mutable default argument — safe only because it is
        never mutated here.
    catalog_only : bool
        If True collect only the period of record per key; if False
        collect the data values themselves.

    Returns
    -------
    tuple
        ``(ndates, collect_dict)``.  With catalog_only False, ndates maps
        interval code -> {datetime: 1} and collect_dict maps result key ->
        list of values; with catalog_only True, collect_dict maps key ->
        (start Period, end Period) and ndates stays empty.
    """
    # Valid section names per operation type; the trailing "" entry lets
    # a missing (wildcard) section match.
    testem = {
        "PERLND": [
            "ATEMP", "SNOW", "PWATER", "SEDMNT", "PSTEMP", "PWTGAS",
            "PQUAL", "MSTLAY", "PEST", "NITR", "PHOS", "TRACER", "",
        ],
        "IMPLND": ["ATEMP", "SNOW", "IWATER", "SOLIDS", "IWTGAS", "IQUAL", ""],
        "RCHRES": [
            "HYDR", "CONS", "HTRCH", "SEDTRN", "GQUAL", "OXRX", "NUTRX",
            "PLANK", "PHCARB", "INFLOW", "OFLOW", "ROFLOW", "",
        ],
        "BMPRAC": [""],
        "": [""],
    }

    collect_dict = {}
    lablist = []

    # Normalize interval code (None interval -> None code, matching all).
    try:
        intervalcode = interval2codemap[interval.lower()]
    except AttributeError:
        intervalcode = None

    # Fixup and test the labels - could be in it's own function
    for lindex, label in enumerate(labels):
        words = [lindex] + label.split(",")
        if len(words) != 5:
            raise ValueError(
                tsutils.error_wrapper("""
The label '{0}' has the wrong number of entries.
""".format(label)))

        # Empty fields become None (wildcard).
        words = [None if i == "" else i for i in words]

        if words[1] is not None:
            words[1] = words[1].upper()
            if words[1] not in testem.keys():
                raise ValueError(
                    tsutils.error_wrapper("""
Operation type must be one of 'PERLND', 'IMPLND', 'RCHRES', or
'BMPRAC', or missing (to get all) instead of {0}.
""".format(words[1])))

        if words[2] is not None:
            try:
                words[2] = int(words[2])
                if words[2] < 1 or words[2] > 999:
                    raise ValueError()
            except (ValueError, TypeError):
                raise ValueError(
                    tsutils.error_wrapper("""
The land use element must be an integer from 1 to 999 inclusive,
instead of {0}.
""".format(words[2])))

        if words[3] is not None:
            words[3] = words[3].upper()
            if words[3] not in testem[words[1]]:
                raise ValueError(
                    tsutils.error_wrapper("""
The {0} operation type only allows the variable groups:
{1},
instead you gave {2}.
""".format(words[1], testem[words[1]][:-1], words[3])))

        words.append(intervalcode)
        lablist.append(words)

    with open(binfilename, "rb") as fl:
        mindate = datetime.datetime.max
        maxdate = datetime.datetime.min

        labeltest = {}
        vnames = {}
        ndates = {}
        rectype = 0
        # Skip the single leading byte before the first record header.
        fl.read(1)
        while True:
            # Each record starts with a 4-byte little-endian-style packed
            # length; struct.error here means end of file.
            try:
                reclen1, reclen2, reclen3, reclen = struct.unpack(
                    "4B", fl.read(4))
            except struct.error:
                # End of file.
                break
            # Record header: record type, operation type, land use
            # element, section name.
            rectype, optype, lue, section = struct.unpack(
                "I8sI8s", fl.read(24))
            rectype = int(rectype)
            lue = int(lue)
            optype = optype.strip()
            section = section.strip()
            slen = 0
            if rectype == 0:
                # Header record: reassemble the record length from the
                # four packed bytes, then read the variable-name list.
                reclen1 = int(reclen1 / 4)
                reclen2 = reclen2 * 64 + reclen1
                reclen3 = reclen3 * 16384 + reclen2
                reclen = reclen * 4194304 + reclen3 - 24
                while slen < reclen:
                    length = struct.unpack("I", fl.read(4))[0]
                    slen = slen + length + 4
                    variable_name = struct.unpack(
                        "{0}s".format(length), fl.read(length))[0]
                    vnames.setdefault((lue, section), []).append(variable_name)
            elif rectype == 1:
                # Data record
                numvals = len(vnames[(lue, section)])
                (_, level, year, month, day, hour,
                 minute) = struct.unpack("7I", fl.read(28))
                vals = struct.unpack("{0}f".format(numvals),
                                     fl.read(4 * numvals))
                # HSPF uses hour 24 for end-of-day; roll to midnight of
                # the next day.
                if hour == 24:
                    ndate = (datetime.datetime(year, month, day) +
                             datetime.timedelta(hours=24) +
                             datetime.timedelta(minutes=minute))
                else:
                    ndate = datetime.datetime(year, month, day, hour, minute)

                for i, vname in enumerate(vnames[(lue, section)]):
                    tmpkey = (
                        None,
                        optype.decode("ascii"),
                        int(lue),
                        section.decode("ascii"),
                        vname.decode("ascii"),
                        level,
                    )
                    if catalog_only is False:
                        # Keep only values whose key matches a requested
                        # label (None fields in the label are wildcards).
                        res = tupleSearch(tmpkey, lablist)
                        if res:
                            nres = (res[0][0], ) + res[0][1][1:]
                            labeltest[nres[0]] = 1
                            collect_dict.setdefault(nres, []).append(vals[i])
                            ndates.setdefault(level, {})[ndate] = 1
                    else:
                        # Catalog mode: track the period of record only.
                        mindate = min(mindate, ndate)
                        maxdate = max(maxdate, ndate)
                        pdoffset = code2freqmap[level]
                        collect_dict[tmpkey[1:]] = (
                            pd.Period(mindate, freq=pdoffset),
                            pd.Period(maxdate, freq=pdoffset),
                        )
            else:
                fl.seek(-31, 1)
                # The following should be 1 or 2, but I don't know how to
                # calculate it, so I just use that the 'rectype' must be
                # 0 or 1, and if not rewind the correct amount.
                fl.read(2)

    if not collect_dict:
        raise ValueError(
            tsutils.error_wrapper("""
The label specifications below matched no records in the binary file.

{lablist}
""".format(**locals())))

    if catalog_only is False:
        # Warn about requested labels that matched nothing.
        not_in_file = []
        for loopcnt in list(range(len(lablist))):
            if loopcnt not in labeltest.keys():
                not_in_file.append(labels[loopcnt])
        if not_in_file:
            warnings.warn(
                tsutils.error_wrapper("""
The specification{0} {1}
matched no records in the binary file.
""".format("s"[len(not_in_file) == 1:], not_in_file)))

    return ndates, collect_dict
def extract(hbnfilename, interval, *labels, **kwds):
    r"""Returns a DataFrame from a HSPF binary output file.

    Optional keywords: 'time_stamp' ("begin" or "end") and 'sorted'
    (sort columns by key rather than insertion order).
    """
    time_stamp = kwds.pop("time_stamp", "begin")
    if time_stamp not in ("begin", "end"):
        raise ValueError(
            tsutils.error_wrapper("""
The "time_stamp" optional keyword must be either
"begin" or "end".  You gave {0}.
""".format(time_stamp)))

    sortall = bool(kwds.pop("sorted", False))
    if not (sortall is True or sortall is False):
        raise ValueError(
            tsutils.error_wrapper("""
The "sorted" optional keyword must be either
True or False.  You gave {0}.
""".format(sortall)))

    if kwds:
        raise ValueError(
            tsutils.error_wrapper("""
The extract command only accepts optional keywords 'time_stamp' and
'sorted'.  You gave {0}.
""".format(list(kwds.keys()))))

    interval = interval.lower()
    if interval not in ("bivl", "daily", "monthly", "yearly"):
        raise ValueError(
            tsutils.error_wrapper("""
The "interval" argument must be one of "bivl",
"daily", "monthly", or "yearly".  You supplied
"{0}".
""".format(interval)))

    index, data = _get_data(hbnfilename, interval, labels, catalog_only=False)
    index = sorted(index[interval2codemap[interval]].keys())

    keys = list(data.keys())
    # Either sort by the full key tail or plain key order.
    keys.sort(key=(lambda tup: tup[1:]) if sortall else None)

    joined = pd.concat([pd.Series(data[key], index=index) for key in keys],
                       sort=False,
                       axis=1).reindex(pd.Index(index))
    result = pd.DataFrame(joined)
    result.columns = [
        "{0}_{1}_{2}_{3}".format(key[1], key[2], key[4], key[5])
        for key in keys
    ]

    if time_stamp == "begin":
        result = tsutils.asbestfreq(result)
        result = result.tshift(-1)

    result.index.name = "Datetime"
    return result
def read_dsn(self, wdmpath, dsn, start_date=None, end_date=None):
    """Read from a DSN.

    Returns a single-column pandas DataFrame indexed by datetime; the
    column is named "<basename>_DSN_<dsn>" and the DSN's fill value is
    converted to NaN.  Raises ValueError when the file is missing or the
    requested window falls entirely outside the period of record.
    """
    if not os.path.exists(wdmpath):
        raise ValueError(
            tsutils.error_wrapper("""
File {0} does not exist.
""".format(wdmpath)))

    # Call wdatim_ to get LLSDAT, LLEDAT, TSTEP, TCODE
    desc_dsn = self.describe_dsn(wdmpath, dsn)

    llsdat = desc_dsn["llsdat"]
    lledat = desc_dsn["lledat"]
    tcode = desc_dsn["tcode"]
    tstep = desc_dsn["tstep"]
    tsfill = desc_dsn["tsfill"]

    # These calls convert 24 to midnight of the next day
    self.timcvt(llsdat)
    self.timcvt(lledat)

    if start_date is not None:
        start_date = self.dateconverter(start_date)
        start_date = datetime.datetime(*start_date)
        if start_date > datetime.datetime(*lledat):
            raise ValueError(
                tsutils.error_wrapper("""
The requested start date ({0}) is after the end date ({1})
of the time series in the WDM file.
""".format(start_date, datetime.datetime(*lledat))))
    if end_date is not None:
        end_date = self.dateconverter(end_date)
        end_date = datetime.datetime(*end_date)
        if end_date < datetime.datetime(*llsdat):
            raise ValueError(
                tsutils.error_wrapper("""
The requested end date ({0}) is before the start date ({1})
of the time series in the WDM file.
""".format(end_date, datetime.datetime(*llsdat))))

    # Number of values in the period of record at this tcode/tstep.
    iterm = self.timdif(llsdat, lledat, tcode, tstep)

    dtran = 0    # presumably "no data transformation" — verify in wdtget docs
    qualfg = 30  # presumably quality-flag threshold — verify in wdtget docs

    # Get the data and put it into dictionary
    wdmfp = self._open(wdmpath, 59, ronwfg=1)
    dataout, retcode = self.wdtget(wdmfp, dsn, tstep, llsdat, iterm,
                                   dtran, qualfg, tcode)
    self._close(wdmpath)

    if len(dataout) == 0:
        return pd.DataFrame()

    self._retcode_check(retcode,
                        additional_info="wdtget file={0} DSN={1}".format(
                            wdmpath, dsn))

    # Regular index at the DSN's native frequency.
    index = pd.date_range(
        datetime.datetime(*llsdat),
        periods=iterm,
        freq="{0:d}{1}".format(tstep, MAPTCODE[tcode]),
    )

    # Convert time series to pandas DataFrame
    tmpval = pd.DataFrame(
        pd.Series(
            dataout,
            index=index,
            name="{0}_DSN_{1}".format(os.path.basename(wdmpath), dsn),
        ),
        dtype=np.float64,
    )
    tmpval = tsutils.common_kwds(tmpval,
                                 start_date=start_date,
                                 end_date=end_date)
    tmpval.replace(tsfill, np.nan, inplace=True)
    tmpval.index.name = "Datetime"
    return tmpval
def wind_speed(
    method: Literal["equal", "cosine", "random"],
    source_units,
    input_ts="-",
    columns=None,
    start_date=None,
    end_date=None,
    dropna="no",
    clean=False,
    round_index=None,
    skiprows=None,
    index_type="datetime",
    names=None,
    target_units=None,
    print_input=False,
    a=None,
    b=None,
    t_shift=None,
):
    """Disaggregate daily wind speed to hourly data.

    Wraps disaggregate_wind(wind_daily, method='equal', a=None, b=None,
    t_shift=None).

    Args:
        method: keyword specifying the disaggregation method to be used;
            "cosine" requires `a`, `b`, and `t_shift`.
        a: parameter a for the cosine function
        b: parameter b for the cosine function
        t_shift: parameter t_shift for the cosine function
    """
    target_units = single_target_units(source_units, target_units, "m/s")
    # One unit string per source column.  The original multiplied the
    # *string* target_units[0] by len(source_units), yielding e.g.
    # "m/sm/s" instead of a list of unit strings.
    target_units = [target_units[0]] * len(source_units)

    pd.options.display.width = 60

    if method == "cosine" and (a is None or b is None or t_shift is None):
        raise ValueError(
            tsutils.error_wrapper("""
For the "cosine" method, requires the `a`, `b`, and `t_shift`
keywords.  You gave:

a = {a}

b = {b}

t_shift = {t_shift}
""".format(**locals())))

    tsd = tsutils.common_kwds(
        tsutils.read_iso_ts(input_ts,
                            skiprows=skiprows,
                            names=names,
                            index_type=index_type),
        start_date=start_date,
        end_date=end_date,
        pick=columns,
        round_index=round_index,
        dropna=dropna,
        source_units=source_units,
        target_units=target_units,
        clean=clean,
    )

    return tsutils.return_input(
        print_input,
        tsd,
        pd.DataFrame(
            disaggregate_wind(tsd, method=method, a=a, b=b, t_shift=t_shift)),
    )
def humidity(
    method: Literal["equal",
                    "minimal",
                    "dewpoint_regression",
                    "linear_dewpoint_variation",
                    "min_max",
                    "month_hour_precip_mean", ],
    source_units,
    input_ts="-",
    columns=None,
    start_date=None,
    end_date=None,
    dropna="no",
    clean=False,
    round_index=None,
    skiprows=None,
    index_type="datetime",
    names=None,
    target_units=None,
    print_input=False,
    hum_min_col=None,
    hum_max_col=None,
    hum_mean_col=None,
    temp_min_col=None,
    temp_max_col=None,
    a0=None,
    a1=None,
    kr=None,
    hourly_temp=None,
    preserve_daily_mean=None,
):
    """Disaggregate daily humidity to hourly humidity data.

    Wraps disaggregate_humidity(data_daily, method='equal', temp=None,
    a0=None, a1=None, kr=None, month_hour_precip_mean=None,
    preserve_daily_mean=False).

    Args:
        method: keyword specifying the disaggregation method to be used
        kr: parameter for linear_dewpoint_variation method (6 or 12)
        a0, a1: regression parameters for the dewpoint methods
        hourly_temp: filename of an HOURLY temperature series, required
            for every method except "equal"
        preserve_daily_mean: column identifier; when given, the daily
            means of the disaggregated data are corrected to the
            observed daily means from that column.
    """
    target_units = single_target_units(source_units, target_units, "")

    if method == "equal" and hum_mean_col is None:
        raise ValueError(
            tsutils.error_wrapper("""
If `method` is "equal" then the mean daily humidity is a required
column identified with the keyword `hum_mean_col`"""))

    if (method in ["minimal", "dewpoint_regression",
                   "linear_dewpoint_variation"] and temp_min_col is None):
        raise ValueError(
            tsutils.error_wrapper("""
If `method` is "minimal", "dewpoint_regression", or
"linear_dewpoint_variation" then the minimum daily temperature is a
required column identified with the keyword `temp_min_col`."""))

    if method == "min_max" and (hum_min_col is None or hum_max_col is None
                                or temp_min_col is None
                                or temp_max_col is None):
        raise ValueError(
            tsutils.error_wrapper("""
If `method` is "min_max" then:

Minimum daily humidity is a required column identified with the
keyword `hum_min_col`.  You gave {hum_min_col}.

Maximum daily humidity is a required column identified with the
keyword `hum_max_col`.  You gave {hum_max_col}.

Minimum daily temperature is a required column identified with the
keyword `temp_min_col`.  You gave {temp_min_col}.

Maximum daily temperature is a required column identified with the
keyword `temp_max_col`.  You gave {temp_max_col}.
""".format(**locals())))

    if method in ["dewpoint_regression", "linear_dewpoint_variation"
                  ] and (a0 is None or a1 is None):
        raise ValueError(
            tsutils.error_wrapper("""
If `method` is "dewpoint_regression" or "linear_dewpoint_variation"
then a0 and a1 must be given."""))

    if method == "linear_dewpoint_variation" and kr is None:
        raise ValueError(
            tsutils.error_wrapper("""
If `method` is "linear_dewpoint_variation" then kr must be given"""))

    if (method in [
            "minimal",
            "dewpoint_regression",
            "linear_dewpoint_variation",
            "min_max",
            "month_hour_precip_mean",
    ] and hourly_temp is None):
        raise ValueError(
            tsutils.error_wrapper("""
If `method` is "minimal", "dewpoint_regression",
"linear_dewpoint_variation", or "min_max" then hourly temperature is
required identified by the filename in keyword `hourly_temp`."""))

    pd.options.display.width = 60

    tsd = tsutils.common_kwds(
        tsutils.read_iso_ts(input_ts,
                            skiprows=skiprows,
                            names=names,
                            index_type=index_type),
        start_date=start_date,
        end_date=end_date,
        pick=columns,
        round_index=round_index,
        dropna=dropna,
        source_units=source_units,
        target_units=target_units,
        clean=clean,
    )

    # Map the user-specified columns onto the names disaggregate_humidity
    # expects.  The original indexed with the literal strings
    # ("hum_mean_col", ...) instead of the keyword variables, which can
    # never match a real column name.
    if method == "equal":
        tsd["hum"] = tsd[hum_mean_col]

    if method in [
            "minimal", "dewpoint_regression", "linear_dewpoint_variation"
    ]:
        tsd["tmin"] = tsd[temp_min_col]

    if method == "min_max":
        tsd["hum_min"] = tsd[hum_min_col]
        tsd["hum_max"] = tsd[hum_max_col]
        tsd["tmin"] = tsd[temp_min_col]
        tsd["tmax"] = tsd[temp_max_col]

    if preserve_daily_mean is not None:
        tsd["hum"] = tsd[preserve_daily_mean]
        preserve_daily_mean = True

    if method in [
            "minimal",
            "dewpoint_regression",
            "linear_dewpoint_variation",
            "min_max",
            "month_hour_precip_mean",
    ]:
        hourly_temp = tstoolbox.read(hourly_temp)

    # Initialize so the disaggregate_humidity call below never hits a
    # NameError when method != "month_hour_precip_mean" (original bug).
    month_hour_precip_mean = None
    if method == "month_hour_precip_mean":
        # NOTE(review): this passes the hourly *temperature* series;
        # presumably an hourly precipitation series is intended — verify.
        month_hour_precip_mean = calculate_month_hour_precip_mean(hourly_temp)

    ntsd = pd.DataFrame(
        disaggregate_humidity(
            tsd,
            method=method,
            temp=hourly_temp,
            a0=a0,
            a1=a1,
            kr=kr,
            preserve_daily_mean=preserve_daily_mean,
            month_hour_precip_mean=month_hour_precip_mean,
        ))

    # Fill in the units placeholder; the original left the literal "{0}"
    # in the column name.
    ntsd.columns = ["humidity:{0}:disagg".format(target_units[0])]

    return tsutils.return_input(print_input, tsd, ntsd)
def csvtowdm(
    wdmpath,
    dsn,
    start_date=None,
    end_date=None,
    columns=None,
    force_freq=None,
    groupby=None,
    round_index=None,
    clean=False,
    target_units=None,
    source_units=None,
    input_ts="-",
):
    """Write data from a CSV file to a DSN.

    File can have comma separated
    'year', 'month', 'day', 'hour', 'minute', 'second', 'value'
    OR
    'date/time string', 'value'

    Parameters
    ----------
    wdmpath
        Path and WDM filename.
    dsn
        The Data Set Number in the WDM file.
    {input_ts}
    {start_date}
    {end_date}
    {columns}
    {force_freq}
    {groupby}
    {round_index}
    {clean}
    {target_units}
    {source_units}
    """
    tsd = tsutils.common_kwds(
        tsutils.read_iso_ts(input_ts),
        start_date=start_date,
        end_date=end_date,
        pick=columns,
        force_freq=force_freq,
        groupby=groupby,
        round_index=round_index,
        clean=clean,
        target_units=target_units,
        source_units=source_units,
    )

    # A DSN holds exactly one series.
    ncols = len(tsd.columns)
    if ncols > 1:
        raise ValueError(
            tsutils.error_wrapper("""
The input data set must contain only 1 time series.
You gave {0}.
""".format(ncols)))

    _writetodsn(wdmpath, dsn, tsd)
def _writetodsn(wdmpath, dsn, data):
    """Local function to write Pandas data frame to DSN.

    Infers the series frequency, checks it against the target DSN's
    tcode/tstep, and delegates the write to WDM.write_dsn.
    """
    data = tsutils.asbestfreq(data)
    infer = data.index.freqstr

    # Split e.g. "15T" into a step of 15 and a pandas code of "T".
    pandacode = infer.lstrip("0123456789")
    try:
        tstep = int(infer[:infer.find(pandacode)])
    except ValueError:
        # No leading digits means a step of one.
        tstep = 1

    # WDM tcode -> human-readable interval name (for error messages).
    invmapcode = {
        1: "second",
        2: "minute",
        3: "hour",
        4: "day",
        5: "month",
        6: "annual",
    }

    # Pandas frequency code -> WDM tcode.
    mapcode = {
        "A": 6,      # annual
        "A-DEC": 6,  # annual
        "AS": 6,     # annual start
        "M": 5,      # month
        "MS": 5,     # month start
        "D": 4,      # day
        "H": 3,      # hour
        "T": 2,      # minute
        "S": 1,      # second
    }
    try:
        finterval = mapcode[pandacode]
    except KeyError:
        raise KeyError("""
*
*   wdmtoolbox only understands PANDAS time intervals of :
*   'A', 'AS', 'A-DEC' for annual,
*   'M', 'MS' for monthly,
*   'D', 'H', 'T', 'S' for day, hour, minute, and second.
*   wdmtoolbox thinks this series is {0}.
*
""".format(pandacode))

    # Convert string to int
    dsn = int(dsn)

    # Make sure that input data metadata matches target DSN
    desc_dsn = _describedsn(wdmpath, dsn)

    dsntcode = desc_dsn["tcode"]
    if finterval != dsntcode:
        raise ValueError(
            tsutils.error_wrapper("""
The DSN {2} has a tcode of {0} ({3}),
but the data has a tcode of {1} ({4}).
""".format(
                dsntcode,
                finterval,
                dsn,
                invmapcode[dsntcode],
                invmapcode[finterval],
            )))

    dsntstep = desc_dsn["tstep"]
    if dsntstep != tstep:
        raise ValueError(
            tsutils.error_wrapper("""
The DSN has a tstep of {0}, but the data has a tstep of {1}.
""".format(dsntstep, tstep)))

    WDM.write_dsn(wdmpath, dsn, data)
def process(uci, hbn, pwbe, year, ofilename, modulus, tablefmt, float_format=".2f"):
    """Build and print a PERLND/IMPLND water-balance table from an HSPF run.

    Reads yearly water-balance terms from the binary output file `hbn`
    (optionally restricted to one `year`), optionally reads the UCI file to
    get land-cover names and areas, aggregates the terms listed in `pwbe`
    per land-cover class (land-cover id = operation number modulo
    `modulus`), and prints the result with `tabulate` in `tablefmt`.
    """
    from hspfbintoolbox.hspfbintoolbox import extract

    # Redirect all subsequent print() output to the requested file.
    if ofilename:
        sys.stdout = open(ofilename, "w")

    try:
        year = int(year)
    except TypeError:
        # year is None -> keep all years.
        pass

    # Fallback mappings used when no UCI file is given.
    # NOTE(review): range(modulus + 1, 1) is empty for any modulus >= 0, so
    # these are always {} — confirm whether range(1, modulus + 1) was
    # intended.
    lcnames = dict(zip(range(modulus + 1, 1), zip(range(modulus + 1, 1))))
    inverse_lcnames = dict(
        zip(range(modulus + 1, 1), zip(range(modulus + 1, 1))))
    inverse_lc = {}

    # (operation, opnum) -> summed area from the SCHEMATIC block.
    lnds = {}

    if uci is not None:
        with open(uci) as fp:
            content = fp.readlines()

        if not os.path.exists(hbn):
            raise ValueError(f"""
*
*   File {hbn} does not exist.
*
""")

        # UCI files are fixed-format: only the first 80 columns matter.
        content = [i[:80] for i in content]
        content = [i.rstrip() for i in content]

        # Slice out the SCHEMATIC, PERLND/GEN-INFO, and MASS-LINK blocks.
        schematic_start = content.index("SCHEMATIC")
        schematic_end = content.index("END SCHEMATIC")
        schematic = content[schematic_start:schematic_end + 1]

        perlnd_start = content.index("PERLND")
        perlnd_end = content.index("END PERLND")
        perlnd = content[perlnd_start:perlnd_end + 1]

        pgeninfo_start = perlnd.index(" GEN-INFO")
        pgeninfo_end = perlnd.index(" END GEN-INFO")
        pgeninfo = perlnd[pgeninfo_start:pgeninfo_end + 1]

        masslink_start = content.index("MASS-LINK")
        masslink_end = content.index("END MASS-LINK")
        masslink = content[masslink_start:masslink_end + 1]

        # Land-cover names from GEN-INFO: columns 1-5 hold the operation
        # number, columns 11-30 the name.
        lcnames = {}
        inverse_lcnames = {}
        inverse_lc = {}
        for line in pgeninfo[1:-1]:
            if "***" in line:
                # Comment line.
                continue
            if "" == line.strip():
                continue
            try:
                # Lines with a second number in columns 6-10 are range
                # continuation lines; skip them.
                _ = int(line[5:10])
                continue
            except ValueError:
                pass
            lcnames.setdefault(line[10:30].strip(), []).append(int(line[:5]))
            inverse_lcnames[int(line[:5])] = line[10:30].strip()
            inverse_lc[int(line[:5]) % modulus] = line[10:30].strip()

        # Find MASS-LINK group numbers that route PWATER/IWATER outflow
        # (these identify the water links referenced in SCHEMATIC).
        masslink = [i for i in masslink if "***" not in i]
        masslink = [i for i in masslink if len(i.strip()) > 0]
        masslink = " ".join(masslink)
        mlgroups = re.findall(
            r" MASS-LINK +?([0-9]+).*?LND [PI]WATER [PS][EU]RO.*? END MASS-LINK +?\1 ",
            masslink,
        )

        # Sum SCHEMATIC areas per (operation, opnum) for lines that use one
        # of the water mass-link groups (group number is word 6).
        for line in schematic[3:-1]:
            if "***" in line:
                continue
            if "" == line:
                continue
            words = line.split()
            if words[0] in ["PERLND", "IMPLND"] and words[5] in mlgroups:
                lnds[(words[0], int(words[1]))] = lnds.setdefault(
                    (words[0], int(words[1])), 0.0) + float(words[2])

    try:
        # All yearly series for every operation/variable in the file.
        pdf = extract(hbn, "yearly", ",,,")
    except ValueError:
        raise ValueError(
            tsutils.error_wrapper(f"""
The binary file "{hbn}" does not have consistent ending months between
PERLND and IMPLND.  This could be caused by the BYREND (Binary YeaR
END) being set differently in the PERLND:BINARY-INFO and
IMPLND:BINARY-INFO, or you could have the PRINT-INFO bug.  To work
around the PRINT-INFO bug, add a PERLND PRINT-INFO block, setting the
PYREND here will actually work in the BINARY-INFO block.
"""))

    if year is not None:
        # Keep only the requested water year (rows become a single year).
        pdf = pd.DataFrame(pdf.loc[f"{year}-01-01", :]).T
    pdf = pdf[[i for i in pdf.columns if "PERLND" in i or "IMPLND" in i]]

    # Column names look like "OP_NUM_WBT"; build a MultiIndex of
    # (operation, number, water-balance term, land-cover class).
    mindex = [i.split("_") for i in pdf.columns]
    mindex = [(i[0], int(i[1]), i[2], int(i[1]) % modulus) for i in mindex]
    mindex = pd.MultiIndex.from_tuples(mindex, names=["op", "number", "wbt", "lc"])
    pdf.columns = mindex
    pdf = pdf.sort_index(axis="columns")

    # Extend the MultiIndex with the UCI-derived area and land-cover name.
    mindex = pdf.columns
    aindex = [(i[0], i[1]) for i in pdf.columns]
    mindex = [(
        i[0],
        int(i[1]),
        i[2],
        int(i[1]) % modulus,
        float(lnds.setdefault(j, 0.0)),
        str(inverse_lcnames.setdefault(int(i[1]), "")),
    ) for i, j in zip(mindex, aindex)]
    mindex = pd.MultiIndex.from_tuples(
        mindex, names=["op", "number", "wbt", "lc", "area", "lcname"])
    pdf.columns = mindex

    # nsum[(op, lc, term)] -> mean yearly value (area-weighted when a UCI
    # is supplied); areas[(op, lc)] -> total area; namelist -> lc labels.
    nsum = {}
    areas = {}
    namelist = {}
    # Flatten the per-row term lists out of the pwbe table definition.
    setl = [i[1] for i in pwbe]
    setl = [item for sublist in setl for item in sublist]
    for lue in ["PERLND", "IMPLND"]:
        for wbterm in [i[0] for i in setl if i[0]]:
            for lc in list(range(1, modulus + 1)):
                try:
                    subset = pdf.loc[:, (lue, slice(None), wbterm, lc,
                                         slice(None), slice(None))]
                except KeyError:
                    continue

                _give_negative_warning(subset)

                if uci is None:
                    # No areas available: plain mean over operations.
                    if subset.empty is True:
                        nsum[(lue, lc, wbterm)] = 0.0
                        if (lue, lc) not in namelist:
                            namelist[(lue, lc)] = ""
                    else:
                        nsum[(lue, lc, wbterm)] = subset.mean(
                            axis="columns").mean()
                        namelist[(lue, lc)] = inverse_lc.setdefault(lc, lc)
                else:
                    # Area-weighted mean over the operations in this class.
                    sareas = subset.columns.get_level_values("area")
                    ssareas = sum(sareas)
                    if (lue, lc) not in areas:
                        areas[(lue, lc)] = ssareas

                    if subset.empty is True or ssareas == 0:
                        nsum[(lue, lc, wbterm)] = 0.0
                        if (lue, lc) not in namelist:
                            namelist[(lue, lc)] = ""
                    else:
                        fa = sareas / areas[(lue, lc)]
                        nsum[(lue, lc, wbterm)] = ((subset * fa).sum(
                            axis="columns").mean())
                        namelist[(lue, lc)] = inverse_lc.setdefault(lc, lc)

    # Column headers: "lc" or "lc-name" for each PERLND land-cover class.
    newnamelist = []
    for key, value in sorted(namelist.items()):
        if key[0] != "PERLND":
            continue
        if key[1] == value:
            newnamelist.append(f"{key[1]}")
        else:
            newnamelist.append(f"{key[1]}-{value}")

    printlist = []
    printlist.append([" "] + newnamelist + ["ALL"])

    # Impervious/pervious weighting ratios (identity unless a UCI is read).
    mapipratio = {}
    mapipratio["PERLND"] = 1.0
    mapipratio["IMPLND"] = 1.0

    if uci is not None:
        pareas = []
        pnl = []
        iareas = []
        for nloper, nllc in namelist:
            if nloper == "PERLND":
                pnl.append((nloper, nllc))
                pareas.append(areas[("PERLND", nllc)])
        # If there is a PERLND there must be a IMPLND.
        for ploper, pllc in pnl:
            try:
                iareas.append(areas[("IMPLND", pllc)])
            except KeyError:
                iareas.append(0.0)
        # Impervious fraction per class; NaN (0/0) -> 0.
        ipratio = np.array(iareas) / (np.array(pareas) + np.array(iareas))
        ipratio = np.nan_to_num(ipratio)
        ipratio = np.pad(ipratio, (0, len(pareas) - len(iareas)), "constant")
        sumareas = sum(pareas) + sum(iareas)

        percent_areas = {}
        percent_areas["PERLND"] = np.array(pareas) / sumareas * 100
        percent_areas["IMPLND"] = np.array(iareas) / sumareas * 100
        percent_areas[
            "COMBINED"] = percent_areas["PERLND"] + percent_areas["IMPLND"]

        # Area summary rows for the report header.
        printlist.append(["PERVIOUS"])
        printlist.append(["AREA(acres)"] +
                         [str(i) if i > 0 else "" for i in pareas] +
                         [str(sum(pareas))])
        printlist.append(
            ["AREA(%)"] +
            [str(i) if i > 0 else "" for i in percent_areas["PERLND"]] +
            [str(sum(percent_areas["PERLND"]))])
        printlist.append([])
        printlist.append(["IMPERVIOUS"])
        printlist.append(["AREA(acres)"] +
                         [str(i) if i > 0 else "" for i in iareas] +
                         [str(sum(iareas))])
        printlist.append(
            ["AREA(%)"] +
            [str(i) if i > 0 else "" for i in percent_areas["IMPLND"]] +
            [str(sum(percent_areas["IMPLND"]))])
        printlist.append([])

        mapipratio["PERLND"] = 1.0 - ipratio
        mapipratio["IMPLND"] = ipratio

    # Unit ratios used when a row draws from only one operation type.
    mapr = {}
    mapr["PERLND"] = 1.0
    mapr["IMPLND"] = 1.0

    # One output row per pwbe entry; empty term -> blank spacer row.
    for term, op in pwbe:
        if not term:
            printlist.append([])
            continue

        test = [i[1] for i in op]
        if "IMPLND" in test and "PERLND" in test:
            maprat = mapipratio
            sumop = "COMBINED"
        else:
            maprat = mapr
            sumop = test[0]

        te = [0.0]
        for sterm, operation in op:
            try:
                tmp = np.array([
                    nsum[(*i, sterm)] for i in sorted(namelist)
                    if i[0] == operation
                ])
                if uci is not None:
                    tmp = (np.pad(tmp, (0, len(pareas) - len(tmp)),
                                  "constant") * maprat[operation])
                te = te + tmp
            except KeyError:
                # Term not present for this operation type; contributes 0.
                pass
        if uci is None:
            te = ([term] + [str(i) if i > 0 else "" for i in te] +
                  [str(sum(te) / len(te))])
        else:
            # "ALL" column is the area-percentage-weighted total.
            nte = np.pad(te, (0, len(iareas) - len(te)), "constant")
            te = ([term] + [str(i) if i > 0 else "" for i in nte] +
                  [str(sum(nte * percent_areas[sumop]) / 100)])
        printlist.append(te)

    if tablefmt in ["csv", "tsv", "csv_nos", "tsv_nos"]:
        # NOTE(review): "\\t" is a literal backslash-t separator — confirm
        # whether a real tab character ("\t") was intended here.
        sep = {
            "csv": ",",
            "tsv": "\\t",
            "csv_nos": ",",
            "tsv_nos": "\\t"
        }[tablefmt]
        fmt = simple_separated_format(sep)
    else:
        fmt = tablefmt
    if tablefmt in ["csv_nos", "tsv_nos"]:
        # "_nos" variants strip the padding spaces around the separator.
        print(
            re.sub(" *, *", ",",
                   tabulate(printlist, tablefmt=fmt, floatfmt=float_format)))
    else:
        print(tabulate(printlist, tablefmt=fmt, floatfmt=float_format))
def temperature(
    method: Literal["sine_min_max", "sine_mean", "sine", "mean_course_min_max",
                    "mean_course_mean"],
    source_units,
    min_max_time: Literal["fix", "sun_loc", "sun_loc_shift"] = "fix",
    mod_nighttime: bool = False,
    input_ts="-",
    start_date=None,
    end_date=None,
    dropna="no",
    clean=False,
    round_index=None,
    skiprows=None,
    index_type="datetime",
    names=None,
    print_input=False,
    target_units=None,
    max_delta: bool = False,
    temp_min_col: Optional[Union[tsutils.IntGreaterEqualToOne, str]] = None,
    temp_max_col: Optional[Union[tsutils.IntGreaterEqualToOne, str]] = None,
    temp_mean_col: Optional[Union[tsutils.IntGreaterEqualToOne, str]] = None,
    lat: Optional[FloatLatitude] = None,
    lon: Optional[FloatLongitude] = None,
    hourly: Optional[str] = None,
):
    """Disaggregate daily to hourly data.

    disaggregate_temperature(data_daily,
                             method='sine_min_max',
                             min_max_time='fix',
                             mod_nighttime=False,
                             max_delta=None,
                             mean_course=None,
                             sun_times=None):

    data_daily :
        daily data
    method :
        method to disaggregate
    min_max_time:
        "fix" - min/max temperature at fixed times 7h/14h,
        "sun_loc" - min/max calculated by sunrise/sunnoon + 2h,
        "sun_loc_shift" - min/max calculated by sunrise/sunnoon + monthly
        mean shift,
    mod_nighttime:
        ?
    max_delta:
        maximum monthly temperature shift as returned by get_shift_by_data()
    mean_course:
        ?
    sun_times:
        times of sunrise/noon as returned by get_sun_times()

    if method in ('sine_min_max', 'sine_mean', 'sine')
        .tmin from temp_min_col
        .tmax from temp_max_col
        .temp from temp_mean_col

    if method in ('mean_course_min', 'mean_course_mean')
        .tmin from temp_min_col
        .tmax from temp_max_col
        require hourly temperature perhaps from nearby station in
        'mean_course'

    if method == 'mean_course_mean'
        .temp from temp_mean_col

    sun_times = get_sun_times(dates, lon, lat, round(lon/15.0))

    max_delta = get_shift_by_data(temp_hourly, lon, lat, round(lon/15.0))
    """
    target_units = single_target_units(source_units, target_units, "degC")

    pd.options.display.width = 60

    if (method in ["mean_course_min", "mean_course_mean"]
            or max_delta is True) and hourly is None:
        raise ValueError(
            tsutils.error_wrapper("""
The methods "mean_course_min", "mean_course_mean", or if `max_delta` is
True, require a HOURLY temperature values in the CSV file specified by the
keyword `hourly`."""))

    # NOTE(review): the `method` Literal advertises "mean_course_min_max"
    # while the runtime checks test for "mean_course_min" — confirm which
    # spelling is correct.
    if method in ["mean_course_min", "mean_course_mean"] or max_delta is True:
        hourly = tstoolbox.read(hourly)

    if max_delta is True:
        max_delta = get_shift_by_data(hourly, lon, lat, round(lon / 15.0))
    else:
        max_delta = None

    if temp_min_col is None or temp_max_col is None:
        raise ValueError(
            tsutils.error_wrapper("""
For "temperature" disaggregation you need to supply the daily minimum
column (name or number, data column numbering starts at 1) and the daily
maximum column (name or number).

Instead `temp_min_col` is {temp_min_col} and `temp_max_col` is
{temp_max_col}""".format(**locals())))

    # Columns may be given by 1-based number or by name.  BUG FIX: catch
    # ValueError as well as TypeError — int("some_name") raises ValueError,
    # so string column names previously crashed here.
    columns = []
    try:
        temp_min_col = int(temp_min_col)
    except (TypeError, ValueError):
        pass
    columns.append(temp_min_col)
    try:
        temp_max_col = int(temp_max_col)
    except (TypeError, ValueError):
        pass
    columns.append(temp_max_col)
    if temp_mean_col is not None:
        try:
            temp_mean_col = int(temp_mean_col)
        except (TypeError, ValueError):
            pass
        columns.append(temp_mean_col)

    tsd = tsutils.common_kwds(
        tsutils.read_iso_ts(input_ts,
                            skiprows=skiprows,
                            names=names,
                            index_type=index_type),
        start_date=start_date,
        end_date=end_date,
        pick=columns,
        round_index=round_index,
        dropna=dropna,
        source_units=source_units,
        target_units=target_units,
        clean=clean,
    )

    if len(tsd.columns) == 3:
        tsd.columns = ["tmin", "tmax", "temp"]
    else:
        tsd.columns = ["tmin", "tmax"]

    # The daily minimum must be strictly below the daily maximum.
    if any((tsd.tmax <= tsd.tmin).dropna()):
        raise ValueError(
            tsutils.error_wrapper("""
On the following dates:

{0},

minimum temperature values in column "{1}" are greater than or equal to
the maximum temperature values in column "{2}".""".format(
                tsd[tsd.tmax <= tsd.tmin].index, temp_min_col,
                temp_max_col)))

    if temp_mean_col is None:
        warnings.warn(
            tsutils.error_wrapper("""
Since `temp_mean_col` is None, the average daily temperature will be
estimated by the average of `temp_min_col` and `temp_max_col`"""))

        tsd["temp"] = (tsd.tmin + tsd.tmax) / 2.0

    # The daily mean must lie strictly between the daily min and max.
    if any((tsd.tmin >= tsd.temp).dropna()) or any(
            (tsd.tmax <= tsd.temp).dropna()):
        raise ValueError(
            tsutils.error_wrapper("""
On the following dates:

{0},

the daily average is either below or equal to the minimum temperature in
column {1} or higher or equal to the maximum temperature in column
{2}.""".format(
                # BUG FIX: "tsd.tmin >= tsd.temp | tsd.tmax <= tsd.temp"
                # misparsed because "|" binds tighter than comparisons;
                # parenthesize each comparison and report the offending
                # dates (.index), matching the tmin/tmax check above.
                tsd[(tsd.tmin >= tsd.temp) | (tsd.tmax <= tsd.temp)].index,
                temp_min_col,
                temp_max_col,
            )))

    if min_max_time == "fix":
        # Not dependent on sun, just average values.
        sun_times = pd.DataFrame(
            index=[1], columns=["sunrise", "sunnoon", "sunset", "daylength"])
        sun_times.sunrise = 7
        sun_times.sunnoon = 12
        sun_times.sunset = 19
        sun_times.daylength = 12
    else:
        if lat is None or lon is None:
            raise ValueError(
                tsutils.error_wrapper("""
The `min_max_time` options other than "fix" require calculation of
sunrise, sun noon, sunset, and day length.  The calculation requires the
latitude with keyword "lat" and longitude with keyword "lon".

You gave:

lat={lat}

lon={lon}
""".format(**locals())))

        sun_times = get_sun_times(tsd.index, float(lon), float(lat),
                                  round(lon / 15.0))

    ntsd = pd.DataFrame(
        disaggregate_temperature(
            tsd,
            method=method,
            min_max_time=min_max_time,
            mod_nighttime=mod_nighttime,
            max_delta=max_delta,
            mean_course=hourly,
            sun_times=sun_times,
        ))

    ntsd.columns = ["temperature:{0}:disagg".format(target_units[0])]

    return tsutils.return_input(print_input, tsd, ntsd)
def _preprocess(
    input_ts,
    temp_min_col,
    temp_max_col,
    temp_mean_col,
    temp_min_required,
    temp_max_required,
    skiprows,
    names,
    index_type,
    start_date,
    end_date,
    round_index,
    dropna,
    clean,
    source_units,
):
    """Read the temperature columns, convert them to degC, and validate them."""
    # Resolve which columns are needed and what they will be renamed to.
    columns, column_names = utils._check_temperature_cols(
        temp_min_col=temp_min_col,
        temp_max_col=temp_max_col,
        temp_mean_col=temp_mean_col,
        temp_min_required=temp_min_required,
        temp_max_required=temp_max_required,
    )

    tsd = tsutils.common_kwds(
        input_ts,
        skiprows=skiprows,
        names=names,
        index_type=index_type,
        start_date=start_date,
        end_date=end_date,
        pick=columns,
        round_index=round_index,
        dropna=dropna,
        clean=clean,
    )

    if source_units is None:
        # If "source_units" keyword is None must have source_units in column name.
        collected = []
        for cname in tsd.columns:
            fields = cname.split(":")
            if len(fields) < 2:
                raise ValueError(
                    tsutils.error_wrapper(
                        """
If "source_units" are not supplied as the second ":" delimited field in
the column name they must be supplied with the "source_units" keyword.
"""
                    )
                )
            collected.append(fields[1])
        source_units = collected
    else:
        source_units = tsutils.make_list(source_units)
    if len(source_units) != len(tsd.columns):
        raise ValueError(
            tsutils.error_wrapper(
                """
The number of "source_units" terms must match the number of temperature
columns.
"""
            )
        )

    # Convert every temperature column to degC before validation.
    interim_target_units = ["degC"] * len(tsd.columns)

    tsd = tsutils.common_kwds(
        tsd,
        source_units=source_units,
        target_units=interim_target_units,
    )

    tsd.columns = column_names
    return utils._validate_temperatures(tsd, temp_min_col, temp_max_col)
def evaporation(
    method: Literal["trap", "fixed"],
    source_units,
    input_ts="-",
    columns=None,
    start_date=None,
    end_date=None,
    dropna="no",
    clean=False,
    round_index=None,
    skiprows=None,
    index_type="datetime",
    names=None,
    target_units=None,
    print_input=False,
    lat: Optional[FloatLatitude] = None,
):
    """Disaggregate daily to hourly data.

    method:
        "trap" distributes each day's total over a trapezoid anchored at
        sunrise/sunset computed from `lat`; "fixed" uses the fixed HSPF
        hourly fractions.
    """
    target_units = single_target_units(source_units, target_units)

    pd.options.display.width = 60

    if method == "trap" and lat is None:
        raise ValueError(
            tsutils.error_wrapper("""
The "trap" method requires latitude with the `lat` keyword.  You gave
"{lat}". """.format(**locals())))

    tsd = tsutils.common_kwds(
        tsutils.read_iso_ts(input_ts,
                            skiprows=skiprows,
                            names=names,
                            index_type=index_type),
        start_date=start_date,
        end_date=end_date,
        pick=columns,
        round_index=round_index,
        dropna=dropna,
        source_units=source_units,
        target_units=target_units,
        clean=clean,
    )

    # Pad one extra day so the hourly forward-fill covers the final daily
    # value; the padding row is dropped again at the end.
    # BUG FIX: DataFrame.append was removed in pandas 2.0 — use pd.concat.
    ntsd = pd.concat([
        tsd,
        pd.DataFrame(columns=tsd.columns,
                     index=[tsd.index[-1] + datetime.timedelta(days=1)]),
    ])

    ndata = ntsd.resample("H").ffill()

    # Hourly weights to be filled in per method.
    fdata = pd.DataFrame(columns=ndata.columns, index=ndata.index, dtype="f")

    if method == "trap":
        lrad = lat * np.pi / 180.0

        # Solar declination (radians) per day of year.
        ad = 0.40928 * np.cos(0.0172141 * (172 - tsd.index.dayofyear))
        ss = np.sin(lrad) * np.sin(ad)
        cs = np.cos(lrad) * np.cos(ad)
        x2 = -ss / cs
        # Day length (hours) via arcsin(x) == arctan(x / sqrt(1 - x**2)).
        # BUG FIX: original used np.square(1 - x2**2), i.e. (1 - x2**2)**2;
        # the identity requires the square *root*.
        delt = 7.6394 * (np.pi / 2.0 - np.arctan(x2 / np.sqrt(1.0 - x2**2)))
        sunr = 12.0 - delt / 2.0

        # develop hourly distribution given sunrise,
        # sunset and length of day (DELT)
        dtr2 = delt / 2.0
        dtr4 = delt / 4.0
        tr2 = sunr + dtr4
        tr3 = tr2 + dtr2
        tr4 = tr3 + dtr4

        # Anchor the trapezoid: 0 at sunrise and just past sunset, 1 at the
        # two interior breakpoints; interpolation fills the slopes.
        # BUG FIX: ".ix" was removed from pandas — ".loc" for these
        # datetime-label assignments.
        for index, toss in enumerate(sunr):
            cdate = ntsd.index[index]
            fdata.loc[datetime.datetime(cdate.year, cdate.month, cdate.day,
                                        int(sunr[index])), :] = 0.0
            fdata.loc[datetime.datetime(cdate.year, cdate.month, cdate.day,
                                        int(tr4[index]) + 1), :] = 0.0
            fdata.loc[datetime.datetime(cdate.year, cdate.month, cdate.day,
                                        int(round(tr2[index]))), :] = 1.0
            fdata.loc[datetime.datetime(cdate.year, cdate.month, cdate.day,
                                        int(round(tr3[index]))), :] = 1.0

        # Positional endpoints (".iloc" replaces positional ".ix").
        fdata.iloc[0, :] = 0.0
        fdata.iloc[-1, :] = 0.0

        fdata = fdata.interpolate("linear")
        fdata = fdata.fillna(0.0)

        # Normalize each day's weights to sum to 1, then scale by the
        # daily totals.
        fdata = fdata / fdata.groupby(
            pd.Grouper(freq="D")).sum().resample("H").ffill()
        fdata = fdata * ndata
        fdata = fdata.iloc[:-1, :]
    elif method == "fixed":
        # DATA EVAPDIST / 0.000,0.000,0.000,0.000,0.000,0.000,0.019,0.041,
        # $ 0.067,0.088,0.102,0.110,0.110,0.110,0.105,0.095,
        # $ 0.081,0.055,0.017,0.000,0.000,0.000,0.000,0.000
        fdata = fdata.fillna(0.0)

        fdata[fdata.index.hour == 7] = 0.019
        fdata[fdata.index.hour == 8] = 0.041
        fdata[fdata.index.hour == 9] = 0.067
        fdata[fdata.index.hour == 10] = 0.088
        fdata[fdata.index.hour == 11] = 0.102
        fdata[fdata.index.hour == 12] = 0.110
        fdata[fdata.index.hour == 13] = 0.110
        fdata[fdata.index.hour == 14] = 0.110
        fdata[fdata.index.hour == 15] = 0.105
        fdata[fdata.index.hour == 16] = 0.095
        fdata[fdata.index.hour == 17] = 0.081
        fdata[fdata.index.hour == 18] = 0.055
        fdata[fdata.index.hour == 19] = 0.017

        fdata = fdata * ndata
        fdata = fdata.iloc[:-1, :]

    return tsutils.print_input(print_input, tsd, fdata, None)