start = datetime.datetime(1980, 1, 1)
end   = datetime.datetime(1990, 1, 2)

# make a time series

delta = datetime.timedelta(days = 1)
times = [start + i * delta for i in range((end - start).days)]

# path to 7zip if using windows

processor.path_to_7z = r'C:/Program Files/7-Zip/7z.exe'

# download the data; this step will be skipped if this has already been done
# (from the last example) but is necessary to set the metadata

processor.download(bbox, start, end, output, datasets=['GSOD'])

# let's use the processor to aggregate the GSOD data together, including
# tmin, tmax, dewpoint, and wind speed. The GSOD database contains dew point
# and seems to be more complete than GHCND. It doesn't have snow or pan
# evaporation though--this is why both databases are included.

tmax     = processor.aggregate('GSOD', 'tmax',     start, end)
tmin     = processor.aggregate('GSOD', 'tmin',     start, end)
dewpoint = processor.aggregate('GSOD', 'dewpoint', start, end)
wind     = processor.aggregate('GSOD', 'wind',     start, end)

# now these time series can be saved for later consistent with the structure
# used by PyHSPF's HSPFModel class (start date, time step in minutes, data);
# this way the data are easy to access later
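# a hedged sketch of the save step described above, assuming pickle and the
# file naming pattern used in the other examples; GSOD data are daily, so a
# 1440-minute time step is assumed

import pickle

for tsname, data in zip(('tmax', 'tmin', 'dewpoint', 'wind'),
                        (tmax, tmin, dewpoint, wind)):

    name = '{}/GSOD_aggregated_{}'.format(output, tsname)
    ts = start, 1440, data
    with open(name, 'wb') as f: pickle.dump(ts, f)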
bbox = -77.2056, 38.2666, -76.4008, 39.3539

# start and end dates (just look at a few months to highlight the concept)

start = datetime.datetime(1980, 4, 1)
end   = datetime.datetime(1980, 10, 2)

# path to 7zip if using windows

processor.path_to_7z = r'C:/Program Files/7-Zip/7z.exe'

# download the data; this step will be skipped if this has already been done
# (from the last example) but is necessary to set the metadata

processor.download(bbox, start, end, output, datasets = ['GHCND'])

# let's use GHCND to work with temperature data. we can use the metadata to
# find the 10 stations with the longest temperature records and then aggregate
# the data together to get a mean time series across the period of interest.

n = 10

# make a list of the tmax record lengths and the corresponding station names

templengths = []

for k, v in processor.metadata.ghcndstations.items():
    templengths.append((v['tmax'], k))

# sort the list; the last 10 values are the longest records
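# a minimal sketch of the sort described above: the tuples sort on their
# first element (the record length), so the last n entries are the stations
# with the longest records; the variable name "stations" is an assumption

templengths.sort()

stations = [k for length, k in templengths[-n:]]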
bbox = -77.2056, 38.2666, -76.4008, 39.3539

# start and end dates (aggregate the whole 31 years)

start = datetime.datetime(1980, 1, 1)
end   = datetime.datetime(2011, 1, 1)

# path to 7zip if using windows

processor.path_to_7z = r'C:/Program Files/7-Zip/7z.exe'

# download the data; this step will be skipped if this has already been done
# (from the last example); alternatively it will set the metadata

processor.download(bbox, start, end, output, datasets = ['precip3240'])

# aggregate the data -- it's important to keep in mind missing data at many
# of these stations and the high degree of spatial variability associated with
# precipitation. in this example, all the data are aggregated into one series;
# however, it may make sense to aggregate more specifically to capture this
# variability to some degree (lots of papers on this subject).

precip = processor.aggregate('precip3240', 'precip', start, end)

# now these time series can be saved for later consistent with the structure
# used by PyHSPF's HSPFModel class (start date, time step in minutes, data);
# this way the data are easy to access later

name = '{}/precip3240_aggregated_precip'.format(output)

ts = start, 60, precip
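# the save itself, sketched here with pickle (an assumption, consistent with
# the (start date, time step, data) structure noted above)

import pickle

with open(name, 'wb') as f: pickle.dump(ts, f)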
# you may need to change the path to 7zip for your system; can also be done
# when the processor is instantiated (should not be necessary on linux)

processor.path_to_7z = r'C:/Program Files/7-Zip/7z.exe'

# download the data--worth noting there are some issues with the databases
# sometimes. these are handled with warnings, but this will give you ALL the
# data available so it takes a while (obviously longer the bigger the area
# requested) and further processing will be needed to develop "final" datasets
# for a model simulation. if the subdirectories for the various databases in
# the output directory already exist (i.e., if you have already done the data
# download), this step will be skipped but it will tell the processor the
# location of the data files and read the metadata for further processing.

processor.download(bbox, start, end, output, datasets = datasets)

# because there are so many data files, it makes sense to keep track of some
# metadata for the files to use for parsing, etc. the following lines show
# how the metadata are organized. the filenames are dictionary keys, and the
# values are dictionaries containing the name of the station, the latitude,
# the longitude, the elevation, and the length of the datasets.

# GHCND (daily precip, tmax, tmin, snowdepth, snowfall, wind, pan evaporation)

for k, v in processor.metadata.ghcndstations.items():
    print('\n'.join(['{} {} {}'.format(k, p, val) for p, val in v.items()]))

# GSOD (daily precip, tmax, tmin, wind, dewpoint)
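# a sketch of the analogous report for the GSOD stations; this assumes the
# metadata object exposes a gsodstations dictionary organized the same way
# as ghcndstations

for k, v in processor.metadata.gsodstations.items():
    print('\n'.join(['{} {} {}'.format(k, p, val) for p, val in v.items()]))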
bbox = -77.2056, 38.2666, -76.4008, 39.3539

# start and end dates (aggregate the whole 31 years)

start = datetime.datetime(1980, 1, 1)
end   = datetime.datetime(2011, 1, 1)

# path to 7zip if using windows

processor.path_to_7z = r'C:/Program Files/7-Zip/7z.exe'

# download the data; this step will be skipped if this has already been done
# (from the last example); alternatively it will set the metadata

processor.download(bbox, start, end, output, datasets = ['NSRDB'])

# aggregate the data -- worth noting that the data prior to 1991 come from a
# separate database that corresponds more closely to METSTAT than SUNY;
# requesting data prior to 1991 will give the same values either way.

metstat = processor.aggregate('NSRDB', 'metstat', start, end)
suny    = processor.aggregate('NSRDB', 'suny',    start, end)

# now these time series can be saved for later consistent with the structure
# used by PyHSPF's HSPFModel class (start date, time step in minutes, data);
# this way the data are easy to access later

for n, dataset in zip(('metstat', 'suny'), (metstat, suny)):

    name = '{}/NSRDB_aggregated_{}'.format(output, n)
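    # a hedged sketch of the save itself: NSRDB data are hourly, so a
    # 60-minute time step is assumed, and pickle (assumed imported) follows
    # the convention in the other examples

    ts = start, 60, dataset
    with open(name, 'wb') as f: pickle.dump(ts, f)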