def prep_steps(self, *, yearmon: str) -> List[Step]:
    """
    Build the data preparation steps that are executed once per model
    iteration.

    Two steps extract the monthly precipitation and temperature netCDFs
    for ``yearmon`` from the full binary files. For years from 1979
    onward, the steps produced by
    ``cpc_daily_precipitation.download_monthly_precipitation`` are appended.

    :param yearmon: yearmon of model iteration
    :return: a list of Steps
    """
    year, _month = dates.parse_yearmon(yearmon)

    reader = os.path.join('{BINDIR}', 'utils', 'noaa_global_leaky_bucket',
                          'read_binary_grid.R')

    # One row per extracted variable: (--var flag, full binary file,
    # monthly netCDF output, update URL). Precipitation first, then
    # temperature.
    extractions = (
        ('P',
         self.full_precip_file(),
         self.precip_monthly(yearmon=yearmon).file,
         'ftp://ftp.cpc.ncep.noaa.gov/wd51yf/global_monthly/gridded_binary/p.long'),
        ('T',
         self.full_temp_file(),
         self.temp_monthly(yearmon=yearmon).file,
         'ftp://ftp.cpc.ncep.noaa.gov/wd51yf/global_monthly/gridded_binary/t.long'),
    )

    # Extract a netCDF of each monthly variable from its full binary file
    steps = [
        Step(targets=monthly_file,
             dependencies=full_file,
             commands=[[
                 reader,
                 '--input', full_file,
                 '--update_url', update_url,
                 '--output', monthly_file,
                 '--var', var,
                 '--yearmon', yearmon,
             ]])
        for var, full_file, monthly_file, update_url in extractions
    ]

    # The daily precipitation steps are only generated from 1979 onward
    if year >= 1979:
        steps += cpc_daily_precipitation.download_monthly_precipitation(
            yearmon=yearmon,
            workdir=os.path.join(self.source, 'NCEP', 'daily_precip'),
            wetdays_fname=self.p_wetdays(yearmon=yearmon).file)

    return steps
def test_step_comments(self):
    """The first line written for a Step is its comment, prefixed by '# '."""
    step = Step(targets='a',
                dependencies=[],
                commands=[['touch', 'a']],
                comment='Step to build a')

    first_line, *_ = write_step(step).split('\n')

    self.assertEqual(first_line, '# Step to build a')
def test_tab_indentation(self):
    """Every line after the first line of a written Step starts with a tab."""
    # Make requires that all command lines be tab-indented
    step = Step(targets=['a', 'b'],
                dependencies=['c', 'd'],
                commands=[['echo', 'c', '>', 'a'],
                          ['echo', 'd', '>', 'b']])

    _first_line, *command_lines = write_step(step).strip().split('\n')

    for command_line in command_lines:
        self.assertEqual(command_line[0], '\t')
def crop_to_nldas(file_in, file_out):
    """
    Return a one-element list holding the Step that warps ``file_in`` into
    ``file_out`` with ``gdalwarp``.

    The output is a deflate-compressed GeoTIFF in EPSG:4326, limited to the
    fixed ``-te`` extent below (presumably the NLDAS domain, going by the
    function name).

    :param file_in: path of the raster to crop
    :param file_out: path of the cropped raster to create
    :return: a list containing a single Step
    """
    # Values passed to gdalwarp's -te option
    extent = ['-125.0005', '25.0005', '-67.0005', '53.0005']

    gdalwarp = [
        'gdalwarp',
        '-s_srs', 'EPSG:4326',
        '-t_srs', 'EPSG:4326',
        '-of', 'GTiff',
        '-te', *extent,
        '-co', 'COMPRESS=deflate',
        file_in,
        file_out,
    ]

    return [Step(targets=file_out, dependencies=file_in, commands=[gdalwarp])]
def test_target_directories_created_but_only_once(self):
    """
    A single ``mkdir -p`` command lists each distinct target directory once,
    even when several targets share a directory.
    """
    step = Step(targets=['/tmp/fizz/fuzz/ok.txt', '/src/junk.h', '/src/junk.c'],
                dependencies=[],
                commands=[['do_something']])

    # Drop the first line; what remains are the command lines
    _first_line, *commands = unformat(write_step(step)).split('\n')

    self.assertTrue('mkdir -p /src /tmp/fizz/fuzz' in commands)
    self.assertEqual(2, len(commands))
def compute_fit_hindcast(self, varname: str, month: int, lead: int) -> List[Step]:
    """
    Build the Step that fits a GEV distribution to the NMME hindcasts of
    ``varname`` for a given target month and lead time.

    :param varname: forcing variable; must be in WSIM_FORCING_VARIABLES
    :param month: target month of the fit
    :param lead: lead time in months, as counted by WSIM
    :return: a list containing a single Step
    """
    assert varname in WSIM_FORCING_VARIABLES

    # WSIM computes lead months as the difference between the WSIM data
    # version and the target month. The NMME forecast reference time is one
    # month later than the WSIM data version, so a fit at an N-month WSIM
    # lead has to be computed from NMME data with an (N-1)-month lead.
    #
    # Example: a WSIM workflow generated for data version 201901 will
    # request a corrected NMME forecast targeting 201904, which it considers
    # a 3-month lead. The NMME forecast accessed by data version 201901 has
    # an NMME reference time of 201902, so from the point of view of
    # fit_nmme_hindcasts.R the raw forecast has a lead of two months.
    #
    # IMPORTANT: this subtraction accounts for the difference between the
    # WSIM data version / "yearmon" and the NMME forecast "reference time".
    nmme_lead = lead - 1

    # The fit uses the hindcast years strictly inside the available range
    first_year = self.min_hindcast_year + 1
    last_year = self.max_hindcast_year - 1

    fit_file = self.fit_retro(var=varname, target_month=month, lead_months=lead)
    hindcast_file = self.hindcast(varname)

    fit_command = [
        os.path.join('{BINDIR}', 'utils', 'nmme', 'fit_nmme_hindcasts.R'),
        '--distribution', 'gev',
        '--input', hindcast_file,
        '--varname', varname,
        '--min_year', str(first_year),
        '--max_year', str(last_year),
        '--target_month', str(month),
        '--output', fit_file,
        '--lead', str(nmme_lead),
    ]

    return [Step(targets=fit_file, dependencies=hindcast_file, commands=[fit_command])]
def test_pattern_rule_conversion(self):
    """
    With multiple targets, the declaration line uses '%' in place of '.' in
    file names, while the command line keeps the original names.
    """
    step = Step(targets=['a.txt', 'b.txt'],
                dependencies='source.txt',
                commands=[['process', 'source.txt', 'a.txt', 'b.txt']])

    declaration_line, command_line = write_step(step).split('\n')[:2]

    stems = ('a', 'b', 'source')

    for stem in stems:
        self.assertTrue(stem + '%txt' in declaration_line)

    for stem in stems:
        self.assertTrue(stem + '.txt' in command_line)
def download_monthly_temp_and_precip_files(self) -> List[Step]:
    """
    Steps to download (or update) the t.long and p.long full data sets
    from NCEP.

    These downloads are the same no matter which yearmon is being run, so
    they cannot be included in prep_steps.

    :return: a list of two Steps: temperature first, then precipitation
    """
    ncep_dir = os.path.join(self.source, 'NCEP')

    # (local file, remote URL) for each full data set
    downloads = (
        (self.full_temp_file(),
         'ftp://ftp.cpc.ncep.noaa.gov/wd51yf/global_monthly/gridded_binary/t.long'),
        (self.full_precip_file(),
         'ftp://ftp.cpc.ncep.noaa.gov/wd51yf/global_monthly/gridded_binary/p.long'),
    )

    return [
        Step(targets=local_file,
             dependencies=[],
             commands=[[
                 'wget', '--continue',
                 '--directory-prefix', ncep_dir,
                 url,
             ]])
        for local_file, url in downloads
    ]
def prep_steps(self, *, yearmon: str, target: str, member: str) -> List[Step]:
    """
    Build the steps that extract one raw NMME forecast (a single ensemble
    member and forecast target) from the forecast anomaly and climatology
    files.

    :param yearmon: WSIM data version (yearmon) of the model iteration
    :param target: yearmon targeted by the forecast
    :param member: ensemble member, as a string
    :return: a list of Steps
    """
    nmme_yearmon = wsim_to_nmme_yearmon(yearmon)
    _, nmme_month = dates.parse_yearmon(nmme_yearmon)

    steps = []

    # Hack to only download these once although they are required for
    # all members / forecast targets
    if int(member) == 1 and target == dates.add_months(yearmon, 1):
        steps += self.download_realtime_anomalies(nmme_yearmon=nmme_yearmon)

    # forecast_raw may carry a '::' suffix; only the part before it is used
    output = self.forecast_raw(yearmon=yearmon, target=target, member=member).split('::')[0]

    anom_temp = self.forecast_anom(nmme_yearmon=nmme_yearmon, varname='T')
    anom_precip = self.forecast_anom(nmme_yearmon=nmme_yearmon, varname='Pr')
    clim_temp = self.forecast_clim(nmme_month=nmme_month, varname='T')
    clim_precip = self.forecast_clim(nmme_month=nmme_month, varname='Pr')

    # The lead is counted from the NMME yearmon, not the WSIM yearmon
    lead_months = dates.get_lead_months(nmme_yearmon, target)

    extract_command = [
        os.path.join('{BINDIR}', 'utils', 'nmme', 'extract_nmme_forecast.R'),
        '--clim_precip', clim_precip,
        '--clim_temp', clim_temp,
        '--anom_precip', anom_precip,
        '--anom_temp', anom_temp,
        '--member', member,
        '--lead', str(lead_months),
        '--output', output,
    ]

    steps.append(
        Step(targets=output,
             dependencies=[anom_temp, anom_precip, clim_temp, clim_precip],
             commands=[extract_command]))

    return steps
def test_variable_substitution(self):
    """
    Variables in targets, dependencies and commands are replaced by the
    values passed to write_step, leaving no variable names or braces behind.
    """
    s = Step(targets=['{ROOT_DIR}/fizz'],
             dependencies=['{SOURCE_DIR}/buzz'],
             commands=[['echo', '{SOURCE_DIR}/buzz', '>', '{ROOT_DIR}/fizz']])

    step_text = write_step(s, dict(ROOT_DIR='/tmp/root', SOURCE_DIR='/tmp/src'))

    # The substituted values must be present
    self.assertIn('/tmp/root/fizz', step_text)
    self.assertIn('/tmp/src/buzz', step_text)

    # Neither variable name may survive substitution. The name checked here
    # must match the variable actually used above (SOURCE_DIR); checking for
    # a name that never appears would make the assertion vacuous.
    self.assertNotIn('ROOT_DIR', step_text)
    self.assertNotIn('SOURCE_DIR', step_text)

    # No placeholder braces may be left over
    self.assertNotIn('{', step_text)
    self.assertNotIn('}', step_text)
def test_commands_arguments_aligned(self):
    """
    The tab-indented lines written for the Step number seven, and each one
    (once stripped) starts with 'mkdir', 'process.py' or '-'.
    """
    step = Step(targets='outputs/results.nc',
                dependencies='inputs.nc',
                commands=[[
                    'process.py',
                    '--input', 'inputs.nc',
                    '--output', 'outputs/results.nc',
                    '--compress', '3',
                    '--nohistory',
                ]])

    command_lines = []
    for line in write_step(step).split('\n'):
        if line.startswith('\t'):
            command_lines.append(line.strip())

    self.assertEqual(7, len(command_lines))

    allowed_prefixes = ('mkdir', 'process.py', '-')
    self.assertTrue(
        all(line.startswith(allowed_prefixes) for line in command_lines))
def prep_steps(self, *, yearmon: str, target: str, member: str) -> List[Step]:
    """
    Build the steps that download one forecast GRIB and convert it to
    netCDF.

    :param yearmon: yearmon of the model iteration
    :param target: yearmon targeted by the forecast
    :param member: forecast member; also passed as the GRIB timestamp
    :return: a list of two Steps: the download, then the conversion
    """
    # forecast_raw may carry a '::' suffix; only the part before it is used
    netcdf_file = self.forecast_raw(yearmon=yearmon, member=member, target=target).split('::')[0]
    grib_file = self.forecast_grib(timestamp=member, target=target)

    downloader = os.path.join('{BINDIR}', 'utils', 'noaa_cfsv2_forecast',
                              'download_cfsv2_forecast.py')

    # Download the GRIB, if needed
    download = Step(targets=grib_file,
                    dependencies=[],
                    commands=[[
                        downloader,
                        '--timestamp', member,
                        '--target', target,
                        '--output_dir', self.grib_dir(timestamp=member),
                    ]])

    # Convert the forecast data from GRIB to netCDF
    convert = commands.forecast_convert(grib_file, netcdf_file)

    return [download, convert]
def download_hindcasts(self):
    """
    Build one Step per forcing variable that copies that variable's NMME
    hindcast from the IRI data library to a local file using ``nccopy``.

    :return: a list of Steps, one per entry of WSIM_FORCING_VARIABLES
    """
    iri_url = 'http://iridl.ldeo.columbia.edu/SOURCES/.Models/.NMME/.{model}/.HINDCAST/.MONTHLY/.{varname}/dods'

    def download_step(varname):
        # Step that fetches the hindcast for a single forcing variable
        local_file = self.hindcast(varname)
        remote = iri_url.format(model=self.model_name, varname=IRI_VARS[varname])

        return Step(targets=local_file,
                    dependencies=[],
                    commands=[[
                        'nccopy',
                        '-7',  # netCDF-4 classic
                        '-d', '1',  # level-1 deflate
                        q(remote),
                        local_file,
                    ]])

    return [download_step(varname) for varname in WSIM_FORCING_VARIABLES]
def download_hindcasts(self, target_month: int, lead: int) -> List[Step]:
    """
    Build the steps that download and convert every available hindcast for
    a given target month and lead time.

    Each hindcast contributes two Steps: a GRIB download followed by a
    conversion of that GRIB to netCDF.

    :param target_month: target month of the hindcasts
    :param lead: lead time passed to available_hindcasts
    :return: a list of Steps
    """
    downloader = os.path.join('{BINDIR}', 'utils', 'noaa_cfsv2_forecast',
                              'download_cfsv2_forecast.py')

    steps = []

    for timestamp, target in self.available_hindcasts(target_month, lead):
        grib_file = self.hindcast_grib(timestamp=timestamp, target=target)
        netcdf_file = self.hindcast_raw(timestamp=timestamp, target=target)

        download = Step(targets=grib_file,
                        dependencies=[],
                        commands=[[
                            downloader,
                            '--timestamp', timestamp,
                            '--target', target,
                            # The GRIB is written into its own directory
                            '--output_dir', os.path.dirname(grib_file),
                        ]])

        steps.extend([download, commands.forecast_convert(grib_file, netcdf_file)])

    return steps
def test_variable_substitution_error(self):
    """Writing a Step whose variable has no supplied value raises KeyError."""
    step = Step(targets='a',
                dependencies='b',
                commands=[['{PROGRAM}', 'a', 'b']])

    # Only PROG is supplied, so {PROGRAM} cannot be resolved
    with self.assertRaises(KeyError):
        write_step(step, dict(PROG='q'))