def find_contexts(self, time_interval, satellite, hirs2nc_delivery_id,
                  hirs_avhrr_delivery_id, hirs_csrb_daily_delivery_id,
                  hirs_csrb_monthly_delivery_id, hirs_ctp_orbital_delivery_id,
                  hirs_ctp_daily_delivery_id, hirs_ctp_monthly_delivery_id,
                  hirs_tpw_orbital_delivery_id, hirs_tpw_daily_delivery_id):

    granules = [
        g.left for g in time_interval.overlapping_interval_series(
            timedelta(days=1), timedelta(days=1))
    ]

    LOG.debug("Running find_contexts()")

    return [{
        'granule': g,
        'satellite': satellite,
        'hirs2nc_delivery_id': hirs2nc_delivery_id,
        'hirs_avhrr_delivery_id': hirs_avhrr_delivery_id,
        'hirs_csrb_daily_delivery_id': hirs_csrb_daily_delivery_id,
        'hirs_csrb_monthly_delivery_id': hirs_csrb_monthly_delivery_id,
        'hirs_ctp_orbital_delivery_id': hirs_ctp_orbital_delivery_id,
        'hirs_ctp_daily_delivery_id': hirs_ctp_daily_delivery_id,
        'hirs_ctp_monthly_delivery_id': hirs_ctp_monthly_delivery_id,
        'hirs_tpw_orbital_delivery_id': hirs_tpw_orbital_delivery_id,
        'hirs_tpw_daily_delivery_id': hirs_tpw_daily_delivery_id
    } for g in granules]
def run_task(self, inputs, context):

    LOG.debug("Running run_task()...")

    for key in context.keys():
        LOG.debug("run_task() context['{}'] = {}".format(key, context[key]))

    rc = 0

    # Extract binary arrays from the CFSR reanalysis GRIB2 files on a
    # global equal-angle grid at 0.5 degree resolution.
    rc, cfsr_files = self.extract_bin_from_cfsr(inputs, context)

    # Create the CFSR statistics for the current day.
    rc, output_stats_file = self.create_cfsr_statistics(
        inputs, context, cfsr_files)
    if rc != 0:
        LOG.warning('Something went wrong, rc={}...'.format(rc))
        return {}
    LOG.debug('create_cfsr_statistics() generated {}...'.format(
        output_stats_file))

    # Create the CFSR means for the current day.
    rc, output_means_file = self.create_cfsr_means(inputs, context,
                                                   output_stats_file)
    if rc != 0:
        LOG.warning('Something went wrong, rc={}...'.format(rc))
        return {}
    LOG.debug('create_cfsr_means() generated {}...'.format(output_means_file))

    LOG.debug('python return value = {}'.format(rc))

    extra_attrs = {
        'begin_time': context['granule'],
        'end_time': context['granule'] + timedelta(days=1) - timedelta(seconds=1)
    }
    LOG.debug('extra_attrs = {}'.format(extra_attrs))

    return {
        'stats': {
            'file': nc_compress(output_stats_file),
            'extra_attrs': extra_attrs
        },
        'means': {
            'file': nc_compress(output_means_file),
            'extra_attrs': extra_attrs
        }
    }
def find_contexts(self, time_interval, satellite, hirs2nc_delivery_id,
                  hirs_avhrr_delivery_id, hirs_csrb_daily_delivery_id):

    granules = [
        g.left for g in time_interval.overlapping_interval_series(
            timedelta(days=1), timedelta(days=1))
    ]

    return [{
        'granule': g,
        'satellite': satellite,
        'hirs2nc_delivery_id': hirs2nc_delivery_id,
        'hirs_avhrr_delivery_id': hirs_avhrr_delivery_id,
        'hirs_csrb_daily_delivery_id': hirs_csrb_daily_delivery_id
    } for g in granules]
def cfsr_input(self, cfsr_bin_files, interval):

    # Get the CFSR datetime (00z, 06z, 12z, 18z, 00z) which is closest to the start
    # of the HIRS interval
    cfsr_granule = round_datetime(interval.left, timedelta(hours=6))

    # Construct old and new CFSR filenames based on the CFSR datetime
    pgbhnl_filename = 'pgbhnl.gdas.{}.grb2.bin'.format(
        cfsr_granule.strftime('%Y%m%d%H'))
    cdas1_filename = 'cdas1.{}.t{}z.pgrbhanl.grib2.bin'.format(
        cfsr_granule.strftime('%Y%m%d'), cfsr_granule.strftime('%H'))
    LOG.debug("pgbhnl_filename file is {}".format(pgbhnl_filename))
    LOG.debug("cdas1_filename file is {}".format(cdas1_filename))

    for candidate_file in cfsr_bin_files:
        LOG.debug("Candidate file is {}".format(candidate_file))
        if candidate_file in (pgbhnl_filename, cdas1_filename):
            LOG.debug("We have a CFSR file match: {}".format(candidate_file))
            return candidate_file

    return None
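# Illustrative sketch, not pipeline code: for a hypothetical 6-hourly CFSR
# timestamp, these are the two candidate binary filenames that cfsr_input()
# above compares against the available files. The example datetime is made up.
from datetime import datetime

def example_cfsr_filenames():
    cfsr_granule = datetime(2017, 1, 1, 6)  # hypothetical 06z synoptic time
    pgbhnl_filename = 'pgbhnl.gdas.{}.grb2.bin'.format(
        cfsr_granule.strftime('%Y%m%d%H'))
    cdas1_filename = 'cdas1.{}.t{}z.pgrbhanl.grib2.bin'.format(
        cfsr_granule.strftime('%Y%m%d'), cfsr_granule.strftime('%H'))
    # pgbhnl_filename -> 'pgbhnl.gdas.2017010106.grb2.bin'
    # cdas1_filename  -> 'cdas1.20170101.t06z.pgrbhanl.grib2.bin'
    return pgbhnl_filename, cdas1_filename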
def build_task(self, context, task):
    '''
    Build up a set of inputs for a single context
    '''
    LOG.debug("Running build_task()")

    # Instantiate the hirs_ctp_daily computation
    hirs_ctp_daily_comp = hirs_ctp_daily.HIRS_CTP_DAILY()

    num_days = monthrange(context['granule'].year, context['granule'].month)[1]
    interval = TimeInterval(context['granule'],
                            context['granule'] + timedelta(days=num_days),
                            False, True)

    daily_contexts = hirs_ctp_daily_comp.find_contexts(
        interval, context['satellite'], context['hirs2nc_delivery_id'],
        context['hirs_avhrr_delivery_id'],
        context['hirs_csrb_daily_delivery_id'],
        context['hirs_csrb_monthly_delivery_id'],
        context['hirs_ctp_orbital_delivery_id'],
        context['hirs_ctp_daily_delivery_id'])

    if len(daily_contexts) == 0:
        raise WorkflowNotReady(
            'No HIRS_CTP_DAILY inputs available for {}'.format(
                context['granule']))

    for (idx, daily_context) in enumerate(daily_contexts):
        hirs_ctp_daily_prod = hirs_ctp_daily_comp.dataset('out').product(
            daily_context)
        if SPC.exists(hirs_ctp_daily_prod):
            task.input('CTPD-{}'.format(idx), hirs_ctp_daily_prod, True)
def find_contexts(self, time_interval, satellite, hirs2nc_delivery_id,
                  hirs_avhrr_delivery_id, hirs_csrb_daily_delivery_id,
                  hirs_csrb_monthly_delivery_id, hirs_ctp_orbital_delivery_id,
                  hirs_ctp_daily_delivery_id, hirs_ctp_monthly_delivery_id):

    granules = []

    start = datetime(time_interval.left.year, time_interval.left.month, 1)
    end = datetime(time_interval.right.year, time_interval.right.month, 1)
    date = start

    while date <= end:
        granules.append(date)
        date = date + timedelta(days=monthrange(date.year, date.month)[1])

    return [{
        'granule': g,
        'satellite': satellite,
        'hirs2nc_delivery_id': hirs2nc_delivery_id,
        'hirs_avhrr_delivery_id': hirs_avhrr_delivery_id,
        'hirs_csrb_daily_delivery_id': hirs_csrb_daily_delivery_id,
        'hirs_csrb_monthly_delivery_id': hirs_csrb_monthly_delivery_id,
        'hirs_ctp_orbital_delivery_id': hirs_ctp_orbital_delivery_id,
        'hirs_ctp_daily_delivery_id': hirs_ctp_daily_delivery_id,
        'hirs_ctp_monthly_delivery_id': hirs_ctp_monthly_delivery_id
    } for g in granules]
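# Minimal standalone sketch of the month-stepping loop used in find_contexts()
# above: starting from the first of the start month, advance by the length of
# each month until the first of the end month is reached. The example dates are
# hypothetical.
from calendar import monthrange
from datetime import datetime, timedelta

def example_monthly_granules(start, end):
    date = datetime(start.year, start.month, 1)
    last = datetime(end.year, end.month, 1)
    granules = []
    while date <= last:
        granules.append(date)
        date = date + timedelta(days=monthrange(date.year, date.month)[1])
    return granules

# example_monthly_granules(datetime(2015, 11, 20), datetime(2016, 2, 5))
# -> [datetime(2015, 11, 1), datetime(2015, 12, 1),
#     datetime(2016, 1, 1), datetime(2016, 2, 1)]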
def get_cfsr(self, granule):
    '''
    Retrieve the CFSR file which covers the desired granule.
    '''
    wedge = timedelta(seconds=1)
    day = timedelta(days=1)

    cfsr_granule = round_datetime(granule, timedelta(hours=6))
    cfsr_file = None
    have_cfsr_file = False

    # Search for the old style pgbhnl.gdas.*.grb2 file from DAWG
    if not have_cfsr_file:
        LOG.debug(
            "Trying to retrieve CFSR_PGRBHANL product (pgbhnl.gdas.*.grb2) CFSR files from DAWG..."
        )
        try:
            cfsr_file = dawg_catalog.file('', 'CFSR_PGRBHANL', cfsr_granule)
            have_cfsr_file = True
        except Exception as err:
            LOG.debug("{}.".format(err))

    return cfsr_file
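# Hedged sketch only: round_datetime() comes from the hirs2nc utilities and is
# assumed here to round a datetime to the nearest multiple of the supplied
# timedelta (e.g. the nearest 00z/06z/12z/18z synoptic time). This stand-in is
# an assumption for illustration, not the library implementation.
from datetime import datetime, timedelta

def round_datetime_sketch(dt, delta):
    # Express the datetime as seconds from a fixed epoch, round to the nearest
    # whole step, and convert back.
    seconds = (dt - datetime.min).total_seconds()
    step = delta.total_seconds()
    rounded = round(seconds / step) * step
    return datetime.min + timedelta(seconds=rounded)

# round_datetime_sketch(datetime(2017, 1, 1, 4, 45), timedelta(hours=6))
# -> datetime(2017, 1, 1, 6, 0)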
def hirs_to_time_interval(self, filename):
    '''
    Takes a HIRS filename as input and returns the time interval
    covering that file.
    '''
    file_chunks = filename.split('.')
    begin_time = datetime.strptime('.'.join(file_chunks[3:5]), 'D%y%j.S%H%M')
    end_time = datetime.strptime('.'.join([file_chunks[3], file_chunks[5]]),
                                 'D%y%j.E%H%M')

    # If the granule crosses midnight, the end time wraps to the next day.
    if end_time < begin_time:
        end_time += timedelta(days=1)

    return TimeInterval(begin_time, end_time)
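# Usage sketch for hirs_to_time_interval() above, using the level-2 filename
# pattern that appears elsewhere in this codebase
# (NSS.GHRR.NC.D81236.S0013.E0207.B0086970.WI.level2.hdf).
from datetime import datetime, timedelta

def example_hirs_interval():
    filename = 'NSS.GHRR.NC.D81236.S0013.E0207.B0086970.WI.level2.hdf'
    chunks = filename.split('.')
    # chunks[3:5] -> ['D81236', 'S0013']; chunks[5] -> 'E0207'
    begin_time = datetime.strptime('.'.join(chunks[3:5]), 'D%y%j.S%H%M')
    end_time = datetime.strptime('.'.join([chunks[3], chunks[5]]),
                                 'D%y%j.E%H%M')
    if end_time < begin_time:
        end_time += timedelta(days=1)
    # begin_time == datetime(1981, 8, 24, 0, 13)
    # end_time   == datetime(1981, 8, 24, 2, 7)
    return begin_time, end_time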
def process_metadata(self, file_type):
    '''
    Run through the *.out file and create a nested dictionary containing
    the required metadata for the files of each file_type.
    '''
    LOG.debug("Reading the metadata file...")

    metadata_file = self.input_data[file_type]
    LOG.debug("For file_type = '{}', metadata_file = {}".format(
        file_type, metadata_file))

    # Open the metadata file and read each line, splitting it into the
    # required metadata for each file.
    with open(metadata_file) as metadata:
        for line in metadata:
            (size, mod_time, begin_time, end_time, sensor, sat, file_type,
             file_system, relative_path) = line.split(',')

            begin_time = datetime.fromtimestamp(float(begin_time))
            end_time = datetime.fromtimestamp(float(end_time))
            relative_path = relative_path.split()[0]
            name = os.path.basename(relative_path)

            if sensor not in self.file_data:
                self.file_data[sensor] = {}
            if sat not in self.file_data[sensor]:
                self.file_data[sensor][sat] = {}
            if file_type not in self.file_data[sensor][sat]:
                self.file_data[sensor][sat][file_type] = {}

            if name not in self.file_data[sensor][sat][file_type]:
                if end_time < begin_time:
                    LOG.warning(
                        "{}: End time {} is before begin time {}, adding a day."
                        .format(name, end_time, begin_time))
                    end_time += timedelta(days=1)

                self.file_data[sensor][sat][file_type][name] = {
                    'data_interval': TimeInterval(begin_time, end_time),
                    'name': name,
                    'path': relative_path
                }

    self.indexed_file_types[file_type] = 1
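# Hypothetical example of one line of the metadata (*.out) file consumed by
# process_metadata() above: nine comma-separated fields, with begin/end times
# given as Unix timestamps. The filename, sizes, and times shown here are
# illustrative only, not taken from a real datalist.
import os
from datetime import datetime

def example_metadata_line():
    line = ('12345678,1514764800,1483228800,1483232400,'
            'hirs,metop-b,HIR1B,cephfs,path/to/NSS.HIRX.M1.D17001.S0000.E0100.hdf\n')
    (size, mod_time, begin_time, end_time, sensor, sat,
     file_type, file_system, relative_path) = line.split(',')
    begin_time = datetime.fromtimestamp(float(begin_time))
    end_time = datetime.fromtimestamp(float(end_time))
    relative_path = relative_path.split()[0]   # also strips the trailing newline
    name = os.path.basename(relative_path)
    return sensor, sat, file_type, name, begin_time, end_time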
import logging
from datetime import datetime, timedelta

from flo.ui import local_prepare, local_execute

import flo.sw.hirs2nc as hirs2nc
import flo.sw.hirs_avhrr as hirs_avhrr
from flo.sw.hirs2nc.utils import setup_logging

# every module should have a LOG object
LOG = logging.getLogger(__name__)

#
# General information
#

#hirs2nc_delivery_id = '20180410-1'
#hirs_avhrr_delivery_id = '20180505-1'
wedge = timedelta(seconds=1.)

# Satellite specific information
#granule = datetime(2017, 1, 1, 0, 32)
#interval = TimeInterval(granule, granule+timedelta(seconds=0))


def setup_computation(satellite):

    #satellite = 'metop-b'

    input_data = {
        'HIR1B':
        '/mnt/software/flo/hirs_l1b_datalists/{0:}/HIR1B_{0:}_latest'.format(
            satellite),
        'CFSR':
def build_task(self, context, task):
    '''
    Build up a set of inputs for a single context
    '''
    global delta_catalog

    LOG.debug("Running build_task()")
    LOG.debug("context: {}".format(context))

    # Initialize the hirs2nc and hirs_avhrr modules with the data locations
    hirs2nc.delta_catalog = delta_catalog
    hirs_avhrr.delta_catalog = delta_catalog

    # Instantiate the hirs2nc and hirs_avhrr computations
    hirs2nc_comp = hirs2nc.HIRS2NC()
    hirs_avhrr_comp = hirs_avhrr.HIRS_AVHRR()

    SPC = StoredProductCatalog()

    day = TimeInterval(
        context['granule'],
        (context['granule'] + timedelta(days=1) - timedelta(seconds=1)))

    hirs2nc_contexts = hirs2nc_comp.find_contexts(
        day, context['satellite'], context['hirs2nc_delivery_id'])

    if len(hirs2nc_contexts) == 0:
        raise WorkflowNotReady('No HIRS data for {}'.format(
            context['granule']))

    # Input counter.
    ic = 0

    for hirs2nc_context in hirs2nc_contexts:

        # Making input contexts
        hirs_avhrr_context = hirs2nc_context.copy()
        hirs_avhrr_context['hirs_avhrr_delivery_id'] = context[
            'hirs_avhrr_delivery_id']

        LOG.debug("HIRS context: {}".format(hirs2nc_context))
        LOG.debug("HIRS_AVHRR context: {}".format(hirs_avhrr_context))

        # Confirming we have HIRS1B and COLLO products...
        hirs2nc_prod = hirs2nc_comp.dataset('out').product(hirs2nc_context)
        hirs_avhrr_prod = hirs_avhrr_comp.dataset('out').product(
            hirs_avhrr_context)

        # If the HIRS1B and COLLO products exist, add them and the Patmos-x
        # file for this context to the list of input files to be downloaded
        # to the workspace...
        if SPC.exists(hirs2nc_prod) and SPC.exists(hirs_avhrr_prod):

            # It's safe to require all three inputs
            task.input('HIR1B-{}'.format(ic), hirs2nc_prod)
            task.input('COLLO-{}'.format(ic), hirs_avhrr_prod)
            task.input(
                'PTMSX-{}'.format(ic),
                delta_catalog.file('avhrr', hirs2nc_context['satellite'],
                                   'PTMSX', hirs2nc_context['granule']))
            ic += 1

    LOG.debug(
        "There are {} valid HIR1B/COLLO/PTMSX contexts in ({} -> {})".format(
            ic, day.left, day.right))

    if ic == 0:
        LOG.warning(
            "There are no valid HIR1B/COLLO/PTMSX contexts in ({} -> {}), aborting..."
            .format(day.left, day.right))
        return

    interval = TimeInterval(context['granule'],
                            context['granule'] + timedelta(days=1))

    num_cfsr_files = 0

    # Search for the old style pgbhnl.gdas.*.grb2 files from DAWG
    if num_cfsr_files == 0:
        LOG.debug(
            "Trying to retrieve CFSR_PGRBHANL product (pgbhnl.gdas.*.grb2) CFSR files from DAWG..."
        )
        try:
            cfsr_files = dawg_catalog.files('', 'CFSR_PGRBHANL', interval)
            num_cfsr_files = len(cfsr_files)
            if num_cfsr_files == 0:
                LOG.debug("\tpgbhnl.gdas.*.grb2 CFSR files from DAWG : {}".
                          format(cfsr_files))
        except Exception as err:
            LOG.error("{}.".format(err))
            LOG.warning(
                "Retrieval of CFSR_PGRBHANL product (pgbhnl.gdas.*.grb2) CFSR files from DAWG failed"
            )
def build_task(self, context, task):
    '''
    Build up a set of inputs for a single context
    '''
    global delta_catalog

    LOG.debug("Running build_task()")

    # Initialize the hirs_tpw_orbital module with the data locations
    hirs_tpw_orbital.delta_catalog = delta_catalog

    # Instantiate the hirs_tpw_orbital computation
    hirs_tpw_orbital_comp = hirs_tpw_orbital.HIRS_TPW_ORBITAL()

    SPC = StoredProductCatalog()

    # TPW Orbital Input

    granule = context['granule']
    wedge = timedelta(seconds=1)
    hour = timedelta(hours=1)
    day = timedelta(days=1)

    # Add an hour to each end of the day to make sure the day is completely covered
    interval = TimeInterval(context['granule'] - 1 * hour,
                            (context['granule'] + day + 1 * hour))

    hirs_tpw_orbital_contexts = hirs_tpw_orbital_comp.find_contexts(
        interval, context['satellite'], context['hirs2nc_delivery_id'],
        context['hirs_avhrr_delivery_id'],
        context['hirs_csrb_daily_delivery_id'],
        context['hirs_csrb_monthly_delivery_id'],
        context['hirs_ctp_orbital_delivery_id'],
        context['hirs_ctp_daily_delivery_id'],
        context['hirs_ctp_monthly_delivery_id'],
        context['hirs_tpw_orbital_delivery_id'])

    if len(hirs_tpw_orbital_contexts) == 0:
        raise WorkflowNotReady(
            'No HIRS_TPW_ORBITAL inputs available for {}'.format(
                context['granule']))

    LOG.debug("There are {} TPW Orbital contexts for {}.".format(
        len(hirs_tpw_orbital_contexts), interval))
    for tpw_context in hirs_tpw_orbital_contexts:
        LOG.debug(tpw_context)

    # Knock off all but the last of the "previous" day's contexts
    this_day = granule.day
    previous_day = (granule - day + wedge).day
    next_day = (granule + day + wedge).day
    LOG.debug("previous_day: {}".format(previous_day))
    LOG.debug("this_day: {}".format(this_day))
    LOG.debug("next_day: {}".format(next_day))

    start_idx = 0
    end_idx = -1
    num_contexts = len(hirs_tpw_orbital_contexts)
    indices = np.arange(num_contexts)
    reverse_indices = np.flip(np.arange(num_contexts) - num_contexts, axis=0)

    # Keep this set to zero unless we need to set it otherwise (say for Metop-B)
    interval_pad = 0

    # Pruning all but the last of the previous day's contexts
    for idx in indices:
        if hirs_tpw_orbital_contexts[idx + interval_pad]['granule'].day == this_day:
            start_idx = idx
            LOG.debug("Breaking: start_idx = {}, granule = {}".format(
                start_idx, hirs_tpw_orbital_contexts[start_idx]['granule']))
            break

    # Pruning all but the first of the next day's contexts
    for idx in reverse_indices:
        if hirs_tpw_orbital_contexts[idx - interval_pad]['granule'].day == this_day:
            end_idx = idx
            LOG.debug("Breaking: end_idx = {}, granule = {}".format(
                end_idx, hirs_tpw_orbital_contexts[end_idx]['granule']))
            break

    hirs_tpw_orbital_contexts = hirs_tpw_orbital_contexts[start_idx:end_idx + 1]
    #hirs_tpw_orbital_contexts = hirs_tpw_orbital_contexts[start_idx:end_idx]
    for tpw_context in hirs_tpw_orbital_contexts:
        LOG.debug("{}".format(tpw_context))

    for idx, tpw_context in enumerate(hirs_tpw_orbital_contexts):
        hirs_tpw_orbital_prod = hirs_tpw_orbital_comp.dataset('shift').product(
            tpw_context)
        if SPC.exists(hirs_tpw_orbital_prod):
            task.input('TPWO_shift-{}'.format(str(idx).zfill(2)),
                       hirs_tpw_orbital_prod)

    for idx, tpw_context in enumerate(hirs_tpw_orbital_contexts):
        hirs_tpw_orbital_prod = hirs_tpw_orbital_comp.dataset('noshift').product(
            tpw_context)
        if SPC.exists(hirs_tpw_orbital_prod):
            task.input('TPWO_noshift-{}'.format(str(idx).zfill(2)),
                       hirs_tpw_orbital_prod)
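# Toy illustration (hypothetical granule times) of the pruning logic above:
# given orbital contexts spanning the day before and after the target day, keep
# only the block of contexts whose granules fall on the target day, scanning
# forward for the first match and backward (via negative indices) for the last.
import numpy as np
from datetime import datetime

def example_prune_to_day():
    granule_times = [datetime(2017, 1, 1, 22), datetime(2017, 1, 1, 23),
                     datetime(2017, 1, 2, 1), datetime(2017, 1, 2, 13),
                     datetime(2017, 1, 2, 23), datetime(2017, 1, 3, 1)]
    this_day = 2
    num_contexts = len(granule_times)
    indices = np.arange(num_contexts)
    reverse_indices = np.flip(np.arange(num_contexts) - num_contexts, axis=0)
    start_idx, end_idx = 0, -1
    for idx in indices:
        if granule_times[idx].day == this_day:
            start_idx = idx
            break
    for idx in reverse_indices:
        if granule_times[idx].day == this_day:
            end_idx = idx
            break
    # start_idx == 2 and end_idx == -2, so the slice keeps the three
    # 2017-01-02 granules and drops the neighbouring days.
    return granule_times[start_idx:end_idx + 1]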
import logging
from datetime import datetime, timedelta

import flo.sw.hirs_ctp_daily as hirs_ctp_daily
from flo.sw.hirs2nc.utils import setup_logging

# every module should have a LOG object
LOG = logging.getLogger(__name__)

setup_logging(2)

# General information
hirs2nc_delivery_id = '20180410-1'
hirs_avhrr_delivery_id = '20180505-1'
hirs_csrb_daily_delivery_id = '20180714-1'
hirs_csrb_monthly_delivery_id = '20180516-1'
hirs_ctp_orbital_delivery_id = '20180730-1'
hirs_ctp_daily_delivery_id = '20180802-1'
wedge = timedelta(seconds=1.)
day = timedelta(days=1.)
hour = timedelta(hours=1.)

# Satellite specific information

#satellite = 'noaa-07'
#granule = datetime(2015, 4, 17, 0, 20)
#intervals = [TimeInterval(granule, granule + wedge - wedge)]

# NSS.GHRR.NC.D81236.S0013.E0207.B0086970.WI.level2.hdf --> NSS.GHRR.NC.D85032.S2221.E0009.B1863132.GC.level2.hdf
# datetime(1981, 8, 24, 0, 13) --> datetime(1985, 2, 1, 0, 9)
#intervals = []
#for years in range(1981, 1986):
#    intervals += [TimeInterval(datetime(years, month, 1),
#                               datetime(years, month, calendar.monthrange(years, month)[1]) + day - wedge)
#                  for month in range(1, 13)]

#satellite = 'noaa-09'
import logging
from datetime import datetime, timedelta

# every module should have a LOG object
LOG = logging.getLogger(__name__)

#
# General information
#

#hirs2nc_delivery_id = '20180410-1'
#hirs_avhrr_delivery_id = '20180505-1'
#hirs_csrb_daily_delivery_id = '20180714-1'
#hirs_csrb_monthly_delivery_id = '20180516-1'
#hirs_ctp_orbital_delivery_id = '20180730-1'
#hirs_ctp_daily_delivery_id = '20180802-1'
#hirs_ctp_monthly_delivery_id = '20180803-1'
#hirs_tpw_orbital_delivery_id = '20190205-1'
#hirs_tpw_daily_delivery_id = '20190318-1'
wedge = timedelta(seconds=1.)
day = timedelta(days=1.)

# Satellite specific information
#granule = datetime(2017, 1, 1, 0)
#interval = TimeInterval(granule, granule+day-wedge)


def setup_computation(satellite):

    input_data = {
        'HIR1B': '/mnt/software/flo/hirs_l1b_datalists/{0:}/HIR1B_{0:}_latest'.format(satellite),
        'CFSR': '/mnt/cephfs_data/geoffc/hirs_data_lists/CFSR.out',
        'PTMSX': '/mnt/software/flo/hirs_l1b_datalists/{0:}/PTMSX_{0:}_latest'.format(satellite)
    }

    # Data locations
    collection = {
        'HIR1B': 'ARCDATA',
def build_task(self, context, task):
    '''
    Build up a set of inputs for a single context
    '''
    global delta_catalog

    LOG.debug("Running build_task()")

    # Initialize the hirs2nc module with the data locations
    hirs2nc.delta_catalog = delta_catalog

    # Instantiate the hirs2nc and hirs_ctp_orbital computations
    hirs2nc_comp = hirs2nc.HIRS2NC()
    hirs_ctp_orbital_comp = hirs_ctp_orbital.HIRS_CTP_ORBITAL()

    SPC = StoredProductCatalog()

    #
    # HIRS L1B Input
    #
    hirs2nc_context = {
        'satellite': context['satellite'],
        'granule': context['granule'],
        'hirs2nc_delivery_id': context['hirs2nc_delivery_id']
    }

    hirs2nc_prod = hirs2nc_comp.dataset('out').product(hirs2nc_context)

    if SPC.exists(hirs2nc_prod):
        task.input('HIR1B', hirs2nc_prod)
    else:
        raise WorkflowNotReady('No HIRS inputs available for {}'.format(
            hirs2nc_context['granule']))

    #
    # CTP Orbital Input
    #
    hirs_ctp_orbital_context = context.copy()
    for key in ('hirs_ctp_daily_delivery_id', 'hirs_ctp_monthly_delivery_id',
                'hirs_tpw_orbital_delivery_id'):
        hirs_ctp_orbital_context.pop(key)

    hirs_ctp_orbital_prod = hirs_ctp_orbital_comp.dataset('out').product(
        hirs_ctp_orbital_context)

    if SPC.exists(hirs_ctp_orbital_prod):
        task.input('CTPO', hirs_ctp_orbital_prod)
    else:
        raise WorkflowNotReady(
            'No HIRS CTP Orbital inputs available for {}'.format(
                hirs_ctp_orbital_context['granule']))

    #
    # CFSR Input
    #
    cfsr_granule = round_datetime(context['granule'], timedelta(hours=6))
    cfsr_file = self.get_cfsr(cfsr_granule)

    if cfsr_file is not None:
        task.input('CFSR', cfsr_file)
    else:
        raise WorkflowNotReady(
            'No CFSR inputs available for {}'.format(cfsr_granule))

    LOG.debug("Final task.inputs...")  # GPC
    for task_key in task.inputs.keys():
        LOG.debug("\t{}: {}".format(task_key, task.inputs[task_key]))  # GPC

    LOG.debug("Exiting build_task()...")  # GPC
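# Minimal sketch of the context.copy()/pop() pattern used above to derive the
# upstream CTP-orbital context from the TPW-orbital context: the downstream
# delivery-id keys are removed so the remaining keys match what the upstream
# computation expects. Key names follow the code above; the values are example
# delivery ids taken from the configuration snippets in this file set, and the
# granule is left as a placeholder.
def example_derive_ctp_orbital_context():
    context = {
        'satellite': 'metop-b',
        'granule': None,  # a datetime in the real pipeline
        'hirs2nc_delivery_id': '20180410-1',
        'hirs_avhrr_delivery_id': '20180505-1',
        'hirs_csrb_daily_delivery_id': '20180714-1',
        'hirs_csrb_monthly_delivery_id': '20180516-1',
        'hirs_ctp_orbital_delivery_id': '20180730-1',
        'hirs_ctp_daily_delivery_id': '20180802-1',
        'hirs_ctp_monthly_delivery_id': '20180803-1',
        'hirs_tpw_orbital_delivery_id': '20190205-1',
    }
    hirs_ctp_orbital_context = context.copy()
    for key in ('hirs_ctp_daily_delivery_id', 'hirs_ctp_monthly_delivery_id',
                'hirs_tpw_orbital_delivery_id'):
        hirs_ctp_orbital_context.pop(key)
    # The result keeps only the keys the CTP-orbital computation needs.
    return hirs_ctp_orbital_context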