Example #1
    def find_contexts(self, time_interval, satellite, hirs2nc_delivery_id,
                      hirs_avhrr_delivery_id, hirs_csrb_daily_delivery_id,
                      hirs_csrb_monthly_delivery_id,
                      hirs_ctp_orbital_delivery_id, hirs_ctp_daily_delivery_id,
                      hirs_ctp_monthly_delivery_id,
                      hirs_tpw_orbital_delivery_id,
                      hirs_tpw_daily_delivery_id):

        granules = [
            g.left for g in time_interval.overlapping_interval_series(
                timedelta(days=1), timedelta(days=1))
        ]

        LOG.debug("Running find_contexts()")
        return [{
            'granule': g,
            'satellite': satellite,
            'hirs2nc_delivery_id': hirs2nc_delivery_id,
            'hirs_avhrr_delivery_id': hirs_avhrr_delivery_id,
            'hirs_csrb_daily_delivery_id': hirs_csrb_daily_delivery_id,
            'hirs_csrb_monthly_delivery_id': hirs_csrb_monthly_delivery_id,
            'hirs_ctp_orbital_delivery_id': hirs_ctp_orbital_delivery_id,
            'hirs_ctp_daily_delivery_id': hirs_ctp_daily_delivery_id,
            'hirs_ctp_monthly_delivery_id': hirs_ctp_monthly_delivery_id,
            'hirs_tpw_orbital_delivery_id': hirs_tpw_orbital_delivery_id,
            'hirs_tpw_daily_delivery_id': hirs_tpw_daily_delivery_id
        } for g in granules]
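For intuition: with a one-day step and one-day width, the left edges of the interval series are consecutive day starts, so each returned context pairs one day-start granule with the satellite and the full set of delivery ids. A minimal sketch (the flo.time import path for TimeInterval is an assumption, not shown in these examples):

from datetime import datetime, timedelta
from flo.time import TimeInterval  # assumed import path

interval = TimeInterval(datetime(2017, 1, 1), datetime(2017, 1, 3))
# granules would come out as the day starts:
# [datetime(2017, 1, 1), datetime(2017, 1, 2), datetime(2017, 1, 3)]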
Example #2
    def run_task(self, inputs, context):

        LOG.debug("Running run_task()...")

        for key, value in context.items():
            LOG.debug("run_task() context['{}'] = {}".format(key, value))

        rc = 0

        # Extract binary arrays from the CFSR reanalysis GRIB2 files on a
        # global equal-angle grid at 0.5 degree resolution.
        rc, cfsr_files = self.extract_bin_from_cfsr(inputs, context)
        if rc != 0:
            LOG.warn('Something went wrong, rc={}...'.format(rc))
            return {}

        # Create the CFSR statistics for the current day.
        rc, output_stats_file = self.create_cfsr_statistics(
            inputs, context, cfsr_files)
        if rc != 0:
            LOG.warn('Something went wrong, rc={}...'.format(rc))
            return {}
        LOG.debug('create_cfsr_statistics() generated {}...'.format(
            output_stats_file))

        # Create the CFSR means for the current day
        rc, output_means_file = self.create_cfsr_means(inputs, context,
                                                       output_stats_file)
        if rc != 0:
            LOG.warn('Something went wrong, rc={}...'.format(rc))
            return {}
        LOG.debug(
            'create_cfsr_means() generated {}...'.format(output_means_file))

        LOG.debug('python return value = {}'.format(rc))

        extra_attrs = {
            'begin_time':
            context['granule'],
            'end_time':
            context['granule'] + timedelta(days=1) - timedelta(seconds=1)
        }

        LOG.debug('extra_attrs = {}'.format(extra_attrs))

        return {
            'stats': {
                'file': nc_compress(output_stats_file),
                'extra_attrs': extra_attrs
            },
            'means': {
                'file': nc_compress(output_means_file),
                'extra_attrs': extra_attrs
            }
        }
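nc_compress is called above but not defined in these examples; a minimal sketch of what such a helper might look like, shelling out to the standard nccopy utility to deflate the file in place (the signature and the in-place strategy are assumptions):

import shutil
import subprocess

def nc_compress(nc_file, level=4):
    # Hypothetical stand-in for nc_compress: deflate a NetCDF file in
    # place with nccopy ('-d' sets the deflate level), returning its path.
    tmp_file = nc_file + '.tmp'
    subprocess.check_call(['nccopy', '-d', str(level), nc_file, tmp_file])
    shutil.move(tmp_file, nc_file)
    return nc_file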
Example #3
    def find_contexts(self, time_interval, satellite, hirs2nc_delivery_id,
                      hirs_avhrr_delivery_id, hirs_csrb_daily_delivery_id):

        granules = [
            g.left for g in time_interval.overlapping_interval_series(
                timedelta(days=1), timedelta(days=1))
        ]

        return [{
            'granule': g,
            'satellite': satellite,
            'hirs2nc_delivery_id': hirs2nc_delivery_id,
            'hirs_avhrr_delivery_id': hirs_avhrr_delivery_id,
            'hirs_csrb_daily_delivery_id': hirs_csrb_daily_delivery_id
        } for g in granules]
Example #4
    def cfsr_input(self, cfsr_bin_files, interval):

        # Get the CFSR datetime (00z, 06z, 12z or 18z) which is closest to the
        # start of the HIRS interval
        cfsr_granule = round_datetime(interval.left, timedelta(hours=6))

        # Construct old and new CFSR filenames based on the CFSR datetime
        pgbhnl_filename = 'pgbhnl.gdas.{}.grb2.bin'.format(
            cfsr_granule.strftime('%Y%m%d%H'))
        cdas1_filename = 'cdas1.{}.t{}z.pgrbhanl.grib2.bin'.format(
            cfsr_granule.strftime('%Y%m%d'), cfsr_granule.strftime('%H'))
        LOG.debug("pgbhnl_filename file is {}".format(pgbhnl_filename))
        LOG.debug("cdas1_filename file is {}".format(cdas1_filename))

        for candidate in cfsr_bin_files:
            LOG.debug("Candidate file is {}".format(candidate))
            if candidate in (pgbhnl_filename, cdas1_filename):
                LOG.debug("We have a CFSR file match: {}".format(candidate))
                return candidate

        return None
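round_datetime is assumed throughout these examples but never shown; a minimal sketch of such a helper, rounding a datetime to the nearest multiple of a timedelta (the name and behavior are assumptions, not the project's actual implementation):

from datetime import datetime, timedelta

def round_datetime(dt, resolution):
    # Hypothetical helper: round dt to the nearest multiple of resolution.
    epoch = datetime(dt.year, 1, 1)
    step = resolution.total_seconds()
    seconds = (dt - epoch).total_seconds()
    return epoch + timedelta(seconds=round(seconds / step) * step)

# e.g. round_datetime(datetime(2017, 1, 1, 4, 20), timedelta(hours=6))
# -> datetime(2017, 1, 1, 6, 0)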
Example #5
    def build_task(self, context, task):
        '''
        Build up a set of inputs for a single context
        '''

        LOG.debug("Running build_task()")

        # Instantiate the hirs_ctp_daily computation
        hirs_ctp_daily_comp = hirs_ctp_daily.HIRS_CTP_DAILY()

        num_days = monthrange(context['granule'].year,
                              context['granule'].month)[1]
        interval = TimeInterval(context['granule'],
                                context['granule'] + timedelta(num_days),
                                False, True)

        daily_contexts = hirs_ctp_daily_comp.find_contexts(
            interval, context['satellite'], context['hirs2nc_delivery_id'],
            context['hirs_avhrr_delivery_id'],
            context['hirs_csrb_daily_delivery_id'],
            context['hirs_csrb_monthly_delivery_id'],
            context['hirs_ctp_orbital_delivery_id'],
            context['hirs_ctp_daily_delivery_id'])

        if len(daily_contexts) == 0:
            raise WorkflowNotReady(
                'No HIRS_CTP_DAILY inputs available for {}'.format(
                    context['granule']))

        for (idx, daily_context) in enumerate(daily_contexts):
            hirs_ctp_daily_prod = hirs_ctp_daily_comp.dataset('out').product(
                daily_context)
            if SPC.exists(hirs_ctp_daily_prod):
                task.input('CTPD-{}'.format(idx), hirs_ctp_daily_prod, True)
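For reference, the month-length arithmetic used above works out as follows (monthrange comes from the standard calendar module):

from calendar import monthrange
from datetime import datetime, timedelta

granule = datetime(2017, 2, 1)
num_days = monthrange(granule.year, granule.month)[1]  # 28
# The monthly interval then runs from the first of this month up to the
# first of the next month: 2017-02-01 -> 2017-03-01
month_end = granule + timedelta(num_days)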
Example #6
    def find_contexts(self, time_interval, satellite, hirs2nc_delivery_id,
                      hirs_avhrr_delivery_id, hirs_csrb_daily_delivery_id,
                      hirs_csrb_monthly_delivery_id,
                      hirs_ctp_orbital_delivery_id, hirs_ctp_daily_delivery_id,
                      hirs_ctp_monthly_delivery_id):

        granules = []

        start = datetime(time_interval.left.year, time_interval.left.month, 1)
        end = datetime(time_interval.right.year, time_interval.right.month, 1)
        date = start

        while date <= end:
            granules.append(date)
            date = date + timedelta(days=monthrange(date.year, date.month)[1])

        return [{
            'granule': g,
            'satellite': satellite,
            'hirs2nc_delivery_id': hirs2nc_delivery_id,
            'hirs_avhrr_delivery_id': hirs_avhrr_delivery_id,
            'hirs_csrb_daily_delivery_id': hirs_csrb_daily_delivery_id,
            'hirs_csrb_monthly_delivery_id': hirs_csrb_monthly_delivery_id,
            'hirs_ctp_orbital_delivery_id': hirs_ctp_orbital_delivery_id,
            'hirs_ctp_daily_delivery_id': hirs_ctp_daily_delivery_id,
            'hirs_ctp_monthly_delivery_id': hirs_ctp_monthly_delivery_id
        } for g in granules]
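The while loop always lands on the first of each month, since it advances by exactly the length of the current month. A standalone check:

from calendar import monthrange
from datetime import datetime, timedelta

start, end = datetime(2017, 1, 1), datetime(2017, 3, 1)
date, granules = start, []
while date <= end:
    granules.append(date)
    date += timedelta(days=monthrange(date.year, date.month)[1])
# granules == [datetime(2017, 1, 1), datetime(2017, 2, 1), datetime(2017, 3, 1)]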
Example #7
    def get_cfsr(self, granule):
        '''
        Retrieve the CFSR file which covers the desired granule.
        '''

        wedge = timedelta(seconds=1)
        day = timedelta(days=1)

        cfsr_granule = round_datetime(granule, timedelta(hours=6))
        cfsr_file = None

        have_cfsr_file = False

        # Search for the old style pgbhnl.gdas.*.grb2 file from DAWG
        if not have_cfsr_file:
            LOG.debug(
                "Trying to retrieve CFSR_PGRBHANL product (pgbhnl.gdas.*.grb2) CFSR files from DAWG..."
            )
            try:
                cfsr_file = dawg_catalog.file('', 'CFSR_PGRBHANL',
                                              cfsr_granule)
                have_cfsr_file = True
            except Exception as err:
                LOG.debug("{}.".format(err))
Example #8
    def hirs_to_time_interval(self, filename):
        '''
        Takes the HIRS filename as input and returns the 1-day time interval
        covering that file.
        '''

        file_chunks = filename.split('.')
        begin_time = datetime.strptime('.'.join(file_chunks[3:5]),
                                       'D%y%j.S%H%M')
        end_time = datetime.strptime(
            '.'.join([file_chunks[3], file_chunks[5]]), 'D%y%j.E%H%M')

        if end_time < begin_time:
            end_time += timedelta(days=1)

        return TimeInterval(begin_time, end_time)
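Using the filename pattern that appears elsewhere in these examples, a call would parse the day-of-year start and end fields like so (values follow from the D%y%j.S%H%M and E%H%M formats above):

# interval = self.hirs_to_time_interval(
#     'NSS.GHRR.NC.D81236.S0013.E0207.B0086970.WI.level2.hdf')
# -> TimeInterval(datetime(1981, 8, 24, 0, 13), datetime(1981, 8, 24, 2, 7))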
Example #9
    def process_metadata(self, file_type):
        '''
        Run through the *.out file and create a nested dictionary containing
        the required metadata for the files of each file_type.
        '''
        LOG.debug("Reading the metadata file...")

        metadata_file = self.input_data[file_type]
        LOG.debug("For file_type = '{}', metadata_file = {}".format(
            file_type, metadata_file))

        # Open the metadata file and read each line, splitting into the required
        # metadata for each file.
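        # Each line is expected to look like (hypothetical values):
        #   size,mod_time,begin_time,end_time,sensor,sat,file_type,file_system,relative_path
        #   8123456,1514764800.0,1483228800.0,1483315199.0,hirs,noaa-19,HIR1B,/mnt/data,noaa-19/hirs/file.nc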
        with open(metadata_file) as metadata:
            for line in metadata:
                (size, mod_time, begin_time, end_time, sensor, sat, file_type,
                 file_system, relative_path) = line.split(',')
                begin_time = datetime.fromtimestamp(float(begin_time))
                end_time = datetime.fromtimestamp(float(end_time))
                relative_path = relative_path.split()[0]
                name = os.path.basename(relative_path)

                if sensor not in self.file_data:
                    self.file_data[sensor] = {}

                if sat not in self.file_data[sensor]:
                    self.file_data[sensor][sat] = {}

                if file_type not in self.file_data[sensor][sat]:
                    self.file_data[sensor][sat][file_type] = {}

                if name not in self.file_data[sensor][sat][file_type]:

                    if end_time < begin_time:
                        LOG.warning(
                            "{}: End time {} is before begin time {}, adding a day."
                            .format(name, end_time, begin_time))
                        end_time += timedelta(days=1)

                    self.file_data[sensor][sat][file_type][name] = {
                        'data_interval': TimeInterval(begin_time, end_time),
                        'name': name,
                        'path': relative_path
                    }

        self.indexed_file_types[file_type] = 1
Example #10
import logging
from datetime import timedelta

from flo.ui import local_prepare, local_execute

import flo.sw.hirs2nc as hirs2nc
import flo.sw.hirs_avhrr as hirs_avhrr
from flo.sw.hirs2nc.utils import setup_logging

# every module should have a LOG object
LOG = logging.getLogger(__name__)

#
# General information
#

#hirs2nc_delivery_id = '20180410-1'
#hirs_avhrr_delivery_id = '20180505-1'
wedge = timedelta(seconds=1.)

# Satellite specific information

#granule = datetime(2017, 1, 1, 0, 32)
#interval = TimeInterval(granule, granule+timedelta(seconds=0))


def setup_computation(satellite):

    #satellite = 'metop-b'
    input_data = {
        'HIR1B':
        '/mnt/software/flo/hirs_l1b_datalists/{0:}/HIR1B_{0:}_latest'.format(
            satellite),
        'CFSR':
        '/mnt/cephfs_data/geoffc/hirs_data_lists/CFSR.out',
        'PTMSX':
        '/mnt/software/flo/hirs_l1b_datalists/{0:}/PTMSX_{0:}_latest'.format(
            satellite)
    }
Example #11
    def build_task(self, context, task):
        '''
        Build up a set of inputs for a single context
        '''
        global delta_catalog

        LOG.debug("Running build_task()")
        LOG.debug("context:  {}".format(context))

        # Initialize the hirs2nc and hirs_avhrr modules with the data locations
        hirs2nc.delta_catalog = delta_catalog
        hirs_avhrr.delta_catalog = delta_catalog

        # Instantiate the hirs and hirs_avhrr computations
        hirs2nc_comp = hirs2nc.HIRS2NC()
        hirs_avhrr_comp = hirs_avhrr.HIRS_AVHRR()

        SPC = StoredProductCatalog()

        day = TimeInterval(
            context['granule'],
            (context['granule'] + timedelta(days=1) - timedelta(seconds=1)))
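        # e.g. granule = datetime(2017, 1, 1) gives the closed day interval
        # 2017-01-01 00:00:00 -> 2017-01-01 23:59:59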

        hirs2nc_contexts = hirs2nc_comp.find_contexts(
            day, context['satellite'], context['hirs2nc_delivery_id'])

        if len(hirs2nc_contexts) == 0:
            raise WorkflowNotReady('NO HIRS Data For {}'.format(
                context['granule']))

        # Input Counter.
        ic = 0

        for hirs2nc_context in hirs2nc_contexts:

            # Making Input contexts
            hirs_avhrr_context = hirs2nc_context.copy()
            hirs_avhrr_context['hirs_avhrr_delivery_id'] = context[
                'hirs_avhrr_delivery_id']

            LOG.debug("HIRS context:        {}".format(hirs2nc_context))
            LOG.debug("HIRS_AVHRR context:  {}".format(hirs_avhrr_context))

            # Confirming we have HIRS1B and COLLO products...
            hirs2nc_prod = hirs2nc_comp.dataset('out').product(hirs2nc_context)
            hirs_avhrr_prod = hirs_avhrr_comp.dataset('out').product(
                hirs_avhrr_context)

            # If HIRS1B and COLLO products exist, add them and the Patmos-X
            # file for this context to the list of input files to be downloaded to
            # the workspace...
            if SPC.exists(hirs2nc_prod) and SPC.exists(hirs_avhrr_prod):
                # It's safe to require all three inputs
                task.input('HIR1B-{}'.format(ic), hirs2nc_prod)
                task.input('COLLO-{}'.format(ic), hirs_avhrr_prod)
                task.input(
                    'PTMSX-{}'.format(ic),
                    delta_catalog.file('avhrr', hirs2nc_context['satellite'],
                                       'PTMSX', hirs2nc_context['granule']))
                ic += 1

        LOG.debug(
            "There are {} valid HIR1B/COLLO/PTMSX contexts in ({} -> {})".
            format(ic, day.left, day.right))

        if ic == 0:
            LOG.warn(
                "There are no valid HIR1B/COLLO/PTMSX contexts in ({} -> {}), aborting..."
                .format(day.left, day.right))
            return

        interval = TimeInterval(context['granule'],
                                context['granule'] + timedelta(days=1))

        num_cfsr_files = 0

        # Search for the old style pgbhnl.gdas.*.grb2 files from the PEATE
        if num_cfsr_files == 0:
            LOG.debug(
                "Trying to retrieve CFSR_PGRBHANL product (pgbhnl.gdas.*.grb2) CFSR files from DAWG..."
            )
            try:
                cfsr_files = dawg_catalog.files('', 'CFSR_PGRBHANL', interval)
                num_cfsr_files = len(cfsr_files)
                if num_cfsr_files == 0:
                    LOG.debug("\tpgbhnl.gdas.*.grb2 CFSR files from DAWG : {}".
                              format(cfsr_files))
            except Exception as err:
                LOG.error("{}.".format(err))
                LOG.warn(
                    "Retrieval of CFSR_PGRBHANL product (pgbhnl.gdas.*.grb2) CFSR files from DAWG failed"
                )
Example #12
    def build_task(self, context, task):
        '''
        Build up a set of inputs for a single context
        '''
        global delta_catalog

        LOG.debug("Running build_task()")

        # Initialize the hirs_tpw_orbital module with the data locations
        hirs_tpw_orbital.delta_catalog = delta_catalog

        # Instantiate the hirs_tpw_orbital computation
        hirs_tpw_orbital_comp = hirs_tpw_orbital.HIRS_TPW_ORBITAL()

        SPC = StoredProductCatalog()

        # TPW Orbital Input

        granule = context['granule']
        wedge = timedelta(seconds=1)
        hour = timedelta(hours=1)
        day = timedelta(days=1)

        # Add an hour to each end of the day to make sure the day is completely covered
        interval = TimeInterval(context['granule'] - 1 * hour,
                                (context['granule'] + day + 1 * hour))

        hirs_tpw_orbital_contexts = hirs_tpw_orbital_comp.find_contexts(
            interval, context['satellite'], context['hirs2nc_delivery_id'],
            context['hirs_avhrr_delivery_id'],
            context['hirs_csrb_daily_delivery_id'],
            context['hirs_csrb_monthly_delivery_id'],
            context['hirs_ctp_orbital_delivery_id'],
            context['hirs_ctp_daily_delivery_id'],
            context['hirs_ctp_monthly_delivery_id'],
            context['hirs_tpw_orbital_delivery_id'])

        if len(hirs_tpw_orbital_contexts) == 0:
            raise WorkflowNotReady(
                'No HIRS_TPW_ORBITAL inputs available for {}'.format(
                    context['granule']))

        LOG.debug("There are {} TPW Orbital contexts for {}.".format(
            len(hirs_tpw_orbital_contexts), interval))

        for tpw_context in hirs_tpw_orbital_contexts:
            LOG.debug(tpw_context)

        # Knock off all but the last of the "previous" day's contexts
        this_day = granule.day
        previous_day = (granule - day + wedge).day
        next_day = (granule + day + wedge).day
        LOG.debug("previous_day: {}".format(previous_day))
        LOG.debug("this_day: {}".format(this_day))
        LOG.debug("next_day: {}".format(next_day))

        start_idx = 0
        end_idx = -1
        num_contexts = len(hirs_tpw_orbital_contexts)

        indices = np.arange(num_contexts)
        reverse_indices = np.flip(np.arange(num_contexts) - num_contexts,
                                  axis=0)
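        # e.g. num_contexts = 5 gives indices [0, 1, 2, 3, 4] and
        # reverse_indices [-1, -2, -3, -4, -5] (walking back from the end)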

        # Keep this at zero unless it needs to be otherwise (e.g. for Metop-B)
        interval_pad = 0

        # Pruning all but the last of the previous day's contexts
        for idx in indices:
            if hirs_tpw_orbital_contexts[
                    idx + interval_pad]['granule'].day == this_day:
                start_idx = idx
                LOG.debug("Breaking: start_idx = {}, granule = {}".format(
                    start_idx,
                    hirs_tpw_orbital_contexts[start_idx]['granule']))
                break

        # Pruning all but the first of the next day's contexts
        for idx in reverse_indices:
            if hirs_tpw_orbital_contexts[
                    idx - interval_pad]['granule'].day == this_day:
                end_idx = idx
                LOG.debug("Breaking: end_idx = {}, granule = {}".format(
                    end_idx, hirs_tpw_orbital_contexts[end_idx]['granule']))
                break

        hirs_tpw_orbital_contexts = hirs_tpw_orbital_contexts[
            start_idx:end_idx + 1]
        #hirs_tpw_orbital_contexts = hirs_tpw_orbital_contexts[start_idx:end_idx]
        for tpw_context in hirs_tpw_orbital_contexts:
            LOG.debug("{}".format(tpw_context))

        for idx, tpw_context in enumerate(hirs_tpw_orbital_contexts):
            hirs_tpw_orbital_prod = hirs_tpw_orbital_comp.dataset(
                'shift').product(tpw_context)
            if SPC.exists(hirs_tpw_orbital_prod):
                task.input('TPWO_shift-{}'.format(str(idx).zfill(2)),
                           hirs_tpw_orbital_prod)

        for idx, tpw_context in enumerate(hirs_tpw_orbital_contexts):
            hirs_tpw_orbital_prod = hirs_tpw_orbital_comp.dataset(
                'noshift').product(tpw_context)
            if SPC.exists(hirs_tpw_orbital_prod):
                task.input('TPWO_noshift-{}'.format(str(idx).zfill(2)),
                           hirs_tpw_orbital_prod)
Example #13
import logging
from datetime import timedelta

import flo.sw.hirs_ctp_daily as hirs_ctp_daily
from flo.sw.hirs2nc.utils import setup_logging

# every module should have a LOG object
LOG = logging.getLogger(__name__)

setup_logging(2)

# General information
hirs2nc_delivery_id = '20180410-1'
hirs_avhrr_delivery_id = '20180505-1'
hirs_csrb_daily_delivery_id = '20180714-1'
hirs_csrb_monthly_delivery_id = '20180516-1'
hirs_ctp_orbital_delivery_id = '20180730-1'
hirs_ctp_daily_delivery_id = '20180802-1'
wedge = timedelta(seconds=1.)
day = timedelta(days=1.)
hour = timedelta(hours=1.)

# Satellite specific information

#satellite = 'noaa-07'
#granule = datetime(2015, 4, 17, 0, 20)
#intervals = [TimeInterval(granule, granule + wedge - wedge)]
# NSS.GHRR.NC.D81236.S0013.E0207.B0086970.WI.level2.hdf --> NSS.GHRR.NC.D85032.S2221.E0009.B1863132.GC.level2.hdf
# datetime(1981, 8, 24, 0, 13) --> datetime(1985, 2, 1, 0, 9)
#intervals = []
#for years in range(1981, 1986):
#intervals += [TimeInterval(datetime(years,month,1), datetime(years,month,calendar.monthrange(years,month)[1])+day-wedge) for month in range(1,13) ]

#satellite = 'noaa-09'
Example #14
import logging
from datetime import timedelta

LOG = logging.getLogger(__name__)

#
# General information
#

#hirs2nc_delivery_id = '20180410-1'
#hirs_avhrr_delivery_id = '20180505-1'
#hirs_csrb_daily_delivery_id  = '20180714-1'
#hirs_csrb_monthly_delivery_id  = '20180516-1'
#hirs_ctp_orbital_delivery_id  = '20180730-1'
#hirs_ctp_daily_delivery_id  = '20180802-1'
#hirs_ctp_monthly_delivery_id  = '20180803-1'
#hirs_tpw_orbital_delivery_id = '20190205-1'
#hirs_tpw_daily_delivery_id = '20190318-1'
wedge = timedelta(seconds=1.)
day = timedelta(days=1.)

# Satellite specific information

#granule = datetime(2017, 1, 1, 0)
#interval = TimeInterval(granule, granule+day-wedge)

def setup_computation(satellite):

    input_data = {'HIR1B': '/mnt/software/flo/hirs_l1b_datalists/{0:}/HIR1B_{0:}_latest'.format(satellite),
                  'CFSR':  '/mnt/cephfs_data/geoffc/hirs_data_lists/CFSR.out',
                  'PTMSX': '/mnt/software/flo/hirs_l1b_datalists/{0:}/PTMSX_{0:}_latest'.format(satellite)}

    # Data locations
    collection = {'HIR1B': 'ARCDATA',
Example #15
    def build_task(self, context, task):
        '''
        Build up a set of inputs for a single context
        '''
        global delta_catalog

        LOG.debug("Running build_task()")

        # Initialize the hirs2nc module with the data locations
        hirs2nc.delta_catalog = delta_catalog

        # Instantiate the hirs2nc and hirs_ctp_orbital computations
        hirs2nc_comp = hirs2nc.HIRS2NC()
        hirs_ctp_orbital_comp = hirs_ctp_orbital.HIRS_CTP_ORBITAL()

        SPC = StoredProductCatalog()

        #
        # HIRS L1B Input
        #
        hirs2nc_context = {
            'satellite': context['satellite'],
            'granule': context['granule'],
            'hirs2nc_delivery_id': context['hirs2nc_delivery_id']
        }

        hirs2nc_prod = hirs2nc_comp.dataset('out').product(hirs2nc_context)

        if SPC.exists(hirs2nc_prod):
            task.input('HIR1B', hirs2nc_prod)
        else:
            raise WorkflowNotReady('No HIRS inputs available for {}'.format(
                hirs2nc_context['granule']))

        #
        # CTP Orbital Input
        #
        hirs_ctp_orbital_context = context.copy()
        for k in ('hirs_ctp_daily_delivery_id', 'hirs_ctp_monthly_delivery_id',
                  'hirs_tpw_orbital_delivery_id'):
            hirs_ctp_orbital_context.pop(k)

        hirs_ctp_orbital_prod = hirs_ctp_orbital_comp.dataset('out').product(
            hirs_ctp_orbital_context)

        if SPC.exists(hirs_ctp_orbital_prod):
            task.input('CTPO', hirs_ctp_orbital_prod)
        else:
            raise WorkflowNotReady(
                'No HIRS CTP Orbital inputs available for {}'.format(
                    hirs_ctp_orbital_context['granule']))

        #
        # CFSR Input
        #
        cfsr_granule = round_datetime(context['granule'], timedelta(hours=6))
        cfsr_file = self.get_cfsr(cfsr_granule)

        if cfsr_file is not None:
            task.input('CFSR', cfsr_file)
        else:
            raise WorkflowNotReady(
                'No CFSR inputs available for {}'.format(cfsr_granule))

        LOG.debug("Final task.inputs...")  # GPC
        for task_key, task_input in task.inputs.items():
            LOG.debug("\t{}: {}".format(task_key, task_input))  # GPC

        LOG.debug("Exiting build_task()...")  # GPC